xref: /openbmc/linux/fs/xfs/libxfs/xfs_rmap.c (revision 2fa5ebe3)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014 Red Hat, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_sb.h"
15 #include "xfs_defer.h"
16 #include "xfs_btree.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_rmap.h"
20 #include "xfs_rmap_btree.h"
21 #include "xfs_trace.h"
22 #include "xfs_errortag.h"
23 #include "xfs_error.h"
24 #include "xfs_inode.h"
25 #include "xfs_ag.h"
26 
27 struct kmem_cache	*xfs_rmap_intent_cache;
28 
29 /*
30  * Lookup the first record less than or equal to [bno, len, owner, offset]
31  * in the btree given by cur.
32  */
33 int
34 xfs_rmap_lookup_le(
35 	struct xfs_btree_cur	*cur,
36 	xfs_agblock_t		bno,
37 	uint64_t		owner,
38 	uint64_t		offset,
39 	unsigned int		flags,
40 	struct xfs_rmap_irec	*irec,
41 	int			*stat)
42 {
43 	int			get_stat = 0;
44 	int			error;
45 
46 	cur->bc_rec.r.rm_startblock = bno;
47 	cur->bc_rec.r.rm_blockcount = 0;
48 	cur->bc_rec.r.rm_owner = owner;
49 	cur->bc_rec.r.rm_offset = offset;
50 	cur->bc_rec.r.rm_flags = flags;
51 
52 	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
53 	if (error || !(*stat) || !irec)
54 		return error;
55 
56 	error = xfs_rmap_get_rec(cur, irec, &get_stat);
57 	if (error)
58 		return error;
59 	if (!get_stat)
60 		return -EFSCORRUPTED;
61 
62 	return 0;
63 }
64 
65 /*
66  * Lookup the record exactly matching [bno, len, owner, offset]
67  * in the btree given by cur.
68  */
69 int
70 xfs_rmap_lookup_eq(
71 	struct xfs_btree_cur	*cur,
72 	xfs_agblock_t		bno,
73 	xfs_extlen_t		len,
74 	uint64_t		owner,
75 	uint64_t		offset,
76 	unsigned int		flags,
77 	int			*stat)
78 {
79 	cur->bc_rec.r.rm_startblock = bno;
80 	cur->bc_rec.r.rm_blockcount = len;
81 	cur->bc_rec.r.rm_owner = owner;
82 	cur->bc_rec.r.rm_offset = offset;
83 	cur->bc_rec.r.rm_flags = flags;
84 	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
85 }
86 
87 /*
88  * Update the record referred to by cur to the value given
89  * by [bno, len, owner, offset].
90  * This either works (return 0) or gets an EFSCORRUPTED error.
91  */
92 STATIC int
93 xfs_rmap_update(
94 	struct xfs_btree_cur	*cur,
95 	struct xfs_rmap_irec	*irec)
96 {
97 	union xfs_btree_rec	rec;
98 	int			error;
99 
100 	trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno,
101 			irec->rm_startblock, irec->rm_blockcount,
102 			irec->rm_owner, irec->rm_offset, irec->rm_flags);
103 
104 	rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
105 	rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
106 	rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
107 	rec.rmap.rm_offset = cpu_to_be64(
108 			xfs_rmap_irec_offset_pack(irec));
109 	error = xfs_btree_update(cur, &rec);
110 	if (error)
111 		trace_xfs_rmap_update_error(cur->bc_mp,
112 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
113 	return error;
114 }
115 
116 int
117 xfs_rmap_insert(
118 	struct xfs_btree_cur	*rcur,
119 	xfs_agblock_t		agbno,
120 	xfs_extlen_t		len,
121 	uint64_t		owner,
122 	uint64_t		offset,
123 	unsigned int		flags)
124 {
125 	int			i;
126 	int			error;
127 
128 	trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
129 			len, owner, offset, flags);
130 
131 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
132 	if (error)
133 		goto done;
134 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) {
135 		error = -EFSCORRUPTED;
136 		goto done;
137 	}
138 
139 	rcur->bc_rec.r.rm_startblock = agbno;
140 	rcur->bc_rec.r.rm_blockcount = len;
141 	rcur->bc_rec.r.rm_owner = owner;
142 	rcur->bc_rec.r.rm_offset = offset;
143 	rcur->bc_rec.r.rm_flags = flags;
144 	error = xfs_btree_insert(rcur, &i);
145 	if (error)
146 		goto done;
147 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
148 		error = -EFSCORRUPTED;
149 		goto done;
150 	}
151 done:
152 	if (error)
153 		trace_xfs_rmap_insert_error(rcur->bc_mp,
154 				rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
155 	return error;
156 }
157 
158 STATIC int
159 xfs_rmap_delete(
160 	struct xfs_btree_cur	*rcur,
161 	xfs_agblock_t		agbno,
162 	xfs_extlen_t		len,
163 	uint64_t		owner,
164 	uint64_t		offset,
165 	unsigned int		flags)
166 {
167 	int			i;
168 	int			error;
169 
170 	trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
171 			len, owner, offset, flags);
172 
173 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
174 	if (error)
175 		goto done;
176 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
177 		error = -EFSCORRUPTED;
178 		goto done;
179 	}
180 
181 	error = xfs_btree_delete(rcur, &i);
182 	if (error)
183 		goto done;
184 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
185 		error = -EFSCORRUPTED;
186 		goto done;
187 	}
188 done:
189 	if (error)
190 		trace_xfs_rmap_delete_error(rcur->bc_mp,
191 				rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
192 	return error;
193 }
194 
195 /* Convert an internal btree record to an rmap record. */
196 int
197 xfs_rmap_btrec_to_irec(
198 	const union xfs_btree_rec	*rec,
199 	struct xfs_rmap_irec		*irec)
200 {
201 	irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
202 	irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
203 	irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
204 	return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset),
205 			irec);
206 }
207 
208 /*
209  * Get the data from the pointed-to record.
210  */
211 int
212 xfs_rmap_get_rec(
213 	struct xfs_btree_cur	*cur,
214 	struct xfs_rmap_irec	*irec,
215 	int			*stat)
216 {
217 	struct xfs_mount	*mp = cur->bc_mp;
218 	struct xfs_perag	*pag = cur->bc_ag.pag;
219 	union xfs_btree_rec	*rec;
220 	int			error;
221 
222 	error = xfs_btree_get_rec(cur, &rec, stat);
223 	if (error || !*stat)
224 		return error;
225 
226 	if (xfs_rmap_btrec_to_irec(rec, irec))
227 		goto out_bad_rec;
228 
229 	if (irec->rm_blockcount == 0)
230 		goto out_bad_rec;
231 	if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) {
232 		if (irec->rm_owner != XFS_RMAP_OWN_FS)
233 			goto out_bad_rec;
234 		if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
235 			goto out_bad_rec;
236 	} else {
237 		/* check for valid extent range, including overflow */
238 		if (!xfs_verify_agbext(pag, irec->rm_startblock,
239 					    irec->rm_blockcount))
240 			goto out_bad_rec;
241 	}
242 
243 	if (!(xfs_verify_ino(mp, irec->rm_owner) ||
244 	      (irec->rm_owner <= XFS_RMAP_OWN_FS &&
245 	       irec->rm_owner >= XFS_RMAP_OWN_MIN)))
246 		goto out_bad_rec;
247 
248 	return 0;
249 out_bad_rec:
250 	xfs_warn(mp,
251 		"Reverse Mapping BTree record corruption in AG %d detected!",
252 		pag->pag_agno);
253 	xfs_warn(mp,
254 		"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
255 		irec->rm_owner, irec->rm_flags, irec->rm_startblock,
256 		irec->rm_blockcount);
257 	return -EFSCORRUPTED;
258 }
259 
260 struct xfs_find_left_neighbor_info {
261 	struct xfs_rmap_irec	high;
262 	struct xfs_rmap_irec	*irec;
263 };
264 
265 /* For each rmap given, figure out if it matches the key we want. */
266 STATIC int
267 xfs_rmap_find_left_neighbor_helper(
268 	struct xfs_btree_cur		*cur,
269 	const struct xfs_rmap_irec	*rec,
270 	void				*priv)
271 {
272 	struct xfs_find_left_neighbor_info	*info = priv;
273 
274 	trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp,
275 			cur->bc_ag.pag->pag_agno, rec->rm_startblock,
276 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
277 			rec->rm_flags);
278 
279 	if (rec->rm_owner != info->high.rm_owner)
280 		return 0;
281 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
282 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
283 	    rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset)
284 		return 0;
285 
286 	*info->irec = *rec;
287 	return -ECANCELED;
288 }
289 
290 /*
291  * Find the record to the left of the given extent, being careful only to
292  * return a match with the same owner and adjacent physical and logical
293  * block ranges.
294  */
295 STATIC int
296 xfs_rmap_find_left_neighbor(
297 	struct xfs_btree_cur	*cur,
298 	xfs_agblock_t		bno,
299 	uint64_t		owner,
300 	uint64_t		offset,
301 	unsigned int		flags,
302 	struct xfs_rmap_irec	*irec,
303 	int			*stat)
304 {
305 	struct xfs_find_left_neighbor_info	info;
306 	int			found = 0;
307 	int			error;
308 
309 	*stat = 0;
310 	if (bno == 0)
311 		return 0;
312 	info.high.rm_startblock = bno - 1;
313 	info.high.rm_owner = owner;
314 	if (!XFS_RMAP_NON_INODE_OWNER(owner) &&
315 	    !(flags & XFS_RMAP_BMBT_BLOCK)) {
316 		if (offset == 0)
317 			return 0;
318 		info.high.rm_offset = offset - 1;
319 	} else
320 		info.high.rm_offset = 0;
321 	info.high.rm_flags = flags;
322 	info.high.rm_blockcount = 0;
323 	info.irec = irec;
324 
325 	trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
326 			cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
327 
328 	/*
329 	 * Historically, we always used the range query to walk every reverse
330 	 * mapping that could possibly overlap the key that the caller asked
331 	 * for, and filter out the ones that don't.  That is very slow when
332 	 * there are a lot of records.
333 	 *
334 	 * However, there are two scenarios where the classic btree search can
335 	 * produce correct results -- if the index contains a record that is an
336 	 * exact match for the lookup key; and if there are no other records
337 	 * between the record we want and the key we supplied.
338 	 *
339 	 * As an optimization, try a non-overlapped lookup first.  This makes
340 	 * extent conversion and remap operations run a bit faster if the
341 	 * physical extents aren't being shared.  If we don't find what we
342 	 * want, we fall back to the overlapped query.
343 	 */
344 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
345 			&found);
346 	if (error)
347 		return error;
348 	if (found)
349 		error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
350 	if (!error)
351 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
352 				xfs_rmap_find_left_neighbor_helper, &info);
353 	if (error != -ECANCELED)
354 		return error;
355 
356 	*stat = 1;
357 	trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
358 			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
359 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
360 			irec->rm_flags);
361 	return 0;
362 }
363 
364 /* For each rmap given, figure out if it matches the key we want. */
365 STATIC int
366 xfs_rmap_lookup_le_range_helper(
367 	struct xfs_btree_cur		*cur,
368 	const struct xfs_rmap_irec	*rec,
369 	void				*priv)
370 {
371 	struct xfs_find_left_neighbor_info	*info = priv;
372 
373 	trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp,
374 			cur->bc_ag.pag->pag_agno, rec->rm_startblock,
375 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
376 			rec->rm_flags);
377 
378 	if (rec->rm_owner != info->high.rm_owner)
379 		return 0;
380 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
381 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
382 	    (rec->rm_offset > info->high.rm_offset ||
383 	     rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset))
384 		return 0;
385 
386 	*info->irec = *rec;
387 	return -ECANCELED;
388 }
389 
390 /*
391  * Find the record to the left of the given extent, being careful only to
392  * return a match with the same owner and overlapping physical and logical
393  * block ranges.  This is the overlapping-interval version of
394  * xfs_rmap_lookup_le.
395  */
396 int
397 xfs_rmap_lookup_le_range(
398 	struct xfs_btree_cur	*cur,
399 	xfs_agblock_t		bno,
400 	uint64_t		owner,
401 	uint64_t		offset,
402 	unsigned int		flags,
403 	struct xfs_rmap_irec	*irec,
404 	int			*stat)
405 {
406 	struct xfs_find_left_neighbor_info	info;
407 	int			found = 0;
408 	int			error;
409 
410 	info.high.rm_startblock = bno;
411 	info.high.rm_owner = owner;
412 	if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK))
413 		info.high.rm_offset = offset;
414 	else
415 		info.high.rm_offset = 0;
416 	info.high.rm_flags = flags;
417 	info.high.rm_blockcount = 0;
418 	*stat = 0;
419 	info.irec = irec;
420 
421 	trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
422 			bno, 0, owner, offset, flags);
423 
424 	/*
425 	 * Historically, we always used the range query to walk every reverse
426 	 * mapping that could possibly overlap the key that the caller asked
427 	 * for, and filter out the ones that don't.  That is very slow when
428 	 * there are a lot of records.
429 	 *
430 	 * However, there are two scenarios where the classic btree search can
431 	 * produce correct results -- if the index contains a record that is an
432 	 * exact match for the lookup key; and if there are no other records
433 	 * between the record we want and the key we supplied.
434 	 *
435 	 * As an optimization, try a non-overlapped lookup first.  This makes
436 	 * scrub run much faster on most filesystems because bmbt records are
437 	 * usually an exact match for rmap records.  If we don't find what we
438 	 * want, we fall back to the overlapped query.
439 	 */
440 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
441 			&found);
442 	if (error)
443 		return error;
444 	if (found)
445 		error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
446 	if (!error)
447 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
448 				xfs_rmap_lookup_le_range_helper, &info);
449 	if (error != -ECANCELED)
450 		return error;
451 
452 	*stat = 1;
453 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
454 			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
455 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
456 			irec->rm_flags);
457 	return 0;
458 }
459 
460 /*
461  * Perform all the relevant owner checks for a removal op.  If we're doing an
462  * unknown-owner removal then we have no owner information to check.
463  */
464 static int
465 xfs_rmap_free_check_owner(
466 	struct xfs_mount	*mp,
467 	uint64_t		ltoff,
468 	struct xfs_rmap_irec	*rec,
469 	xfs_filblks_t		len,
470 	uint64_t		owner,
471 	uint64_t		offset,
472 	unsigned int		flags)
473 {
474 	int			error = 0;
475 
476 	if (owner == XFS_RMAP_OWN_UNKNOWN)
477 		return 0;
478 
479 	/* Make sure the unwritten flag matches. */
480 	if (XFS_IS_CORRUPT(mp,
481 			   (flags & XFS_RMAP_UNWRITTEN) !=
482 			   (rec->rm_flags & XFS_RMAP_UNWRITTEN))) {
483 		error = -EFSCORRUPTED;
484 		goto out;
485 	}
486 
487 	/* Make sure the owner matches what we expect to find in the tree. */
488 	if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) {
489 		error = -EFSCORRUPTED;
490 		goto out;
491 	}
492 
493 	/* Check the offset, if necessary. */
494 	if (XFS_RMAP_NON_INODE_OWNER(owner))
495 		goto out;
496 
497 	if (flags & XFS_RMAP_BMBT_BLOCK) {
498 		if (XFS_IS_CORRUPT(mp,
499 				   !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) {
500 			error = -EFSCORRUPTED;
501 			goto out;
502 		}
503 	} else {
504 		if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) {
505 			error = -EFSCORRUPTED;
506 			goto out;
507 		}
508 		if (XFS_IS_CORRUPT(mp,
509 				   offset + len > ltoff + rec->rm_blockcount)) {
510 			error = -EFSCORRUPTED;
511 			goto out;
512 		}
513 	}
514 
515 out:
516 	return error;
517 }
518 
519 /*
520  * Find the extent in the rmap btree and remove it.
521  *
522  * The record we find should always be an exact match for the extent that we're
523  * looking for, since we insert them into the btree without modification.
524  *
525  * Special Case #1: when growing the filesystem, we "free" an extent when
526  * growing the last AG. This extent is new space and so it is not tracked as
527  * used space in the btree. The growfs code will pass in an owner of
528  * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this
529  * extent. We verify that - the extent lookup result in a record that does not
530  * overlap.
531  *
532  * Special Case #2: EFIs do not record the owner of the extent, so when
533  * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
534  * btree to ignore the owner (i.e. wildcard match) so we don't trigger
535  * corruption checks during log recovery.
536  */
537 STATIC int
538 xfs_rmap_unmap(
539 	struct xfs_btree_cur		*cur,
540 	xfs_agblock_t			bno,
541 	xfs_extlen_t			len,
542 	bool				unwritten,
543 	const struct xfs_owner_info	*oinfo)
544 {
545 	struct xfs_mount		*mp = cur->bc_mp;
546 	struct xfs_rmap_irec		ltrec;
547 	uint64_t			ltoff;
548 	int				error = 0;
549 	int				i;
550 	uint64_t			owner;
551 	uint64_t			offset;
552 	unsigned int			flags;
553 	bool				ignore_off;
554 
555 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
556 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
557 			(flags & XFS_RMAP_BMBT_BLOCK);
558 	if (unwritten)
559 		flags |= XFS_RMAP_UNWRITTEN;
560 	trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
561 			unwritten, oinfo);
562 
563 	/*
564 	 * We should always have a left record because there's a static record
565 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
566 	 * will not ever be removed from the tree.
567 	 */
568 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
569 	if (error)
570 		goto out_error;
571 	if (XFS_IS_CORRUPT(mp, i != 1)) {
572 		error = -EFSCORRUPTED;
573 		goto out_error;
574 	}
575 
576 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
577 			cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
578 			ltrec.rm_blockcount, ltrec.rm_owner,
579 			ltrec.rm_offset, ltrec.rm_flags);
580 	ltoff = ltrec.rm_offset;
581 
582 	/*
583 	 * For growfs, the incoming extent must be beyond the left record we
584 	 * just found as it is new space and won't be used by anyone. This is
585 	 * just a corruption check as we don't actually do anything with this
586 	 * extent.  Note that we need to use >= instead of > because it might
587 	 * be the case that the "left" extent goes all the way to EOFS.
588 	 */
589 	if (owner == XFS_RMAP_OWN_NULL) {
590 		if (XFS_IS_CORRUPT(mp,
591 				   bno <
592 				   ltrec.rm_startblock + ltrec.rm_blockcount)) {
593 			error = -EFSCORRUPTED;
594 			goto out_error;
595 		}
596 		goto out_done;
597 	}
598 
599 	/*
600 	 * If we're doing an unknown-owner removal for EFI recovery, we expect
601 	 * to find the full range in the rmapbt or nothing at all.  If we
602 	 * don't find any rmaps overlapping either end of the range, we're
603 	 * done.  Hopefully this means that the EFI creator already queued
604 	 * (and finished) a RUI to remove the rmap.
605 	 */
606 	if (owner == XFS_RMAP_OWN_UNKNOWN &&
607 	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
608 		struct xfs_rmap_irec    rtrec;
609 
610 		error = xfs_btree_increment(cur, 0, &i);
611 		if (error)
612 			goto out_error;
613 		if (i == 0)
614 			goto out_done;
615 		error = xfs_rmap_get_rec(cur, &rtrec, &i);
616 		if (error)
617 			goto out_error;
618 		if (XFS_IS_CORRUPT(mp, i != 1)) {
619 			error = -EFSCORRUPTED;
620 			goto out_error;
621 		}
622 		if (rtrec.rm_startblock >= bno + len)
623 			goto out_done;
624 	}
625 
626 	/* Make sure the extent we found covers the entire freeing range. */
627 	if (XFS_IS_CORRUPT(mp,
628 			   ltrec.rm_startblock > bno ||
629 			   ltrec.rm_startblock + ltrec.rm_blockcount <
630 			   bno + len)) {
631 		error = -EFSCORRUPTED;
632 		goto out_error;
633 	}
634 
635 	/* Check owner information. */
636 	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, len, owner,
637 			offset, flags);
638 	if (error)
639 		goto out_error;
640 
641 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
642 		/* exact match, simply remove the record from rmap tree */
643 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
644 				ltrec.rm_startblock, ltrec.rm_blockcount,
645 				ltrec.rm_owner, ltrec.rm_offset,
646 				ltrec.rm_flags);
647 		error = xfs_btree_delete(cur, &i);
648 		if (error)
649 			goto out_error;
650 		if (XFS_IS_CORRUPT(mp, i != 1)) {
651 			error = -EFSCORRUPTED;
652 			goto out_error;
653 		}
654 	} else if (ltrec.rm_startblock == bno) {
655 		/*
656 		 * overlap left hand side of extent: move the start, trim the
657 		 * length and update the current record.
658 		 *
659 		 *       ltbno                ltlen
660 		 * Orig:    |oooooooooooooooooooo|
661 		 * Freeing: |fffffffff|
662 		 * Result:            |rrrrrrrrrr|
663 		 *         bno       len
664 		 */
665 		ltrec.rm_startblock += len;
666 		ltrec.rm_blockcount -= len;
667 		if (!ignore_off)
668 			ltrec.rm_offset += len;
669 		error = xfs_rmap_update(cur, &ltrec);
670 		if (error)
671 			goto out_error;
672 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
673 		/*
674 		 * overlap right hand side of extent: trim the length and update
675 		 * the current record.
676 		 *
677 		 *       ltbno                ltlen
678 		 * Orig:    |oooooooooooooooooooo|
679 		 * Freeing:            |fffffffff|
680 		 * Result:  |rrrrrrrrrr|
681 		 *                    bno       len
682 		 */
683 		ltrec.rm_blockcount -= len;
684 		error = xfs_rmap_update(cur, &ltrec);
685 		if (error)
686 			goto out_error;
687 	} else {
688 
689 		/*
690 		 * overlap middle of extent: trim the length of the existing
691 		 * record to the length of the new left-extent size, increment
692 		 * the insertion position so we can insert a new record
693 		 * containing the remaining right-extent space.
694 		 *
695 		 *       ltbno                ltlen
696 		 * Orig:    |oooooooooooooooooooo|
697 		 * Freeing:       |fffffffff|
698 		 * Result:  |rrrrr|         |rrrr|
699 		 *               bno       len
700 		 */
701 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
702 
703 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
704 		error = xfs_rmap_update(cur, &ltrec);
705 		if (error)
706 			goto out_error;
707 
708 		error = xfs_btree_increment(cur, 0, &i);
709 		if (error)
710 			goto out_error;
711 
712 		cur->bc_rec.r.rm_startblock = bno + len;
713 		cur->bc_rec.r.rm_blockcount = orig_len - len -
714 						     ltrec.rm_blockcount;
715 		cur->bc_rec.r.rm_owner = ltrec.rm_owner;
716 		if (ignore_off)
717 			cur->bc_rec.r.rm_offset = 0;
718 		else
719 			cur->bc_rec.r.rm_offset = offset + len;
720 		cur->bc_rec.r.rm_flags = flags;
721 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
722 				cur->bc_rec.r.rm_startblock,
723 				cur->bc_rec.r.rm_blockcount,
724 				cur->bc_rec.r.rm_owner,
725 				cur->bc_rec.r.rm_offset,
726 				cur->bc_rec.r.rm_flags);
727 		error = xfs_btree_insert(cur, &i);
728 		if (error)
729 			goto out_error;
730 	}
731 
732 out_done:
733 	trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
734 			unwritten, oinfo);
735 out_error:
736 	if (error)
737 		trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno,
738 				error, _RET_IP_);
739 	return error;
740 }
741 
742 /*
743  * Remove a reference to an extent in the rmap btree.
744  */
745 int
746 xfs_rmap_free(
747 	struct xfs_trans		*tp,
748 	struct xfs_buf			*agbp,
749 	struct xfs_perag		*pag,
750 	xfs_agblock_t			bno,
751 	xfs_extlen_t			len,
752 	const struct xfs_owner_info	*oinfo)
753 {
754 	struct xfs_mount		*mp = tp->t_mountp;
755 	struct xfs_btree_cur		*cur;
756 	int				error;
757 
758 	if (!xfs_has_rmapbt(mp))
759 		return 0;
760 
761 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
762 
763 	error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
764 
765 	xfs_btree_del_cursor(cur, error);
766 	return error;
767 }
768 
769 /*
770  * A mergeable rmap must have the same owner and the same values for
771  * the unwritten, attr_fork, and bmbt flags.  The startblock and
772  * offset are checked separately.
773  */
774 static bool
775 xfs_rmap_is_mergeable(
776 	struct xfs_rmap_irec	*irec,
777 	uint64_t		owner,
778 	unsigned int		flags)
779 {
780 	if (irec->rm_owner == XFS_RMAP_OWN_NULL)
781 		return false;
782 	if (irec->rm_owner != owner)
783 		return false;
784 	if ((flags & XFS_RMAP_UNWRITTEN) ^
785 	    (irec->rm_flags & XFS_RMAP_UNWRITTEN))
786 		return false;
787 	if ((flags & XFS_RMAP_ATTR_FORK) ^
788 	    (irec->rm_flags & XFS_RMAP_ATTR_FORK))
789 		return false;
790 	if ((flags & XFS_RMAP_BMBT_BLOCK) ^
791 	    (irec->rm_flags & XFS_RMAP_BMBT_BLOCK))
792 		return false;
793 	return true;
794 }
795 
796 /*
797  * When we allocate a new block, the first thing we do is add a reference to
798  * the extent in the rmap btree. This takes the form of a [agbno, length,
799  * owner, offset] record.  Flags are encoded in the high bits of the offset
800  * field.
801  */
802 STATIC int
803 xfs_rmap_map(
804 	struct xfs_btree_cur		*cur,
805 	xfs_agblock_t			bno,
806 	xfs_extlen_t			len,
807 	bool				unwritten,
808 	const struct xfs_owner_info	*oinfo)
809 {
810 	struct xfs_mount		*mp = cur->bc_mp;
811 	struct xfs_rmap_irec		ltrec;
812 	struct xfs_rmap_irec		gtrec;
813 	int				have_gt;
814 	int				have_lt;
815 	int				error = 0;
816 	int				i;
817 	uint64_t			owner;
818 	uint64_t			offset;
819 	unsigned int			flags = 0;
820 	bool				ignore_off;
821 
822 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
823 	ASSERT(owner != 0);
824 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
825 			(flags & XFS_RMAP_BMBT_BLOCK);
826 	if (unwritten)
827 		flags |= XFS_RMAP_UNWRITTEN;
828 	trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
829 			unwritten, oinfo);
830 	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
831 
832 	/*
833 	 * For the initial lookup, look for an exact match or the left-adjacent
834 	 * record for our insertion point. This will also give us the record for
835 	 * start block contiguity tests.
836 	 */
837 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
838 			&have_lt);
839 	if (error)
840 		goto out_error;
841 	if (have_lt) {
842 		trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
843 				cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
844 				ltrec.rm_blockcount, ltrec.rm_owner,
845 				ltrec.rm_offset, ltrec.rm_flags);
846 
847 		if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
848 			have_lt = 0;
849 	}
850 
851 	if (XFS_IS_CORRUPT(mp,
852 			   have_lt != 0 &&
853 			   ltrec.rm_startblock + ltrec.rm_blockcount > bno)) {
854 		error = -EFSCORRUPTED;
855 		goto out_error;
856 	}
857 
858 	/*
859 	 * Increment the cursor to see if we have a right-adjacent record to our
860 	 * insertion point. This will give us the record for end block
861 	 * contiguity tests.
862 	 */
863 	error = xfs_btree_increment(cur, 0, &have_gt);
864 	if (error)
865 		goto out_error;
866 	if (have_gt) {
867 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
868 		if (error)
869 			goto out_error;
870 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
871 			error = -EFSCORRUPTED;
872 			goto out_error;
873 		}
874 		if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) {
875 			error = -EFSCORRUPTED;
876 			goto out_error;
877 		}
878 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
879 			cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
880 			gtrec.rm_blockcount, gtrec.rm_owner,
881 			gtrec.rm_offset, gtrec.rm_flags);
882 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
883 			have_gt = 0;
884 	}
885 
886 	/*
887 	 * Note: cursor currently points one record to the right of ltrec, even
888 	 * if there is no record in the tree to the right.
889 	 */
890 	if (have_lt &&
891 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
892 	    (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
893 		/*
894 		 * left edge contiguous, merge into left record.
895 		 *
896 		 *       ltbno     ltlen
897 		 * orig:   |ooooooooo|
898 		 * adding:           |aaaaaaaaa|
899 		 * result: |rrrrrrrrrrrrrrrrrrr|
900 		 *                  bno       len
901 		 */
902 		ltrec.rm_blockcount += len;
903 		if (have_gt &&
904 		    bno + len == gtrec.rm_startblock &&
905 		    (ignore_off || offset + len == gtrec.rm_offset) &&
906 		    (unsigned long)ltrec.rm_blockcount + len +
907 				gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
908 			/*
909 			 * right edge also contiguous, delete right record
910 			 * and merge into left record.
911 			 *
912 			 *       ltbno     ltlen    gtbno     gtlen
913 			 * orig:   |ooooooooo|         |ooooooooo|
914 			 * adding:           |aaaaaaaaa|
915 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
916 			 */
917 			ltrec.rm_blockcount += gtrec.rm_blockcount;
918 			trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
919 					gtrec.rm_startblock,
920 					gtrec.rm_blockcount,
921 					gtrec.rm_owner,
922 					gtrec.rm_offset,
923 					gtrec.rm_flags);
924 			error = xfs_btree_delete(cur, &i);
925 			if (error)
926 				goto out_error;
927 			if (XFS_IS_CORRUPT(mp, i != 1)) {
928 				error = -EFSCORRUPTED;
929 				goto out_error;
930 			}
931 		}
932 
933 		/* point the cursor back to the left record and update */
934 		error = xfs_btree_decrement(cur, 0, &have_gt);
935 		if (error)
936 			goto out_error;
937 		error = xfs_rmap_update(cur, &ltrec);
938 		if (error)
939 			goto out_error;
940 	} else if (have_gt &&
941 		   bno + len == gtrec.rm_startblock &&
942 		   (ignore_off || offset + len == gtrec.rm_offset)) {
943 		/*
944 		 * right edge contiguous, merge into right record.
945 		 *
946 		 *                 gtbno     gtlen
947 		 * Orig:             |ooooooooo|
948 		 * adding: |aaaaaaaaa|
949 		 * Result: |rrrrrrrrrrrrrrrrrrr|
950 		 *        bno       len
951 		 */
952 		gtrec.rm_startblock = bno;
953 		gtrec.rm_blockcount += len;
954 		if (!ignore_off)
955 			gtrec.rm_offset = offset;
956 		error = xfs_rmap_update(cur, &gtrec);
957 		if (error)
958 			goto out_error;
959 	} else {
960 		/*
961 		 * no contiguous edge with identical owner, insert
962 		 * new record at current cursor position.
963 		 */
964 		cur->bc_rec.r.rm_startblock = bno;
965 		cur->bc_rec.r.rm_blockcount = len;
966 		cur->bc_rec.r.rm_owner = owner;
967 		cur->bc_rec.r.rm_offset = offset;
968 		cur->bc_rec.r.rm_flags = flags;
969 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
970 			owner, offset, flags);
971 		error = xfs_btree_insert(cur, &i);
972 		if (error)
973 			goto out_error;
974 		if (XFS_IS_CORRUPT(mp, i != 1)) {
975 			error = -EFSCORRUPTED;
976 			goto out_error;
977 		}
978 	}
979 
980 	trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
981 			unwritten, oinfo);
982 out_error:
983 	if (error)
984 		trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno,
985 				error, _RET_IP_);
986 	return error;
987 }
988 
989 /*
990  * Add a reference to an extent in the rmap btree.
991  */
992 int
993 xfs_rmap_alloc(
994 	struct xfs_trans		*tp,
995 	struct xfs_buf			*agbp,
996 	struct xfs_perag		*pag,
997 	xfs_agblock_t			bno,
998 	xfs_extlen_t			len,
999 	const struct xfs_owner_info	*oinfo)
1000 {
1001 	struct xfs_mount		*mp = tp->t_mountp;
1002 	struct xfs_btree_cur		*cur;
1003 	int				error;
1004 
1005 	if (!xfs_has_rmapbt(mp))
1006 		return 0;
1007 
1008 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
1009 	error = xfs_rmap_map(cur, bno, len, false, oinfo);
1010 
1011 	xfs_btree_del_cursor(cur, error);
1012 	return error;
1013 }
1014 
1015 #define RMAP_LEFT_CONTIG	(1 << 0)
1016 #define RMAP_RIGHT_CONTIG	(1 << 1)
1017 #define RMAP_LEFT_FILLING	(1 << 2)
1018 #define RMAP_RIGHT_FILLING	(1 << 3)
1019 #define RMAP_LEFT_VALID		(1 << 6)
1020 #define RMAP_RIGHT_VALID	(1 << 7)
1021 
1022 #define LEFT		r[0]
1023 #define RIGHT		r[1]
1024 #define PREV		r[2]
1025 #define NEW		r[3]
1026 
1027 /*
1028  * Convert an unwritten extent to a real extent or vice versa.
1029  * Does not handle overlapping extents.
1030  */
1031 STATIC int
1032 xfs_rmap_convert(
1033 	struct xfs_btree_cur		*cur,
1034 	xfs_agblock_t			bno,
1035 	xfs_extlen_t			len,
1036 	bool				unwritten,
1037 	const struct xfs_owner_info	*oinfo)
1038 {
1039 	struct xfs_mount		*mp = cur->bc_mp;
1040 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1041 						/* left is 0, right is 1, */
1042 						/* prev is 2, new is 3 */
1043 	uint64_t		owner;
1044 	uint64_t		offset;
1045 	uint64_t		new_endoff;
1046 	unsigned int		oldext;
1047 	unsigned int		newext;
1048 	unsigned int		flags = 0;
1049 	int			i;
1050 	int			state = 0;
1051 	int			error;
1052 
1053 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1054 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1055 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1056 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1057 	new_endoff = offset + len;
1058 	trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1059 			unwritten, oinfo);
1060 
1061 	/*
1062 	 * For the initial lookup, look for an exact match or the left-adjacent
1063 	 * record for our insertion point. This will also give us the record for
1064 	 * start block contiguity tests.
1065 	 */
1066 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
1067 	if (error)
1068 		goto done;
1069 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1070 		error = -EFSCORRUPTED;
1071 		goto done;
1072 	}
1073 
1074 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
1075 			cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
1076 			PREV.rm_blockcount, PREV.rm_owner,
1077 			PREV.rm_offset, PREV.rm_flags);
1078 
1079 	ASSERT(PREV.rm_offset <= offset);
1080 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1081 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1082 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1083 
1084 	/*
1085 	 * Set flags determining what part of the previous oldext allocation
1086 	 * extent is being replaced by a newext allocation.
1087 	 */
1088 	if (PREV.rm_offset == offset)
1089 		state |= RMAP_LEFT_FILLING;
1090 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1091 		state |= RMAP_RIGHT_FILLING;
1092 
1093 	/*
1094 	 * Decrement the cursor to see if we have a left-adjacent record to our
1095 	 * insertion point. This will give us the record for end block
1096 	 * contiguity tests.
1097 	 */
1098 	error = xfs_btree_decrement(cur, 0, &i);
1099 	if (error)
1100 		goto done;
1101 	if (i) {
1102 		state |= RMAP_LEFT_VALID;
1103 		error = xfs_rmap_get_rec(cur, &LEFT, &i);
1104 		if (error)
1105 			goto done;
1106 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1107 			error = -EFSCORRUPTED;
1108 			goto done;
1109 		}
1110 		if (XFS_IS_CORRUPT(mp,
1111 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1112 				   bno)) {
1113 			error = -EFSCORRUPTED;
1114 			goto done;
1115 		}
1116 		trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
1117 				cur->bc_ag.pag->pag_agno, LEFT.rm_startblock,
1118 				LEFT.rm_blockcount, LEFT.rm_owner,
1119 				LEFT.rm_offset, LEFT.rm_flags);
1120 		if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
1121 		    LEFT.rm_offset + LEFT.rm_blockcount == offset &&
1122 		    xfs_rmap_is_mergeable(&LEFT, owner, newext))
1123 			state |= RMAP_LEFT_CONTIG;
1124 	}
1125 
1126 	/*
1127 	 * Increment the cursor to see if we have a right-adjacent record to our
1128 	 * insertion point. This will give us the record for end block
1129 	 * contiguity tests.
1130 	 */
1131 	error = xfs_btree_increment(cur, 0, &i);
1132 	if (error)
1133 		goto done;
1134 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1135 		error = -EFSCORRUPTED;
1136 		goto done;
1137 	}
1138 	error = xfs_btree_increment(cur, 0, &i);
1139 	if (error)
1140 		goto done;
1141 	if (i) {
1142 		state |= RMAP_RIGHT_VALID;
1143 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1144 		if (error)
1145 			goto done;
1146 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1147 			error = -EFSCORRUPTED;
1148 			goto done;
1149 		}
1150 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1151 			error = -EFSCORRUPTED;
1152 			goto done;
1153 		}
1154 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
1155 				cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
1156 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1157 				RIGHT.rm_offset, RIGHT.rm_flags);
1158 		if (bno + len == RIGHT.rm_startblock &&
1159 		    offset + len == RIGHT.rm_offset &&
1160 		    xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1161 			state |= RMAP_RIGHT_CONTIG;
1162 	}
1163 
1164 	/* check that left + prev + right is not too long */
1165 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1166 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1167 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1168 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1169 	    (unsigned long)LEFT.rm_blockcount + len +
1170 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1171 		state &= ~RMAP_RIGHT_CONTIG;
1172 
1173 	trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
1174 			_RET_IP_);
1175 
1176 	/* reset the cursor back to PREV */
1177 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
1178 	if (error)
1179 		goto done;
1180 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1181 		error = -EFSCORRUPTED;
1182 		goto done;
1183 	}
1184 
1185 	/*
1186 	 * Switch out based on the FILLING and CONTIG state bits.
1187 	 */
1188 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1189 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1190 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1191 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1192 		/*
1193 		 * Setting all of a previous oldext extent to newext.
1194 		 * The left and right neighbors are both contiguous with new.
1195 		 */
1196 		error = xfs_btree_increment(cur, 0, &i);
1197 		if (error)
1198 			goto done;
1199 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1200 			error = -EFSCORRUPTED;
1201 			goto done;
1202 		}
1203 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1204 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1205 				RIGHT.rm_owner, RIGHT.rm_offset,
1206 				RIGHT.rm_flags);
1207 		error = xfs_btree_delete(cur, &i);
1208 		if (error)
1209 			goto done;
1210 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1211 			error = -EFSCORRUPTED;
1212 			goto done;
1213 		}
1214 		error = xfs_btree_decrement(cur, 0, &i);
1215 		if (error)
1216 			goto done;
1217 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1218 			error = -EFSCORRUPTED;
1219 			goto done;
1220 		}
1221 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1222 				PREV.rm_startblock, PREV.rm_blockcount,
1223 				PREV.rm_owner, PREV.rm_offset,
1224 				PREV.rm_flags);
1225 		error = xfs_btree_delete(cur, &i);
1226 		if (error)
1227 			goto done;
1228 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1229 			error = -EFSCORRUPTED;
1230 			goto done;
1231 		}
1232 		error = xfs_btree_decrement(cur, 0, &i);
1233 		if (error)
1234 			goto done;
1235 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1236 			error = -EFSCORRUPTED;
1237 			goto done;
1238 		}
1239 		NEW = LEFT;
1240 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1241 		error = xfs_rmap_update(cur, &NEW);
1242 		if (error)
1243 			goto done;
1244 		break;
1245 
1246 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1247 		/*
1248 		 * Setting all of a previous oldext extent to newext.
1249 		 * The left neighbor is contiguous, the right is not.
1250 		 */
1251 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1252 				PREV.rm_startblock, PREV.rm_blockcount,
1253 				PREV.rm_owner, PREV.rm_offset,
1254 				PREV.rm_flags);
1255 		error = xfs_btree_delete(cur, &i);
1256 		if (error)
1257 			goto done;
1258 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1259 			error = -EFSCORRUPTED;
1260 			goto done;
1261 		}
1262 		error = xfs_btree_decrement(cur, 0, &i);
1263 		if (error)
1264 			goto done;
1265 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1266 			error = -EFSCORRUPTED;
1267 			goto done;
1268 		}
1269 		NEW = LEFT;
1270 		NEW.rm_blockcount += PREV.rm_blockcount;
1271 		error = xfs_rmap_update(cur, &NEW);
1272 		if (error)
1273 			goto done;
1274 		break;
1275 
1276 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1277 		/*
1278 		 * Setting all of a previous oldext extent to newext.
1279 		 * The right neighbor is contiguous, the left is not.
1280 		 */
1281 		error = xfs_btree_increment(cur, 0, &i);
1282 		if (error)
1283 			goto done;
1284 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1285 			error = -EFSCORRUPTED;
1286 			goto done;
1287 		}
1288 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1289 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1290 				RIGHT.rm_owner, RIGHT.rm_offset,
1291 				RIGHT.rm_flags);
1292 		error = xfs_btree_delete(cur, &i);
1293 		if (error)
1294 			goto done;
1295 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1296 			error = -EFSCORRUPTED;
1297 			goto done;
1298 		}
1299 		error = xfs_btree_decrement(cur, 0, &i);
1300 		if (error)
1301 			goto done;
1302 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1303 			error = -EFSCORRUPTED;
1304 			goto done;
1305 		}
1306 		NEW = PREV;
1307 		NEW.rm_blockcount = len + RIGHT.rm_blockcount;
1308 		NEW.rm_flags = newext;
1309 		error = xfs_rmap_update(cur, &NEW);
1310 		if (error)
1311 			goto done;
1312 		break;
1313 
1314 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1315 		/*
1316 		 * Setting all of a previous oldext extent to newext.
1317 		 * Neither the left nor right neighbors are contiguous with
1318 		 * the new one.
1319 		 */
1320 		NEW = PREV;
1321 		NEW.rm_flags = newext;
1322 		error = xfs_rmap_update(cur, &NEW);
1323 		if (error)
1324 			goto done;
1325 		break;
1326 
1327 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1328 		/*
1329 		 * Setting the first part of a previous oldext extent to newext.
1330 		 * The left neighbor is contiguous.
1331 		 */
1332 		NEW = PREV;
1333 		NEW.rm_offset += len;
1334 		NEW.rm_startblock += len;
1335 		NEW.rm_blockcount -= len;
1336 		error = xfs_rmap_update(cur, &NEW);
1337 		if (error)
1338 			goto done;
1339 		error = xfs_btree_decrement(cur, 0, &i);
1340 		if (error)
1341 			goto done;
1342 		NEW = LEFT;
1343 		NEW.rm_blockcount += len;
1344 		error = xfs_rmap_update(cur, &NEW);
1345 		if (error)
1346 			goto done;
1347 		break;
1348 
1349 	case RMAP_LEFT_FILLING:
1350 		/*
1351 		 * Setting the first part of a previous oldext extent to newext.
1352 		 * The left neighbor is not contiguous.
1353 		 */
1354 		NEW = PREV;
1355 		NEW.rm_startblock += len;
1356 		NEW.rm_offset += len;
1357 		NEW.rm_blockcount -= len;
1358 		error = xfs_rmap_update(cur, &NEW);
1359 		if (error)
1360 			goto done;
1361 		NEW.rm_startblock = bno;
1362 		NEW.rm_owner = owner;
1363 		NEW.rm_offset = offset;
1364 		NEW.rm_blockcount = len;
1365 		NEW.rm_flags = newext;
1366 		cur->bc_rec.r = NEW;
1367 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
1368 				len, owner, offset, newext);
1369 		error = xfs_btree_insert(cur, &i);
1370 		if (error)
1371 			goto done;
1372 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1373 			error = -EFSCORRUPTED;
1374 			goto done;
1375 		}
1376 		break;
1377 
1378 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1379 		/*
1380 		 * Setting the last part of a previous oldext extent to newext.
1381 		 * The right neighbor is contiguous with the new allocation.
1382 		 */
1383 		NEW = PREV;
1384 		NEW.rm_blockcount -= len;
1385 		error = xfs_rmap_update(cur, &NEW);
1386 		if (error)
1387 			goto done;
1388 		error = xfs_btree_increment(cur, 0, &i);
1389 		if (error)
1390 			goto done;
1391 		NEW = RIGHT;
1392 		NEW.rm_offset = offset;
1393 		NEW.rm_startblock = bno;
1394 		NEW.rm_blockcount += len;
1395 		error = xfs_rmap_update(cur, &NEW);
1396 		if (error)
1397 			goto done;
1398 		break;
1399 
1400 	case RMAP_RIGHT_FILLING:
1401 		/*
1402 		 * Setting the last part of a previous oldext extent to newext.
1403 		 * The right neighbor is not contiguous.
1404 		 */
1405 		NEW = PREV;
1406 		NEW.rm_blockcount -= len;
1407 		error = xfs_rmap_update(cur, &NEW);
1408 		if (error)
1409 			goto done;
1410 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1411 				oldext, &i);
1412 		if (error)
1413 			goto done;
1414 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1415 			error = -EFSCORRUPTED;
1416 			goto done;
1417 		}
1418 		NEW.rm_startblock = bno;
1419 		NEW.rm_owner = owner;
1420 		NEW.rm_offset = offset;
1421 		NEW.rm_blockcount = len;
1422 		NEW.rm_flags = newext;
1423 		cur->bc_rec.r = NEW;
1424 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
1425 				len, owner, offset, newext);
1426 		error = xfs_btree_insert(cur, &i);
1427 		if (error)
1428 			goto done;
1429 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1430 			error = -EFSCORRUPTED;
1431 			goto done;
1432 		}
1433 		break;
1434 
1435 	case 0:
1436 		/*
1437 		 * Setting the middle part of a previous oldext extent to
1438 		 * newext.  Contiguity is impossible here.
1439 		 * One extent becomes three extents.
1440 		 */
1441 		/* new right extent - oldext */
1442 		NEW.rm_startblock = bno + len;
1443 		NEW.rm_owner = owner;
1444 		NEW.rm_offset = new_endoff;
1445 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1446 				new_endoff;
1447 		NEW.rm_flags = PREV.rm_flags;
1448 		error = xfs_rmap_update(cur, &NEW);
1449 		if (error)
1450 			goto done;
1451 		/* new left extent - oldext */
1452 		NEW = PREV;
1453 		NEW.rm_blockcount = offset - PREV.rm_offset;
1454 		cur->bc_rec.r = NEW;
1455 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
1456 				NEW.rm_startblock, NEW.rm_blockcount,
1457 				NEW.rm_owner, NEW.rm_offset,
1458 				NEW.rm_flags);
1459 		error = xfs_btree_insert(cur, &i);
1460 		if (error)
1461 			goto done;
1462 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1463 			error = -EFSCORRUPTED;
1464 			goto done;
1465 		}
1466 		/*
1467 		 * Reset the cursor to the position of the new extent
1468 		 * we are about to insert as we can't trust it after
1469 		 * the previous insert.
1470 		 */
1471 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1472 				oldext, &i);
1473 		if (error)
1474 			goto done;
1475 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1476 			error = -EFSCORRUPTED;
1477 			goto done;
1478 		}
1479 		/* new middle extent - newext */
1480 		cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
1481 		cur->bc_rec.r.rm_flags |= newext;
1482 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1483 				owner, offset, newext);
1484 		error = xfs_btree_insert(cur, &i);
1485 		if (error)
1486 			goto done;
1487 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1488 			error = -EFSCORRUPTED;
1489 			goto done;
1490 		}
1491 		break;
1492 
1493 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1494 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1495 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1496 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1497 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1498 	case RMAP_LEFT_CONTIG:
1499 	case RMAP_RIGHT_CONTIG:
1500 		/*
1501 		 * These cases are all impossible.
1502 		 */
1503 		ASSERT(0);
1504 	}
1505 
1506 	trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
1507 			unwritten, oinfo);
1508 done:
1509 	if (error)
1510 		trace_xfs_rmap_convert_error(cur->bc_mp,
1511 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1512 	return error;
1513 }
1514 
1515 /*
1516  * Convert an unwritten extent to a real extent or vice versa.  If there is no
1517  * possibility of overlapping extents, delegate to the simpler convert
1518  * function.
1519  */
1520 STATIC int
1521 xfs_rmap_convert_shared(
1522 	struct xfs_btree_cur		*cur,
1523 	xfs_agblock_t			bno,
1524 	xfs_extlen_t			len,
1525 	bool				unwritten,
1526 	const struct xfs_owner_info	*oinfo)
1527 {
1528 	struct xfs_mount		*mp = cur->bc_mp;
1529 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1530 						/* left is 0, right is 1, */
1531 						/* prev is 2, new is 3 */
1532 	uint64_t		owner;
1533 	uint64_t		offset;
1534 	uint64_t		new_endoff;
1535 	unsigned int		oldext;
1536 	unsigned int		newext;
1537 	unsigned int		flags = 0;
1538 	int			i;
1539 	int			state = 0;
1540 	int			error;
1541 
1542 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1543 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1544 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1545 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1546 	new_endoff = offset + len;
1547 	trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1548 			unwritten, oinfo);
1549 
1550 	/*
1551 	 * For the initial lookup, look for and exact match or the left-adjacent
1552 	 * record for our insertion point. This will also give us the record for
1553 	 * start block contiguity tests.
1554 	 */
1555 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
1556 			&PREV, &i);
1557 	if (error)
1558 		goto done;
1559 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1560 		error = -EFSCORRUPTED;
1561 		goto done;
1562 	}
1563 
1564 	ASSERT(PREV.rm_offset <= offset);
1565 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1566 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1567 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1568 
1569 	/*
1570 	 * Set flags determining what part of the previous oldext allocation
1571 	 * extent is being replaced by a newext allocation.
1572 	 */
1573 	if (PREV.rm_offset == offset)
1574 		state |= RMAP_LEFT_FILLING;
1575 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1576 		state |= RMAP_RIGHT_FILLING;
1577 
1578 	/* Is there a left record that abuts our range? */
1579 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, newext,
1580 			&LEFT, &i);
1581 	if (error)
1582 		goto done;
1583 	if (i) {
1584 		state |= RMAP_LEFT_VALID;
1585 		if (XFS_IS_CORRUPT(mp,
1586 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1587 				   bno)) {
1588 			error = -EFSCORRUPTED;
1589 			goto done;
1590 		}
1591 		if (xfs_rmap_is_mergeable(&LEFT, owner, newext))
1592 			state |= RMAP_LEFT_CONTIG;
1593 	}
1594 
1595 	/* Is there a right record that abuts our range? */
1596 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
1597 			newext, &i);
1598 	if (error)
1599 		goto done;
1600 	if (i) {
1601 		state |= RMAP_RIGHT_VALID;
1602 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1603 		if (error)
1604 			goto done;
1605 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1606 			error = -EFSCORRUPTED;
1607 			goto done;
1608 		}
1609 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1610 			error = -EFSCORRUPTED;
1611 			goto done;
1612 		}
1613 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
1614 				cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
1615 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1616 				RIGHT.rm_offset, RIGHT.rm_flags);
1617 		if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1618 			state |= RMAP_RIGHT_CONTIG;
1619 	}
1620 
1621 	/* check that left + prev + right is not too long */
1622 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1623 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1624 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1625 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1626 	    (unsigned long)LEFT.rm_blockcount + len +
1627 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1628 		state &= ~RMAP_RIGHT_CONTIG;
1629 
1630 	trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
1631 			_RET_IP_);
1632 	/*
1633 	 * Switch out based on the FILLING and CONTIG state bits.
1634 	 */
1635 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1636 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1637 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1638 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1639 		/*
1640 		 * Setting all of a previous oldext extent to newext.
1641 		 * The left and right neighbors are both contiguous with new.
1642 		 */
1643 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1644 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1645 				RIGHT.rm_offset, RIGHT.rm_flags);
1646 		if (error)
1647 			goto done;
1648 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1649 				PREV.rm_blockcount, PREV.rm_owner,
1650 				PREV.rm_offset, PREV.rm_flags);
1651 		if (error)
1652 			goto done;
1653 		NEW = LEFT;
1654 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1655 				NEW.rm_blockcount, NEW.rm_owner,
1656 				NEW.rm_offset, NEW.rm_flags, &i);
1657 		if (error)
1658 			goto done;
1659 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1660 			error = -EFSCORRUPTED;
1661 			goto done;
1662 		}
1663 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1664 		error = xfs_rmap_update(cur, &NEW);
1665 		if (error)
1666 			goto done;
1667 		break;
1668 
1669 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1670 		/*
1671 		 * Setting all of a previous oldext extent to newext.
1672 		 * The left neighbor is contiguous, the right is not.
1673 		 */
1674 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1675 				PREV.rm_blockcount, PREV.rm_owner,
1676 				PREV.rm_offset, PREV.rm_flags);
1677 		if (error)
1678 			goto done;
1679 		NEW = LEFT;
1680 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1681 				NEW.rm_blockcount, NEW.rm_owner,
1682 				NEW.rm_offset, NEW.rm_flags, &i);
1683 		if (error)
1684 			goto done;
1685 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1686 			error = -EFSCORRUPTED;
1687 			goto done;
1688 		}
1689 		NEW.rm_blockcount += PREV.rm_blockcount;
1690 		error = xfs_rmap_update(cur, &NEW);
1691 		if (error)
1692 			goto done;
1693 		break;
1694 
1695 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1696 		/*
1697 		 * Setting all of a previous oldext extent to newext.
1698 		 * The right neighbor is contiguous, the left is not.
1699 		 */
1700 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1701 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1702 				RIGHT.rm_offset, RIGHT.rm_flags);
1703 		if (error)
1704 			goto done;
1705 		NEW = PREV;
1706 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1707 				NEW.rm_blockcount, NEW.rm_owner,
1708 				NEW.rm_offset, NEW.rm_flags, &i);
1709 		if (error)
1710 			goto done;
1711 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1712 			error = -EFSCORRUPTED;
1713 			goto done;
1714 		}
1715 		NEW.rm_blockcount += RIGHT.rm_blockcount;
1716 		NEW.rm_flags = RIGHT.rm_flags;
1717 		error = xfs_rmap_update(cur, &NEW);
1718 		if (error)
1719 			goto done;
1720 		break;
1721 
1722 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1723 		/*
1724 		 * Setting all of a previous oldext extent to newext.
1725 		 * Neither the left nor right neighbors are contiguous with
1726 		 * the new one.
1727 		 */
1728 		NEW = PREV;
1729 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1730 				NEW.rm_blockcount, NEW.rm_owner,
1731 				NEW.rm_offset, NEW.rm_flags, &i);
1732 		if (error)
1733 			goto done;
1734 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1735 			error = -EFSCORRUPTED;
1736 			goto done;
1737 		}
1738 		NEW.rm_flags = newext;
1739 		error = xfs_rmap_update(cur, &NEW);
1740 		if (error)
1741 			goto done;
1742 		break;
1743 
1744 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1745 		/*
1746 		 * Setting the first part of a previous oldext extent to newext.
1747 		 * The left neighbor is contiguous.
1748 		 */
1749 		NEW = PREV;
1750 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1751 				NEW.rm_blockcount, NEW.rm_owner,
1752 				NEW.rm_offset, NEW.rm_flags);
1753 		if (error)
1754 			goto done;
1755 		NEW.rm_offset += len;
1756 		NEW.rm_startblock += len;
1757 		NEW.rm_blockcount -= len;
1758 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1759 				NEW.rm_blockcount, NEW.rm_owner,
1760 				NEW.rm_offset, NEW.rm_flags);
1761 		if (error)
1762 			goto done;
1763 		NEW = LEFT;
1764 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1765 				NEW.rm_blockcount, NEW.rm_owner,
1766 				NEW.rm_offset, NEW.rm_flags, &i);
1767 		if (error)
1768 			goto done;
1769 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1770 			error = -EFSCORRUPTED;
1771 			goto done;
1772 		}
1773 		NEW.rm_blockcount += len;
1774 		error = xfs_rmap_update(cur, &NEW);
1775 		if (error)
1776 			goto done;
1777 		break;
1778 
1779 	case RMAP_LEFT_FILLING:
1780 		/*
1781 		 * Setting the first part of a previous oldext extent to newext.
1782 		 * The left neighbor is not contiguous.
1783 		 */
1784 		NEW = PREV;
1785 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1786 				NEW.rm_blockcount, NEW.rm_owner,
1787 				NEW.rm_offset, NEW.rm_flags);
1788 		if (error)
1789 			goto done;
1790 		NEW.rm_offset += len;
1791 		NEW.rm_startblock += len;
1792 		NEW.rm_blockcount -= len;
1793 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1794 				NEW.rm_blockcount, NEW.rm_owner,
1795 				NEW.rm_offset, NEW.rm_flags);
1796 		if (error)
1797 			goto done;
1798 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1799 		if (error)
1800 			goto done;
1801 		break;
1802 
1803 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1804 		/*
1805 		 * Setting the last part of a previous oldext extent to newext.
1806 		 * The right neighbor is contiguous with the new allocation.
1807 		 */
1808 		NEW = PREV;
1809 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1810 				NEW.rm_blockcount, NEW.rm_owner,
1811 				NEW.rm_offset, NEW.rm_flags, &i);
1812 		if (error)
1813 			goto done;
1814 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1815 			error = -EFSCORRUPTED;
1816 			goto done;
1817 		}
1818 		NEW.rm_blockcount = offset - NEW.rm_offset;
1819 		error = xfs_rmap_update(cur, &NEW);
1820 		if (error)
1821 			goto done;
1822 		NEW = RIGHT;
1823 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1824 				NEW.rm_blockcount, NEW.rm_owner,
1825 				NEW.rm_offset, NEW.rm_flags);
1826 		if (error)
1827 			goto done;
1828 		NEW.rm_offset = offset;
1829 		NEW.rm_startblock = bno;
1830 		NEW.rm_blockcount += len;
1831 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1832 				NEW.rm_blockcount, NEW.rm_owner,
1833 				NEW.rm_offset, NEW.rm_flags);
1834 		if (error)
1835 			goto done;
1836 		break;
1837 
1838 	case RMAP_RIGHT_FILLING:
1839 		/*
1840 		 * Setting the last part of a previous oldext extent to newext.
1841 		 * The right neighbor is not contiguous.
1842 		 */
1843 		NEW = PREV;
1844 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1845 				NEW.rm_blockcount, NEW.rm_owner,
1846 				NEW.rm_offset, NEW.rm_flags, &i);
1847 		if (error)
1848 			goto done;
1849 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1850 			error = -EFSCORRUPTED;
1851 			goto done;
1852 		}
1853 		NEW.rm_blockcount -= len;
1854 		error = xfs_rmap_update(cur, &NEW);
1855 		if (error)
1856 			goto done;
1857 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1858 		if (error)
1859 			goto done;
1860 		break;
1861 
1862 	case 0:
1863 		/*
1864 		 * Setting the middle part of a previous oldext extent to
1865 		 * newext.  Contiguity is impossible here.
1866 		 * One extent becomes three extents.
1867 		 */
1868 		/* new right extent - oldext */
1869 		NEW.rm_startblock = bno + len;
1870 		NEW.rm_owner = owner;
1871 		NEW.rm_offset = new_endoff;
1872 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1873 				new_endoff;
1874 		NEW.rm_flags = PREV.rm_flags;
1875 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1876 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
1877 				NEW.rm_flags);
1878 		if (error)
1879 			goto done;
1880 		/* new left extent - oldext */
1881 		NEW = PREV;
1882 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1883 				NEW.rm_blockcount, NEW.rm_owner,
1884 				NEW.rm_offset, NEW.rm_flags, &i);
1885 		if (error)
1886 			goto done;
1887 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1888 			error = -EFSCORRUPTED;
1889 			goto done;
1890 		}
1891 		NEW.rm_blockcount = offset - NEW.rm_offset;
1892 		error = xfs_rmap_update(cur, &NEW);
1893 		if (error)
1894 			goto done;
1895 		/* new middle extent - newext */
1896 		NEW.rm_startblock = bno;
1897 		NEW.rm_blockcount = len;
1898 		NEW.rm_owner = owner;
1899 		NEW.rm_offset = offset;
1900 		NEW.rm_flags = newext;
1901 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1902 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
1903 				NEW.rm_flags);
1904 		if (error)
1905 			goto done;
1906 		break;
1907 
1908 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1909 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1910 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1911 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1912 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1913 	case RMAP_LEFT_CONTIG:
1914 	case RMAP_RIGHT_CONTIG:
1915 		/*
1916 		 * These cases are all impossible.
1917 		 */
1918 		ASSERT(0);
1919 	}
1920 
1921 	trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
1922 			unwritten, oinfo);
1923 done:
1924 	if (error)
1925 		trace_xfs_rmap_convert_error(cur->bc_mp,
1926 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1927 	return error;
1928 }
1929 
1930 #undef	NEW
1931 #undef	LEFT
1932 #undef	RIGHT
1933 #undef	PREV
1934 
1935 /*
1936  * Find an extent in the rmap btree and unmap it.  For rmap extent types that
1937  * can overlap (data fork rmaps on reflink filesystems) we must be careful
1938  * that the prev/next records in the btree might belong to another owner.
1939  * Therefore we must use delete+insert to alter any of the key fields.
1940  *
1941  * For every other situation there can only be one owner for a given extent,
1942  * so we can call the regular _free function.
1943  */
1944 STATIC int
1945 xfs_rmap_unmap_shared(
1946 	struct xfs_btree_cur		*cur,
1947 	xfs_agblock_t			bno,
1948 	xfs_extlen_t			len,
1949 	bool				unwritten,
1950 	const struct xfs_owner_info	*oinfo)
1951 {
1952 	struct xfs_mount		*mp = cur->bc_mp;
1953 	struct xfs_rmap_irec		ltrec;
1954 	uint64_t			ltoff;
1955 	int				error = 0;
1956 	int				i;
1957 	uint64_t			owner;
1958 	uint64_t			offset;
1959 	unsigned int			flags;
1960 
1961 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1962 	if (unwritten)
1963 		flags |= XFS_RMAP_UNWRITTEN;
1964 	trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
1965 			unwritten, oinfo);
1966 
1967 	/*
1968 	 * We should always have a left record because there's a static record
1969 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
1970 	 * will not ever be removed from the tree.
1971 	 */
1972 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
1973 			&ltrec, &i);
1974 	if (error)
1975 		goto out_error;
1976 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1977 		error = -EFSCORRUPTED;
1978 		goto out_error;
1979 	}
1980 	ltoff = ltrec.rm_offset;
1981 
1982 	/* Make sure the extent we found covers the entire freeing range. */
1983 	if (XFS_IS_CORRUPT(mp,
1984 			   ltrec.rm_startblock > bno ||
1985 			   ltrec.rm_startblock + ltrec.rm_blockcount <
1986 			   bno + len)) {
1987 		error = -EFSCORRUPTED;
1988 		goto out_error;
1989 	}
1990 
1991 	/* Make sure the owner matches what we expect to find in the tree. */
1992 	if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) {
1993 		error = -EFSCORRUPTED;
1994 		goto out_error;
1995 	}
1996 
1997 	/* Make sure the unwritten flag matches. */
1998 	if (XFS_IS_CORRUPT(mp,
1999 			   (flags & XFS_RMAP_UNWRITTEN) !=
2000 			   (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) {
2001 		error = -EFSCORRUPTED;
2002 		goto out_error;
2003 	}
2004 
2005 	/* Check the offset. */
2006 	if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) {
2007 		error = -EFSCORRUPTED;
2008 		goto out_error;
2009 	}
2010 	if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) {
2011 		error = -EFSCORRUPTED;
2012 		goto out_error;
2013 	}
2014 
2015 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
2016 		/* Exact match, simply remove the record from rmap tree. */
2017 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2018 				ltrec.rm_blockcount, ltrec.rm_owner,
2019 				ltrec.rm_offset, ltrec.rm_flags);
2020 		if (error)
2021 			goto out_error;
2022 	} else if (ltrec.rm_startblock == bno) {
2023 		/*
2024 		 * Overlap left hand side of extent: move the start, trim the
2025 		 * length and update the current record.
2026 		 *
2027 		 *       ltbno                ltlen
2028 		 * Orig:    |oooooooooooooooooooo|
2029 		 * Freeing: |fffffffff|
2030 		 * Result:            |rrrrrrrrrr|
2031 		 *         bno       len
2032 		 */
2033 
2034 		/* Delete prev rmap. */
2035 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2036 				ltrec.rm_blockcount, ltrec.rm_owner,
2037 				ltrec.rm_offset, ltrec.rm_flags);
2038 		if (error)
2039 			goto out_error;
2040 
2041 		/* Add an rmap at the new offset. */
2042 		ltrec.rm_startblock += len;
2043 		ltrec.rm_blockcount -= len;
2044 		ltrec.rm_offset += len;
2045 		error = xfs_rmap_insert(cur, ltrec.rm_startblock,
2046 				ltrec.rm_blockcount, ltrec.rm_owner,
2047 				ltrec.rm_offset, ltrec.rm_flags);
2048 		if (error)
2049 			goto out_error;
2050 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
2051 		/*
2052 		 * Overlap right hand side of extent: trim the length and
2053 		 * update the current record.
2054 		 *
2055 		 *       ltbno                ltlen
2056 		 * Orig:    |oooooooooooooooooooo|
2057 		 * Freeing:            |fffffffff|
2058 		 * Result:  |rrrrrrrrrr|
2059 		 *                    bno       len
2060 		 */
2061 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2062 				ltrec.rm_blockcount, ltrec.rm_owner,
2063 				ltrec.rm_offset, ltrec.rm_flags, &i);
2064 		if (error)
2065 			goto out_error;
2066 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2067 			error = -EFSCORRUPTED;
2068 			goto out_error;
2069 		}
2070 		ltrec.rm_blockcount -= len;
2071 		error = xfs_rmap_update(cur, &ltrec);
2072 		if (error)
2073 			goto out_error;
2074 	} else {
2075 		/*
2076 		 * Overlap middle of extent: trim the length of the existing
2077 		 * record to the length of the new left-extent size, increment
2078 		 * the insertion position so we can insert a new record
2079 		 * containing the remaining right-extent space.
2080 		 *
2081 		 *       ltbno                ltlen
2082 		 * Orig:    |oooooooooooooooooooo|
2083 		 * Freeing:       |fffffffff|
2084 		 * Result:  |rrrrr|         |rrrr|
2085 		 *               bno       len
2086 		 */
2087 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
2088 
2089 		/* Shrink the left side of the rmap */
2090 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2091 				ltrec.rm_blockcount, ltrec.rm_owner,
2092 				ltrec.rm_offset, ltrec.rm_flags, &i);
2093 		if (error)
2094 			goto out_error;
2095 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2096 			error = -EFSCORRUPTED;
2097 			goto out_error;
2098 		}
2099 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
2100 		error = xfs_rmap_update(cur, &ltrec);
2101 		if (error)
2102 			goto out_error;
2103 
2104 		/* Add an rmap at the new offset */
2105 		error = xfs_rmap_insert(cur, bno + len,
2106 				orig_len - len - ltrec.rm_blockcount,
2107 				ltrec.rm_owner, offset + len,
2108 				ltrec.rm_flags);
2109 		if (error)
2110 			goto out_error;
2111 	}
2112 
2113 	trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
2114 			unwritten, oinfo);
2115 out_error:
2116 	if (error)
2117 		trace_xfs_rmap_unmap_error(cur->bc_mp,
2118 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
2119 	return error;
2120 }
2121 
2122 /*
2123  * Find an extent in the rmap btree and map it.  For rmap extent types that
2124  * can overlap (data fork rmaps on reflink filesystems) we must be careful
2125  * that the prev/next records in the btree might belong to another owner.
2126  * Therefore we must use delete+insert to alter any of the key fields.
2127  *
2128  * For every other situation there can only be one owner for a given extent,
2129  * so we can call the regular _alloc function.
2130  */
2131 STATIC int
2132 xfs_rmap_map_shared(
2133 	struct xfs_btree_cur		*cur,
2134 	xfs_agblock_t			bno,
2135 	xfs_extlen_t			len,
2136 	bool				unwritten,
2137 	const struct xfs_owner_info	*oinfo)
2138 {
2139 	struct xfs_mount		*mp = cur->bc_mp;
2140 	struct xfs_rmap_irec		ltrec;
2141 	struct xfs_rmap_irec		gtrec;
2142 	int				have_gt;
2143 	int				have_lt;
2144 	int				error = 0;
2145 	int				i;
2146 	uint64_t			owner;
2147 	uint64_t			offset;
2148 	unsigned int			flags = 0;
2149 
2150 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2151 	if (unwritten)
2152 		flags |= XFS_RMAP_UNWRITTEN;
2153 	trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
2154 			unwritten, oinfo);
2155 
2156 	/* Is there a left record that abuts our range? */
2157 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags,
2158 			&ltrec, &have_lt);
2159 	if (error)
2160 		goto out_error;
2161 	if (have_lt &&
2162 	    !xfs_rmap_is_mergeable(&ltrec, owner, flags))
2163 		have_lt = 0;
2164 
2165 	/* Is there a right record that abuts our range? */
2166 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
2167 			flags, &have_gt);
2168 	if (error)
2169 		goto out_error;
2170 	if (have_gt) {
2171 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
2172 		if (error)
2173 			goto out_error;
2174 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
2175 			error = -EFSCORRUPTED;
2176 			goto out_error;
2177 		}
2178 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
2179 			cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
2180 			gtrec.rm_blockcount, gtrec.rm_owner,
2181 			gtrec.rm_offset, gtrec.rm_flags);
2182 
2183 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
2184 			have_gt = 0;
2185 	}
2186 
2187 	if (have_lt &&
2188 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
2189 	    ltrec.rm_offset + ltrec.rm_blockcount == offset) {
2190 		/*
2191 		 * Left edge contiguous, merge into left record.
2192 		 *
2193 		 *       ltbno     ltlen
2194 		 * orig:   |ooooooooo|
2195 		 * adding:           |aaaaaaaaa|
2196 		 * result: |rrrrrrrrrrrrrrrrrrr|
2197 		 *                  bno       len
2198 		 */
2199 		ltrec.rm_blockcount += len;
2200 		if (have_gt &&
2201 		    bno + len == gtrec.rm_startblock &&
2202 		    offset + len == gtrec.rm_offset) {
2203 			/*
2204 			 * Right edge also contiguous, delete right record
2205 			 * and merge into left record.
2206 			 *
2207 			 *       ltbno     ltlen    gtbno     gtlen
2208 			 * orig:   |ooooooooo|         |ooooooooo|
2209 			 * adding:           |aaaaaaaaa|
2210 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
2211 			 */
2212 			ltrec.rm_blockcount += gtrec.rm_blockcount;
2213 			error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2214 					gtrec.rm_blockcount, gtrec.rm_owner,
2215 					gtrec.rm_offset, gtrec.rm_flags);
2216 			if (error)
2217 				goto out_error;
2218 		}
2219 
2220 		/* Point the cursor back to the left record and update. */
2221 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2222 				ltrec.rm_blockcount, ltrec.rm_owner,
2223 				ltrec.rm_offset, ltrec.rm_flags, &i);
2224 		if (error)
2225 			goto out_error;
2226 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2227 			error = -EFSCORRUPTED;
2228 			goto out_error;
2229 		}
2230 
2231 		error = xfs_rmap_update(cur, &ltrec);
2232 		if (error)
2233 			goto out_error;
2234 	} else if (have_gt &&
2235 		   bno + len == gtrec.rm_startblock &&
2236 		   offset + len == gtrec.rm_offset) {
2237 		/*
2238 		 * Right edge contiguous, merge into right record.
2239 		 *
2240 		 *                 gtbno     gtlen
2241 		 * Orig:             |ooooooooo|
2242 		 * adding: |aaaaaaaaa|
2243 		 * Result: |rrrrrrrrrrrrrrrrrrr|
2244 		 *        bno       len
2245 		 */
2246 		/* Delete the old record. */
2247 		error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2248 				gtrec.rm_blockcount, gtrec.rm_owner,
2249 				gtrec.rm_offset, gtrec.rm_flags);
2250 		if (error)
2251 			goto out_error;
2252 
2253 		/* Move the start and re-add it. */
2254 		gtrec.rm_startblock = bno;
2255 		gtrec.rm_blockcount += len;
2256 		gtrec.rm_offset = offset;
2257 		error = xfs_rmap_insert(cur, gtrec.rm_startblock,
2258 				gtrec.rm_blockcount, gtrec.rm_owner,
2259 				gtrec.rm_offset, gtrec.rm_flags);
2260 		if (error)
2261 			goto out_error;
2262 	} else {
2263 		/*
2264 		 * No contiguous edge with identical owner, insert
2265 		 * new record at current cursor position.
2266 		 */
2267 		error = xfs_rmap_insert(cur, bno, len, owner, offset, flags);
2268 		if (error)
2269 			goto out_error;
2270 	}
2271 
2272 	trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
2273 			unwritten, oinfo);
2274 out_error:
2275 	if (error)
2276 		trace_xfs_rmap_map_error(cur->bc_mp,
2277 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
2278 	return error;
2279 }
2280 
2281 /* Insert a raw rmap into the rmapbt. */
2282 int
2283 xfs_rmap_map_raw(
2284 	struct xfs_btree_cur	*cur,
2285 	struct xfs_rmap_irec	*rmap)
2286 {
2287 	struct xfs_owner_info	oinfo;
2288 
2289 	oinfo.oi_owner = rmap->rm_owner;
2290 	oinfo.oi_offset = rmap->rm_offset;
2291 	oinfo.oi_flags = 0;
2292 	if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
2293 		oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
2294 	if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
2295 		oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
2296 
2297 	if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
2298 		return xfs_rmap_map(cur, rmap->rm_startblock,
2299 				rmap->rm_blockcount,
2300 				rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2301 				&oinfo);
2302 
2303 	return xfs_rmap_map_shared(cur, rmap->rm_startblock,
2304 			rmap->rm_blockcount,
2305 			rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2306 			&oinfo);
2307 }
2308 
2309 struct xfs_rmap_query_range_info {
2310 	xfs_rmap_query_range_fn	fn;
2311 	void				*priv;
2312 };
2313 
2314 /* Format btree record and pass to our callback. */
2315 STATIC int
2316 xfs_rmap_query_range_helper(
2317 	struct xfs_btree_cur		*cur,
2318 	const union xfs_btree_rec	*rec,
2319 	void				*priv)
2320 {
2321 	struct xfs_rmap_query_range_info	*query = priv;
2322 	struct xfs_rmap_irec			irec;
2323 	int					error;
2324 
2325 	error = xfs_rmap_btrec_to_irec(rec, &irec);
2326 	if (error)
2327 		return error;
2328 	return query->fn(cur, &irec, query->priv);
2329 }
2330 
2331 /* Find all rmaps between two keys. */
2332 int
2333 xfs_rmap_query_range(
2334 	struct xfs_btree_cur			*cur,
2335 	const struct xfs_rmap_irec		*low_rec,
2336 	const struct xfs_rmap_irec		*high_rec,
2337 	xfs_rmap_query_range_fn			fn,
2338 	void					*priv)
2339 {
2340 	union xfs_btree_irec			low_brec;
2341 	union xfs_btree_irec			high_brec;
2342 	struct xfs_rmap_query_range_info	query;
2343 
2344 	low_brec.r = *low_rec;
2345 	high_brec.r = *high_rec;
2346 	query.priv = priv;
2347 	query.fn = fn;
2348 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
2349 			xfs_rmap_query_range_helper, &query);
2350 }
2351 
2352 /* Find all rmaps. */
2353 int
2354 xfs_rmap_query_all(
2355 	struct xfs_btree_cur			*cur,
2356 	xfs_rmap_query_range_fn			fn,
2357 	void					*priv)
2358 {
2359 	struct xfs_rmap_query_range_info	query;
2360 
2361 	query.priv = priv;
2362 	query.fn = fn;
2363 	return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
2364 }
2365 
2366 /* Clean up after calling xfs_rmap_finish_one. */
2367 void
2368 xfs_rmap_finish_one_cleanup(
2369 	struct xfs_trans	*tp,
2370 	struct xfs_btree_cur	*rcur,
2371 	int			error)
2372 {
2373 	struct xfs_buf		*agbp;
2374 
2375 	if (rcur == NULL)
2376 		return;
2377 	agbp = rcur->bc_ag.agbp;
2378 	xfs_btree_del_cursor(rcur, error);
2379 	if (error)
2380 		xfs_trans_brelse(tp, agbp);
2381 }
2382 
2383 /*
2384  * Process one of the deferred rmap operations.  We pass back the
2385  * btree cursor to maintain our lock on the rmapbt between calls.
2386  * This saves time and eliminates a buffer deadlock between the
2387  * superblock and the AGF because we'll always grab them in the same
2388  * order.
2389  */
2390 int
2391 xfs_rmap_finish_one(
2392 	struct xfs_trans		*tp,
2393 	struct xfs_rmap_intent		*ri,
2394 	struct xfs_btree_cur		**pcur)
2395 {
2396 	struct xfs_mount		*mp = tp->t_mountp;
2397 	struct xfs_perag		*pag;
2398 	struct xfs_btree_cur		*rcur;
2399 	struct xfs_buf			*agbp = NULL;
2400 	int				error = 0;
2401 	struct xfs_owner_info		oinfo;
2402 	xfs_agblock_t			bno;
2403 	bool				unwritten;
2404 
2405 	pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock));
2406 	bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock);
2407 
2408 	trace_xfs_rmap_deferred(mp, pag->pag_agno, ri->ri_type, bno,
2409 			ri->ri_owner, ri->ri_whichfork,
2410 			ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount,
2411 			ri->ri_bmap.br_state);
2412 
2413 	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) {
2414 		error = -EIO;
2415 		goto out_drop;
2416 	}
2417 
2418 
2419 	/*
2420 	 * If we haven't gotten a cursor or the cursor AG doesn't match
2421 	 * the startblock, get one now.
2422 	 */
2423 	rcur = *pcur;
2424 	if (rcur != NULL && rcur->bc_ag.pag != pag) {
2425 		xfs_rmap_finish_one_cleanup(tp, rcur, 0);
2426 		rcur = NULL;
2427 		*pcur = NULL;
2428 	}
2429 	if (rcur == NULL) {
2430 		/*
2431 		 * Refresh the freelist before we start changing the
2432 		 * rmapbt, because a shape change could cause us to
2433 		 * allocate blocks.
2434 		 */
2435 		error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
2436 		if (error)
2437 			goto out_drop;
2438 		if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
2439 			error = -EFSCORRUPTED;
2440 			goto out_drop;
2441 		}
2442 
2443 		rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
2444 	}
2445 	*pcur = rcur;
2446 
2447 	xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
2448 			ri->ri_bmap.br_startoff);
2449 	unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
2450 	bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock);
2451 
2452 	switch (ri->ri_type) {
2453 	case XFS_RMAP_ALLOC:
2454 	case XFS_RMAP_MAP:
2455 		error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount,
2456 				unwritten, &oinfo);
2457 		break;
2458 	case XFS_RMAP_MAP_SHARED:
2459 		error = xfs_rmap_map_shared(rcur, bno,
2460 				ri->ri_bmap.br_blockcount, unwritten, &oinfo);
2461 		break;
2462 	case XFS_RMAP_FREE:
2463 	case XFS_RMAP_UNMAP:
2464 		error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount,
2465 				unwritten, &oinfo);
2466 		break;
2467 	case XFS_RMAP_UNMAP_SHARED:
2468 		error = xfs_rmap_unmap_shared(rcur, bno,
2469 				ri->ri_bmap.br_blockcount, unwritten, &oinfo);
2470 		break;
2471 	case XFS_RMAP_CONVERT:
2472 		error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount,
2473 				!unwritten, &oinfo);
2474 		break;
2475 	case XFS_RMAP_CONVERT_SHARED:
2476 		error = xfs_rmap_convert_shared(rcur, bno,
2477 				ri->ri_bmap.br_blockcount, !unwritten, &oinfo);
2478 		break;
2479 	default:
2480 		ASSERT(0);
2481 		error = -EFSCORRUPTED;
2482 	}
2483 out_drop:
2484 	xfs_perag_put(pag);
2485 	return error;
2486 }
2487 
2488 /*
2489  * Don't defer an rmap if we aren't an rmap filesystem.
2490  */
2491 static bool
2492 xfs_rmap_update_is_needed(
2493 	struct xfs_mount	*mp,
2494 	int			whichfork)
2495 {
2496 	return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK;
2497 }
2498 
2499 /*
2500  * Record a rmap intent; the list is kept sorted first by AG and then by
2501  * increasing age.
2502  */
2503 static void
2504 __xfs_rmap_add(
2505 	struct xfs_trans		*tp,
2506 	enum xfs_rmap_intent_type	type,
2507 	uint64_t			owner,
2508 	int				whichfork,
2509 	struct xfs_bmbt_irec		*bmap)
2510 {
2511 	struct xfs_rmap_intent		*ri;
2512 
2513 	trace_xfs_rmap_defer(tp->t_mountp,
2514 			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
2515 			type,
2516 			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
2517 			owner, whichfork,
2518 			bmap->br_startoff,
2519 			bmap->br_blockcount,
2520 			bmap->br_state);
2521 
2522 	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
2523 	INIT_LIST_HEAD(&ri->ri_list);
2524 	ri->ri_type = type;
2525 	ri->ri_owner = owner;
2526 	ri->ri_whichfork = whichfork;
2527 	ri->ri_bmap = *bmap;
2528 
2529 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
2530 }
2531 
2532 /* Map an extent into a file. */
2533 void
2534 xfs_rmap_map_extent(
2535 	struct xfs_trans	*tp,
2536 	struct xfs_inode	*ip,
2537 	int			whichfork,
2538 	struct xfs_bmbt_irec	*PREV)
2539 {
2540 	enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
2541 
2542 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2543 		return;
2544 
2545 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2546 		type = XFS_RMAP_MAP_SHARED;
2547 
2548 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2549 }
2550 
2551 /* Unmap an extent out of a file. */
2552 void
2553 xfs_rmap_unmap_extent(
2554 	struct xfs_trans	*tp,
2555 	struct xfs_inode	*ip,
2556 	int			whichfork,
2557 	struct xfs_bmbt_irec	*PREV)
2558 {
2559 	enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
2560 
2561 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2562 		return;
2563 
2564 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2565 		type = XFS_RMAP_UNMAP_SHARED;
2566 
2567 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2568 }
2569 
2570 /*
2571  * Convert a data fork extent from unwritten to real or vice versa.
2572  *
2573  * Note that tp can be NULL here as no transaction is used for COW fork
2574  * unwritten conversion.
2575  */
2576 void
2577 xfs_rmap_convert_extent(
2578 	struct xfs_mount	*mp,
2579 	struct xfs_trans	*tp,
2580 	struct xfs_inode	*ip,
2581 	int			whichfork,
2582 	struct xfs_bmbt_irec	*PREV)
2583 {
2584 	enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
2585 
2586 	if (!xfs_rmap_update_is_needed(mp, whichfork))
2587 		return;
2588 
2589 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2590 		type = XFS_RMAP_CONVERT_SHARED;
2591 
2592 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2593 }
2594 
2595 /* Schedule the creation of an rmap for non-file data. */
2596 void
2597 xfs_rmap_alloc_extent(
2598 	struct xfs_trans	*tp,
2599 	xfs_agnumber_t		agno,
2600 	xfs_agblock_t		bno,
2601 	xfs_extlen_t		len,
2602 	uint64_t		owner)
2603 {
2604 	struct xfs_bmbt_irec	bmap;
2605 
2606 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2607 		return;
2608 
2609 	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
2610 	bmap.br_blockcount = len;
2611 	bmap.br_startoff = 0;
2612 	bmap.br_state = XFS_EXT_NORM;
2613 
2614 	__xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
2615 }
2616 
2617 /* Schedule the deletion of an rmap for non-file data. */
2618 void
2619 xfs_rmap_free_extent(
2620 	struct xfs_trans	*tp,
2621 	xfs_agnumber_t		agno,
2622 	xfs_agblock_t		bno,
2623 	xfs_extlen_t		len,
2624 	uint64_t		owner)
2625 {
2626 	struct xfs_bmbt_irec	bmap;
2627 
2628 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2629 		return;
2630 
2631 	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
2632 	bmap.br_blockcount = len;
2633 	bmap.br_startoff = 0;
2634 	bmap.br_state = XFS_EXT_NORM;
2635 
2636 	__xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
2637 }
2638 
2639 /* Compare rmap records.  Returns -1 if a < b, 1 if a > b, and 0 if equal. */
2640 int
2641 xfs_rmap_compare(
2642 	const struct xfs_rmap_irec	*a,
2643 	const struct xfs_rmap_irec	*b)
2644 {
2645 	__u64				oa;
2646 	__u64				ob;
2647 
2648 	oa = xfs_rmap_irec_offset_pack(a);
2649 	ob = xfs_rmap_irec_offset_pack(b);
2650 
2651 	if (a->rm_startblock < b->rm_startblock)
2652 		return -1;
2653 	else if (a->rm_startblock > b->rm_startblock)
2654 		return 1;
2655 	else if (a->rm_owner < b->rm_owner)
2656 		return -1;
2657 	else if (a->rm_owner > b->rm_owner)
2658 		return 1;
2659 	else if (oa < ob)
2660 		return -1;
2661 	else if (oa > ob)
2662 		return 1;
2663 	else
2664 		return 0;
2665 }
2666 
2667 /* Is there a record covering a given extent? */
2668 int
2669 xfs_rmap_has_record(
2670 	struct xfs_btree_cur	*cur,
2671 	xfs_agblock_t		bno,
2672 	xfs_extlen_t		len,
2673 	bool			*exists)
2674 {
2675 	union xfs_btree_irec	low;
2676 	union xfs_btree_irec	high;
2677 
2678 	memset(&low, 0, sizeof(low));
2679 	low.r.rm_startblock = bno;
2680 	memset(&high, 0xFF, sizeof(high));
2681 	high.r.rm_startblock = bno + len - 1;
2682 
2683 	return xfs_btree_has_record(cur, &low, &high, exists);
2684 }
2685 
2686 /*
2687  * Is there a record for this owner completely covering a given physical
2688  * extent?  If so, *has_rmap will be set to true.  If there is no record
2689  * or the record only covers part of the range, we set *has_rmap to false.
2690  * This function doesn't perform range lookups or offset checks, so it is
2691  * not suitable for checking data fork blocks.
2692  */
2693 int
2694 xfs_rmap_record_exists(
2695 	struct xfs_btree_cur		*cur,
2696 	xfs_agblock_t			bno,
2697 	xfs_extlen_t			len,
2698 	const struct xfs_owner_info	*oinfo,
2699 	bool				*has_rmap)
2700 {
2701 	uint64_t			owner;
2702 	uint64_t			offset;
2703 	unsigned int			flags;
2704 	int				has_record;
2705 	struct xfs_rmap_irec		irec;
2706 	int				error;
2707 
2708 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2709 	ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
2710 	       (flags & XFS_RMAP_BMBT_BLOCK));
2711 
2712 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
2713 			&has_record);
2714 	if (error)
2715 		return error;
2716 	if (!has_record) {
2717 		*has_rmap = false;
2718 		return 0;
2719 	}
2720 
2721 	*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
2722 		     irec.rm_startblock + irec.rm_blockcount >= bno + len);
2723 	return 0;
2724 }
2725 
2726 struct xfs_rmap_key_state {
2727 	uint64_t			owner;
2728 	uint64_t			offset;
2729 	unsigned int			flags;
2730 };
2731 
2732 /* For each rmap given, figure out if it doesn't match the key we want. */
2733 STATIC int
2734 xfs_rmap_has_other_keys_helper(
2735 	struct xfs_btree_cur		*cur,
2736 	const struct xfs_rmap_irec	*rec,
2737 	void				*priv)
2738 {
2739 	struct xfs_rmap_key_state	*rks = priv;
2740 
2741 	if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset &&
2742 	    ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags)
2743 		return 0;
2744 	return -ECANCELED;
2745 }
2746 
2747 /*
2748  * Given an extent and some owner info, can we find records overlapping
2749  * the extent whose owner info does not match the given owner?
2750  */
2751 int
2752 xfs_rmap_has_other_keys(
2753 	struct xfs_btree_cur		*cur,
2754 	xfs_agblock_t			bno,
2755 	xfs_extlen_t			len,
2756 	const struct xfs_owner_info	*oinfo,
2757 	bool				*has_rmap)
2758 {
2759 	struct xfs_rmap_irec		low = {0};
2760 	struct xfs_rmap_irec		high;
2761 	struct xfs_rmap_key_state	rks;
2762 	int				error;
2763 
2764 	xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags);
2765 	*has_rmap = false;
2766 
2767 	low.rm_startblock = bno;
2768 	memset(&high, 0xFF, sizeof(high));
2769 	high.rm_startblock = bno + len - 1;
2770 
2771 	error = xfs_rmap_query_range(cur, &low, &high,
2772 			xfs_rmap_has_other_keys_helper, &rks);
2773 	if (error == -ECANCELED) {
2774 		*has_rmap = true;
2775 		return 0;
2776 	}
2777 
2778 	return error;
2779 }
2780 
2781 const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
2782 	.oi_owner = XFS_RMAP_OWN_NULL,
2783 };
2784 const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER = {
2785 	.oi_owner = XFS_RMAP_OWN_UNKNOWN,
2786 };
2787 const struct xfs_owner_info XFS_RMAP_OINFO_FS = {
2788 	.oi_owner = XFS_RMAP_OWN_FS,
2789 };
2790 const struct xfs_owner_info XFS_RMAP_OINFO_LOG = {
2791 	.oi_owner = XFS_RMAP_OWN_LOG,
2792 };
2793 const struct xfs_owner_info XFS_RMAP_OINFO_AG = {
2794 	.oi_owner = XFS_RMAP_OWN_AG,
2795 };
2796 const struct xfs_owner_info XFS_RMAP_OINFO_INOBT = {
2797 	.oi_owner = XFS_RMAP_OWN_INOBT,
2798 };
2799 const struct xfs_owner_info XFS_RMAP_OINFO_INODES = {
2800 	.oi_owner = XFS_RMAP_OWN_INODES,
2801 };
2802 const struct xfs_owner_info XFS_RMAP_OINFO_REFC = {
2803 	.oi_owner = XFS_RMAP_OWN_REFC,
2804 };
2805 const struct xfs_owner_info XFS_RMAP_OINFO_COW = {
2806 	.oi_owner = XFS_RMAP_OWN_COW,
2807 };
2808 
2809 int __init
2810 xfs_rmap_intent_init_cache(void)
2811 {
2812 	xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent",
2813 			sizeof(struct xfs_rmap_intent),
2814 			0, 0, NULL);
2815 
2816 	return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM;
2817 }
2818 
2819 void
2820 xfs_rmap_intent_destroy_cache(void)
2821 {
2822 	kmem_cache_destroy(xfs_rmap_intent_cache);
2823 	xfs_rmap_intent_cache = NULL;
2824 }
2825