xref: /openbmc/linux/fs/xfs/libxfs/xfs_rmap.c (revision c4e34172)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014 Red Hat, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_sb.h"
15 #include "xfs_defer.h"
16 #include "xfs_btree.h"
17 #include "xfs_trans.h"
18 #include "xfs_alloc.h"
19 #include "xfs_rmap.h"
20 #include "xfs_rmap_btree.h"
21 #include "xfs_trace.h"
22 #include "xfs_errortag.h"
23 #include "xfs_error.h"
24 #include "xfs_inode.h"
25 #include "xfs_ag.h"
26 
27 struct kmem_cache	*xfs_rmap_intent_cache;
28 
29 /*
30  * Lookup the first record less than or equal to [bno, len, owner, offset]
31  * in the btree given by cur.
32  */
33 int
34 xfs_rmap_lookup_le(
35 	struct xfs_btree_cur	*cur,
36 	xfs_agblock_t		bno,
37 	uint64_t		owner,
38 	uint64_t		offset,
39 	unsigned int		flags,
40 	struct xfs_rmap_irec	*irec,
41 	int			*stat)
42 {
43 	int			get_stat = 0;
44 	int			error;
45 
46 	cur->bc_rec.r.rm_startblock = bno;
47 	cur->bc_rec.r.rm_blockcount = 0;
48 	cur->bc_rec.r.rm_owner = owner;
49 	cur->bc_rec.r.rm_offset = offset;
50 	cur->bc_rec.r.rm_flags = flags;
51 
52 	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
53 	if (error || !(*stat) || !irec)
54 		return error;
55 
56 	error = xfs_rmap_get_rec(cur, irec, &get_stat);
57 	if (error)
58 		return error;
59 	if (!get_stat)
60 		return -EFSCORRUPTED;
61 
62 	return 0;
63 }
64 
65 /*
66  * Lookup the record exactly matching [bno, len, owner, offset]
67  * in the btree given by cur.
68  */
69 int
70 xfs_rmap_lookup_eq(
71 	struct xfs_btree_cur	*cur,
72 	xfs_agblock_t		bno,
73 	xfs_extlen_t		len,
74 	uint64_t		owner,
75 	uint64_t		offset,
76 	unsigned int		flags,
77 	int			*stat)
78 {
79 	cur->bc_rec.r.rm_startblock = bno;
80 	cur->bc_rec.r.rm_blockcount = len;
81 	cur->bc_rec.r.rm_owner = owner;
82 	cur->bc_rec.r.rm_offset = offset;
83 	cur->bc_rec.r.rm_flags = flags;
84 	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
85 }
86 
87 /*
88  * Update the record referred to by cur to the value given
89  * by [bno, len, owner, offset].
90  * This either works (return 0) or gets an EFSCORRUPTED error.
91  */
92 STATIC int
93 xfs_rmap_update(
94 	struct xfs_btree_cur	*cur,
95 	struct xfs_rmap_irec	*irec)
96 {
97 	union xfs_btree_rec	rec;
98 	int			error;
99 
100 	trace_xfs_rmap_update(cur->bc_mp, cur->bc_ag.pag->pag_agno,
101 			irec->rm_startblock, irec->rm_blockcount,
102 			irec->rm_owner, irec->rm_offset, irec->rm_flags);
103 
104 	rec.rmap.rm_startblock = cpu_to_be32(irec->rm_startblock);
105 	rec.rmap.rm_blockcount = cpu_to_be32(irec->rm_blockcount);
106 	rec.rmap.rm_owner = cpu_to_be64(irec->rm_owner);
107 	rec.rmap.rm_offset = cpu_to_be64(
108 			xfs_rmap_irec_offset_pack(irec));
109 	error = xfs_btree_update(cur, &rec);
110 	if (error)
111 		trace_xfs_rmap_update_error(cur->bc_mp,
112 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
113 	return error;
114 }
115 
116 int
117 xfs_rmap_insert(
118 	struct xfs_btree_cur	*rcur,
119 	xfs_agblock_t		agbno,
120 	xfs_extlen_t		len,
121 	uint64_t		owner,
122 	uint64_t		offset,
123 	unsigned int		flags)
124 {
125 	int			i;
126 	int			error;
127 
128 	trace_xfs_rmap_insert(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
129 			len, owner, offset, flags);
130 
131 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
132 	if (error)
133 		goto done;
134 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) {
135 		error = -EFSCORRUPTED;
136 		goto done;
137 	}
138 
139 	rcur->bc_rec.r.rm_startblock = agbno;
140 	rcur->bc_rec.r.rm_blockcount = len;
141 	rcur->bc_rec.r.rm_owner = owner;
142 	rcur->bc_rec.r.rm_offset = offset;
143 	rcur->bc_rec.r.rm_flags = flags;
144 	error = xfs_btree_insert(rcur, &i);
145 	if (error)
146 		goto done;
147 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
148 		error = -EFSCORRUPTED;
149 		goto done;
150 	}
151 done:
152 	if (error)
153 		trace_xfs_rmap_insert_error(rcur->bc_mp,
154 				rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
155 	return error;
156 }
157 
158 STATIC int
159 xfs_rmap_delete(
160 	struct xfs_btree_cur	*rcur,
161 	xfs_agblock_t		agbno,
162 	xfs_extlen_t		len,
163 	uint64_t		owner,
164 	uint64_t		offset,
165 	unsigned int		flags)
166 {
167 	int			i;
168 	int			error;
169 
170 	trace_xfs_rmap_delete(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, agbno,
171 			len, owner, offset, flags);
172 
173 	error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
174 	if (error)
175 		goto done;
176 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
177 		error = -EFSCORRUPTED;
178 		goto done;
179 	}
180 
181 	error = xfs_btree_delete(rcur, &i);
182 	if (error)
183 		goto done;
184 	if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
185 		error = -EFSCORRUPTED;
186 		goto done;
187 	}
188 done:
189 	if (error)
190 		trace_xfs_rmap_delete_error(rcur->bc_mp,
191 				rcur->bc_ag.pag->pag_agno, error, _RET_IP_);
192 	return error;
193 }
194 
195 /* Convert an internal btree record to an rmap record. */
196 xfs_failaddr_t
197 xfs_rmap_btrec_to_irec(
198 	const union xfs_btree_rec	*rec,
199 	struct xfs_rmap_irec		*irec)
200 {
201 	irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
202 	irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
203 	irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
204 	return xfs_rmap_irec_offset_unpack(be64_to_cpu(rec->rmap.rm_offset),
205 			irec);
206 }
207 
208 /* Simple checks for rmap records. */
209 xfs_failaddr_t
210 xfs_rmap_check_irec(
211 	struct xfs_btree_cur		*cur,
212 	const struct xfs_rmap_irec	*irec)
213 {
214 	struct xfs_mount		*mp = cur->bc_mp;
215 
216 	if (irec->rm_blockcount == 0)
217 		return __this_address;
218 	if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) {
219 		if (irec->rm_owner != XFS_RMAP_OWN_FS)
220 			return __this_address;
221 		if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1)
222 			return __this_address;
223 	} else {
224 		/* check for valid extent range, including overflow */
225 		if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock,
226 						       irec->rm_blockcount))
227 			return __this_address;
228 	}
229 
230 	if (!(xfs_verify_ino(mp, irec->rm_owner) ||
231 	      (irec->rm_owner <= XFS_RMAP_OWN_FS &&
232 	       irec->rm_owner >= XFS_RMAP_OWN_MIN)))
233 		return __this_address;
234 
235 	return NULL;
236 }
237 
238 /*
239  * Get the data from the pointed-to record.
240  */
241 int
242 xfs_rmap_get_rec(
243 	struct xfs_btree_cur	*cur,
244 	struct xfs_rmap_irec	*irec,
245 	int			*stat)
246 {
247 	struct xfs_mount	*mp = cur->bc_mp;
248 	struct xfs_perag	*pag = cur->bc_ag.pag;
249 	union xfs_btree_rec	*rec;
250 	xfs_failaddr_t		fa;
251 	int			error;
252 
253 	error = xfs_btree_get_rec(cur, &rec, stat);
254 	if (error || !*stat)
255 		return error;
256 
257 	fa = xfs_rmap_btrec_to_irec(rec, irec);
258 	if (!fa)
259 		fa = xfs_rmap_check_irec(cur, irec);
260 	if (fa)
261 		goto out_bad_rec;
262 
263 	return 0;
264 out_bad_rec:
265 	xfs_warn(mp,
266 		"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
267 		pag->pag_agno, fa);
268 	xfs_warn(mp,
269 		"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
270 		irec->rm_owner, irec->rm_flags, irec->rm_startblock,
271 		irec->rm_blockcount);
272 	return -EFSCORRUPTED;
273 }
274 
275 struct xfs_find_left_neighbor_info {
276 	struct xfs_rmap_irec	high;
277 	struct xfs_rmap_irec	*irec;
278 };
279 
280 /* For each rmap given, figure out if it matches the key we want. */
281 STATIC int
282 xfs_rmap_find_left_neighbor_helper(
283 	struct xfs_btree_cur		*cur,
284 	const struct xfs_rmap_irec	*rec,
285 	void				*priv)
286 {
287 	struct xfs_find_left_neighbor_info	*info = priv;
288 
289 	trace_xfs_rmap_find_left_neighbor_candidate(cur->bc_mp,
290 			cur->bc_ag.pag->pag_agno, rec->rm_startblock,
291 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
292 			rec->rm_flags);
293 
294 	if (rec->rm_owner != info->high.rm_owner)
295 		return 0;
296 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
297 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
298 	    rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset)
299 		return 0;
300 
301 	*info->irec = *rec;
302 	return -ECANCELED;
303 }
304 
305 /*
306  * Find the record to the left of the given extent, being careful only to
307  * return a match with the same owner and adjacent physical and logical
308  * block ranges.
309  */
310 STATIC int
311 xfs_rmap_find_left_neighbor(
312 	struct xfs_btree_cur	*cur,
313 	xfs_agblock_t		bno,
314 	uint64_t		owner,
315 	uint64_t		offset,
316 	unsigned int		flags,
317 	struct xfs_rmap_irec	*irec,
318 	int			*stat)
319 {
320 	struct xfs_find_left_neighbor_info	info;
321 	int			found = 0;
322 	int			error;
323 
324 	*stat = 0;
325 	if (bno == 0)
326 		return 0;
327 	info.high.rm_startblock = bno - 1;
328 	info.high.rm_owner = owner;
329 	if (!XFS_RMAP_NON_INODE_OWNER(owner) &&
330 	    !(flags & XFS_RMAP_BMBT_BLOCK)) {
331 		if (offset == 0)
332 			return 0;
333 		info.high.rm_offset = offset - 1;
334 	} else
335 		info.high.rm_offset = 0;
336 	info.high.rm_flags = flags;
337 	info.high.rm_blockcount = 0;
338 	info.irec = irec;
339 
340 	trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
341 			cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
342 
343 	/*
344 	 * Historically, we always used the range query to walk every reverse
345 	 * mapping that could possibly overlap the key that the caller asked
346 	 * for, and filter out the ones that don't.  That is very slow when
347 	 * there are a lot of records.
348 	 *
349 	 * However, there are two scenarios where the classic btree search can
350 	 * produce correct results -- if the index contains a record that is an
351 	 * exact match for the lookup key; and if there are no other records
352 	 * between the record we want and the key we supplied.
353 	 *
354 	 * As an optimization, try a non-overlapped lookup first.  This makes
355 	 * extent conversion and remap operations run a bit faster if the
356 	 * physical extents aren't being shared.  If we don't find what we
357 	 * want, we fall back to the overlapped query.
358 	 */
359 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
360 			&found);
361 	if (error)
362 		return error;
363 	if (found)
364 		error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
365 	if (!error)
366 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
367 				xfs_rmap_find_left_neighbor_helper, &info);
368 	if (error != -ECANCELED)
369 		return error;
370 
371 	*stat = 1;
372 	trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
373 			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
374 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
375 			irec->rm_flags);
376 	return 0;
377 }
378 
379 /* For each rmap given, figure out if it matches the key we want. */
380 STATIC int
381 xfs_rmap_lookup_le_range_helper(
382 	struct xfs_btree_cur		*cur,
383 	const struct xfs_rmap_irec	*rec,
384 	void				*priv)
385 {
386 	struct xfs_find_left_neighbor_info	*info = priv;
387 
388 	trace_xfs_rmap_lookup_le_range_candidate(cur->bc_mp,
389 			cur->bc_ag.pag->pag_agno, rec->rm_startblock,
390 			rec->rm_blockcount, rec->rm_owner, rec->rm_offset,
391 			rec->rm_flags);
392 
393 	if (rec->rm_owner != info->high.rm_owner)
394 		return 0;
395 	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
396 	    !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
397 	    (rec->rm_offset > info->high.rm_offset ||
398 	     rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset))
399 		return 0;
400 
401 	*info->irec = *rec;
402 	return -ECANCELED;
403 }
404 
405 /*
406  * Find the record to the left of the given extent, being careful only to
407  * return a match with the same owner and overlapping physical and logical
408  * block ranges.  This is the overlapping-interval version of
409  * xfs_rmap_lookup_le.
410  */
411 int
412 xfs_rmap_lookup_le_range(
413 	struct xfs_btree_cur	*cur,
414 	xfs_agblock_t		bno,
415 	uint64_t		owner,
416 	uint64_t		offset,
417 	unsigned int		flags,
418 	struct xfs_rmap_irec	*irec,
419 	int			*stat)
420 {
421 	struct xfs_find_left_neighbor_info	info;
422 	int			found = 0;
423 	int			error;
424 
425 	info.high.rm_startblock = bno;
426 	info.high.rm_owner = owner;
427 	if (!XFS_RMAP_NON_INODE_OWNER(owner) && !(flags & XFS_RMAP_BMBT_BLOCK))
428 		info.high.rm_offset = offset;
429 	else
430 		info.high.rm_offset = 0;
431 	info.high.rm_flags = flags;
432 	info.high.rm_blockcount = 0;
433 	*stat = 0;
434 	info.irec = irec;
435 
436 	trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
437 			bno, 0, owner, offset, flags);
438 
439 	/*
440 	 * Historically, we always used the range query to walk every reverse
441 	 * mapping that could possibly overlap the key that the caller asked
442 	 * for, and filter out the ones that don't.  That is very slow when
443 	 * there are a lot of records.
444 	 *
445 	 * However, there are two scenarios where the classic btree search can
446 	 * produce correct results -- if the index contains a record that is an
447 	 * exact match for the lookup key; and if there are no other records
448 	 * between the record we want and the key we supplied.
449 	 *
450 	 * As an optimization, try a non-overlapped lookup first.  This makes
451 	 * scrub run much faster on most filesystems because bmbt records are
452 	 * usually an exact match for rmap records.  If we don't find what we
453 	 * want, we fall back to the overlapped query.
454 	 */
455 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
456 			&found);
457 	if (error)
458 		return error;
459 	if (found)
460 		error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
461 	if (!error)
462 		error = xfs_rmap_query_range(cur, &info.high, &info.high,
463 				xfs_rmap_lookup_le_range_helper, &info);
464 	if (error != -ECANCELED)
465 		return error;
466 
467 	*stat = 1;
468 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
469 			cur->bc_ag.pag->pag_agno, irec->rm_startblock,
470 			irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
471 			irec->rm_flags);
472 	return 0;
473 }
474 
475 /*
476  * Perform all the relevant owner checks for a removal op.  If we're doing an
477  * unknown-owner removal then we have no owner information to check.
478  */
479 static int
480 xfs_rmap_free_check_owner(
481 	struct xfs_mount	*mp,
482 	uint64_t		ltoff,
483 	struct xfs_rmap_irec	*rec,
484 	xfs_filblks_t		len,
485 	uint64_t		owner,
486 	uint64_t		offset,
487 	unsigned int		flags)
488 {
489 	int			error = 0;
490 
491 	if (owner == XFS_RMAP_OWN_UNKNOWN)
492 		return 0;
493 
494 	/* Make sure the unwritten flag matches. */
495 	if (XFS_IS_CORRUPT(mp,
496 			   (flags & XFS_RMAP_UNWRITTEN) !=
497 			   (rec->rm_flags & XFS_RMAP_UNWRITTEN))) {
498 		error = -EFSCORRUPTED;
499 		goto out;
500 	}
501 
502 	/* Make sure the owner matches what we expect to find in the tree. */
503 	if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) {
504 		error = -EFSCORRUPTED;
505 		goto out;
506 	}
507 
508 	/* Check the offset, if necessary. */
509 	if (XFS_RMAP_NON_INODE_OWNER(owner))
510 		goto out;
511 
512 	if (flags & XFS_RMAP_BMBT_BLOCK) {
513 		if (XFS_IS_CORRUPT(mp,
514 				   !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) {
515 			error = -EFSCORRUPTED;
516 			goto out;
517 		}
518 	} else {
519 		if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) {
520 			error = -EFSCORRUPTED;
521 			goto out;
522 		}
523 		if (XFS_IS_CORRUPT(mp,
524 				   offset + len > ltoff + rec->rm_blockcount)) {
525 			error = -EFSCORRUPTED;
526 			goto out;
527 		}
528 	}
529 
530 out:
531 	return error;
532 }
533 
534 /*
535  * Find the extent in the rmap btree and remove it.
536  *
537  * The record we find should always be an exact match for the extent that we're
538  * looking for, since we insert them into the btree without modification.
539  *
540  * Special Case #1: when growing the filesystem, we "free" an extent when
541  * growing the last AG. This extent is new space and so it is not tracked as
542  * used space in the btree. The growfs code will pass in an owner of
543  * XFS_RMAP_OWN_NULL to indicate that it expected that there is no owner of this
544  * extent. We verify that - the extent lookup result in a record that does not
545  * overlap.
546  *
547  * Special Case #2: EFIs do not record the owner of the extent, so when
548  * recovering EFIs from the log we pass in XFS_RMAP_OWN_UNKNOWN to tell the rmap
549  * btree to ignore the owner (i.e. wildcard match) so we don't trigger
550  * corruption checks during log recovery.
551  */
552 STATIC int
553 xfs_rmap_unmap(
554 	struct xfs_btree_cur		*cur,
555 	xfs_agblock_t			bno,
556 	xfs_extlen_t			len,
557 	bool				unwritten,
558 	const struct xfs_owner_info	*oinfo)
559 {
560 	struct xfs_mount		*mp = cur->bc_mp;
561 	struct xfs_rmap_irec		ltrec;
562 	uint64_t			ltoff;
563 	int				error = 0;
564 	int				i;
565 	uint64_t			owner;
566 	uint64_t			offset;
567 	unsigned int			flags;
568 	bool				ignore_off;
569 
570 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
571 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
572 			(flags & XFS_RMAP_BMBT_BLOCK);
573 	if (unwritten)
574 		flags |= XFS_RMAP_UNWRITTEN;
575 	trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
576 			unwritten, oinfo);
577 
578 	/*
579 	 * We should always have a left record because there's a static record
580 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
581 	 * will not ever be removed from the tree.
582 	 */
583 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
584 	if (error)
585 		goto out_error;
586 	if (XFS_IS_CORRUPT(mp, i != 1)) {
587 		error = -EFSCORRUPTED;
588 		goto out_error;
589 	}
590 
591 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
592 			cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
593 			ltrec.rm_blockcount, ltrec.rm_owner,
594 			ltrec.rm_offset, ltrec.rm_flags);
595 	ltoff = ltrec.rm_offset;
596 
597 	/*
598 	 * For growfs, the incoming extent must be beyond the left record we
599 	 * just found as it is new space and won't be used by anyone. This is
600 	 * just a corruption check as we don't actually do anything with this
601 	 * extent.  Note that we need to use >= instead of > because it might
602 	 * be the case that the "left" extent goes all the way to EOFS.
603 	 */
604 	if (owner == XFS_RMAP_OWN_NULL) {
605 		if (XFS_IS_CORRUPT(mp,
606 				   bno <
607 				   ltrec.rm_startblock + ltrec.rm_blockcount)) {
608 			error = -EFSCORRUPTED;
609 			goto out_error;
610 		}
611 		goto out_done;
612 	}
613 
614 	/*
615 	 * If we're doing an unknown-owner removal for EFI recovery, we expect
616 	 * to find the full range in the rmapbt or nothing at all.  If we
617 	 * don't find any rmaps overlapping either end of the range, we're
618 	 * done.  Hopefully this means that the EFI creator already queued
619 	 * (and finished) a RUI to remove the rmap.
620 	 */
621 	if (owner == XFS_RMAP_OWN_UNKNOWN &&
622 	    ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
623 		struct xfs_rmap_irec    rtrec;
624 
625 		error = xfs_btree_increment(cur, 0, &i);
626 		if (error)
627 			goto out_error;
628 		if (i == 0)
629 			goto out_done;
630 		error = xfs_rmap_get_rec(cur, &rtrec, &i);
631 		if (error)
632 			goto out_error;
633 		if (XFS_IS_CORRUPT(mp, i != 1)) {
634 			error = -EFSCORRUPTED;
635 			goto out_error;
636 		}
637 		if (rtrec.rm_startblock >= bno + len)
638 			goto out_done;
639 	}
640 
641 	/* Make sure the extent we found covers the entire freeing range. */
642 	if (XFS_IS_CORRUPT(mp,
643 			   ltrec.rm_startblock > bno ||
644 			   ltrec.rm_startblock + ltrec.rm_blockcount <
645 			   bno + len)) {
646 		error = -EFSCORRUPTED;
647 		goto out_error;
648 	}
649 
650 	/* Check owner information. */
651 	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, len, owner,
652 			offset, flags);
653 	if (error)
654 		goto out_error;
655 
656 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
657 		/* exact match, simply remove the record from rmap tree */
658 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
659 				ltrec.rm_startblock, ltrec.rm_blockcount,
660 				ltrec.rm_owner, ltrec.rm_offset,
661 				ltrec.rm_flags);
662 		error = xfs_btree_delete(cur, &i);
663 		if (error)
664 			goto out_error;
665 		if (XFS_IS_CORRUPT(mp, i != 1)) {
666 			error = -EFSCORRUPTED;
667 			goto out_error;
668 		}
669 	} else if (ltrec.rm_startblock == bno) {
670 		/*
671 		 * overlap left hand side of extent: move the start, trim the
672 		 * length and update the current record.
673 		 *
674 		 *       ltbno                ltlen
675 		 * Orig:    |oooooooooooooooooooo|
676 		 * Freeing: |fffffffff|
677 		 * Result:            |rrrrrrrrrr|
678 		 *         bno       len
679 		 */
680 		ltrec.rm_startblock += len;
681 		ltrec.rm_blockcount -= len;
682 		if (!ignore_off)
683 			ltrec.rm_offset += len;
684 		error = xfs_rmap_update(cur, &ltrec);
685 		if (error)
686 			goto out_error;
687 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
688 		/*
689 		 * overlap right hand side of extent: trim the length and update
690 		 * the current record.
691 		 *
692 		 *       ltbno                ltlen
693 		 * Orig:    |oooooooooooooooooooo|
694 		 * Freeing:            |fffffffff|
695 		 * Result:  |rrrrrrrrrr|
696 		 *                    bno       len
697 		 */
698 		ltrec.rm_blockcount -= len;
699 		error = xfs_rmap_update(cur, &ltrec);
700 		if (error)
701 			goto out_error;
702 	} else {
703 
704 		/*
705 		 * overlap middle of extent: trim the length of the existing
706 		 * record to the length of the new left-extent size, increment
707 		 * the insertion position so we can insert a new record
708 		 * containing the remaining right-extent space.
709 		 *
710 		 *       ltbno                ltlen
711 		 * Orig:    |oooooooooooooooooooo|
712 		 * Freeing:       |fffffffff|
713 		 * Result:  |rrrrr|         |rrrr|
714 		 *               bno       len
715 		 */
716 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
717 
718 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
719 		error = xfs_rmap_update(cur, &ltrec);
720 		if (error)
721 			goto out_error;
722 
723 		error = xfs_btree_increment(cur, 0, &i);
724 		if (error)
725 			goto out_error;
726 
727 		cur->bc_rec.r.rm_startblock = bno + len;
728 		cur->bc_rec.r.rm_blockcount = orig_len - len -
729 						     ltrec.rm_blockcount;
730 		cur->bc_rec.r.rm_owner = ltrec.rm_owner;
731 		if (ignore_off)
732 			cur->bc_rec.r.rm_offset = 0;
733 		else
734 			cur->bc_rec.r.rm_offset = offset + len;
735 		cur->bc_rec.r.rm_flags = flags;
736 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
737 				cur->bc_rec.r.rm_startblock,
738 				cur->bc_rec.r.rm_blockcount,
739 				cur->bc_rec.r.rm_owner,
740 				cur->bc_rec.r.rm_offset,
741 				cur->bc_rec.r.rm_flags);
742 		error = xfs_btree_insert(cur, &i);
743 		if (error)
744 			goto out_error;
745 	}
746 
747 out_done:
748 	trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
749 			unwritten, oinfo);
750 out_error:
751 	if (error)
752 		trace_xfs_rmap_unmap_error(mp, cur->bc_ag.pag->pag_agno,
753 				error, _RET_IP_);
754 	return error;
755 }
756 
757 /*
758  * Remove a reference to an extent in the rmap btree.
759  */
760 int
761 xfs_rmap_free(
762 	struct xfs_trans		*tp,
763 	struct xfs_buf			*agbp,
764 	struct xfs_perag		*pag,
765 	xfs_agblock_t			bno,
766 	xfs_extlen_t			len,
767 	const struct xfs_owner_info	*oinfo)
768 {
769 	struct xfs_mount		*mp = tp->t_mountp;
770 	struct xfs_btree_cur		*cur;
771 	int				error;
772 
773 	if (!xfs_has_rmapbt(mp))
774 		return 0;
775 
776 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
777 
778 	error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
779 
780 	xfs_btree_del_cursor(cur, error);
781 	return error;
782 }
783 
784 /*
785  * A mergeable rmap must have the same owner and the same values for
786  * the unwritten, attr_fork, and bmbt flags.  The startblock and
787  * offset are checked separately.
788  */
789 static bool
790 xfs_rmap_is_mergeable(
791 	struct xfs_rmap_irec	*irec,
792 	uint64_t		owner,
793 	unsigned int		flags)
794 {
795 	if (irec->rm_owner == XFS_RMAP_OWN_NULL)
796 		return false;
797 	if (irec->rm_owner != owner)
798 		return false;
799 	if ((flags & XFS_RMAP_UNWRITTEN) ^
800 	    (irec->rm_flags & XFS_RMAP_UNWRITTEN))
801 		return false;
802 	if ((flags & XFS_RMAP_ATTR_FORK) ^
803 	    (irec->rm_flags & XFS_RMAP_ATTR_FORK))
804 		return false;
805 	if ((flags & XFS_RMAP_BMBT_BLOCK) ^
806 	    (irec->rm_flags & XFS_RMAP_BMBT_BLOCK))
807 		return false;
808 	return true;
809 }
810 
811 /*
812  * When we allocate a new block, the first thing we do is add a reference to
813  * the extent in the rmap btree. This takes the form of a [agbno, length,
814  * owner, offset] record.  Flags are encoded in the high bits of the offset
815  * field.
816  */
817 STATIC int
818 xfs_rmap_map(
819 	struct xfs_btree_cur		*cur,
820 	xfs_agblock_t			bno,
821 	xfs_extlen_t			len,
822 	bool				unwritten,
823 	const struct xfs_owner_info	*oinfo)
824 {
825 	struct xfs_mount		*mp = cur->bc_mp;
826 	struct xfs_rmap_irec		ltrec;
827 	struct xfs_rmap_irec		gtrec;
828 	int				have_gt;
829 	int				have_lt;
830 	int				error = 0;
831 	int				i;
832 	uint64_t			owner;
833 	uint64_t			offset;
834 	unsigned int			flags = 0;
835 	bool				ignore_off;
836 
837 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
838 	ASSERT(owner != 0);
839 	ignore_off = XFS_RMAP_NON_INODE_OWNER(owner) ||
840 			(flags & XFS_RMAP_BMBT_BLOCK);
841 	if (unwritten)
842 		flags |= XFS_RMAP_UNWRITTEN;
843 	trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
844 			unwritten, oinfo);
845 	ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
846 
847 	/*
848 	 * For the initial lookup, look for an exact match or the left-adjacent
849 	 * record for our insertion point. This will also give us the record for
850 	 * start block contiguity tests.
851 	 */
852 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
853 			&have_lt);
854 	if (error)
855 		goto out_error;
856 	if (have_lt) {
857 		trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
858 				cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
859 				ltrec.rm_blockcount, ltrec.rm_owner,
860 				ltrec.rm_offset, ltrec.rm_flags);
861 
862 		if (!xfs_rmap_is_mergeable(&ltrec, owner, flags))
863 			have_lt = 0;
864 	}
865 
866 	if (XFS_IS_CORRUPT(mp,
867 			   have_lt != 0 &&
868 			   ltrec.rm_startblock + ltrec.rm_blockcount > bno)) {
869 		error = -EFSCORRUPTED;
870 		goto out_error;
871 	}
872 
873 	/*
874 	 * Increment the cursor to see if we have a right-adjacent record to our
875 	 * insertion point. This will give us the record for end block
876 	 * contiguity tests.
877 	 */
878 	error = xfs_btree_increment(cur, 0, &have_gt);
879 	if (error)
880 		goto out_error;
881 	if (have_gt) {
882 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
883 		if (error)
884 			goto out_error;
885 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
886 			error = -EFSCORRUPTED;
887 			goto out_error;
888 		}
889 		if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) {
890 			error = -EFSCORRUPTED;
891 			goto out_error;
892 		}
893 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
894 			cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
895 			gtrec.rm_blockcount, gtrec.rm_owner,
896 			gtrec.rm_offset, gtrec.rm_flags);
897 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
898 			have_gt = 0;
899 	}
900 
901 	/*
902 	 * Note: cursor currently points one record to the right of ltrec, even
903 	 * if there is no record in the tree to the right.
904 	 */
905 	if (have_lt &&
906 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
907 	    (ignore_off || ltrec.rm_offset + ltrec.rm_blockcount == offset)) {
908 		/*
909 		 * left edge contiguous, merge into left record.
910 		 *
911 		 *       ltbno     ltlen
912 		 * orig:   |ooooooooo|
913 		 * adding:           |aaaaaaaaa|
914 		 * result: |rrrrrrrrrrrrrrrrrrr|
915 		 *                  bno       len
916 		 */
917 		ltrec.rm_blockcount += len;
918 		if (have_gt &&
919 		    bno + len == gtrec.rm_startblock &&
920 		    (ignore_off || offset + len == gtrec.rm_offset) &&
921 		    (unsigned long)ltrec.rm_blockcount + len +
922 				gtrec.rm_blockcount <= XFS_RMAP_LEN_MAX) {
923 			/*
924 			 * right edge also contiguous, delete right record
925 			 * and merge into left record.
926 			 *
927 			 *       ltbno     ltlen    gtbno     gtlen
928 			 * orig:   |ooooooooo|         |ooooooooo|
929 			 * adding:           |aaaaaaaaa|
930 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
931 			 */
932 			ltrec.rm_blockcount += gtrec.rm_blockcount;
933 			trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
934 					gtrec.rm_startblock,
935 					gtrec.rm_blockcount,
936 					gtrec.rm_owner,
937 					gtrec.rm_offset,
938 					gtrec.rm_flags);
939 			error = xfs_btree_delete(cur, &i);
940 			if (error)
941 				goto out_error;
942 			if (XFS_IS_CORRUPT(mp, i != 1)) {
943 				error = -EFSCORRUPTED;
944 				goto out_error;
945 			}
946 		}
947 
948 		/* point the cursor back to the left record and update */
949 		error = xfs_btree_decrement(cur, 0, &have_gt);
950 		if (error)
951 			goto out_error;
952 		error = xfs_rmap_update(cur, &ltrec);
953 		if (error)
954 			goto out_error;
955 	} else if (have_gt &&
956 		   bno + len == gtrec.rm_startblock &&
957 		   (ignore_off || offset + len == gtrec.rm_offset)) {
958 		/*
959 		 * right edge contiguous, merge into right record.
960 		 *
961 		 *                 gtbno     gtlen
962 		 * Orig:             |ooooooooo|
963 		 * adding: |aaaaaaaaa|
964 		 * Result: |rrrrrrrrrrrrrrrrrrr|
965 		 *        bno       len
966 		 */
967 		gtrec.rm_startblock = bno;
968 		gtrec.rm_blockcount += len;
969 		if (!ignore_off)
970 			gtrec.rm_offset = offset;
971 		error = xfs_rmap_update(cur, &gtrec);
972 		if (error)
973 			goto out_error;
974 	} else {
975 		/*
976 		 * no contiguous edge with identical owner, insert
977 		 * new record at current cursor position.
978 		 */
979 		cur->bc_rec.r.rm_startblock = bno;
980 		cur->bc_rec.r.rm_blockcount = len;
981 		cur->bc_rec.r.rm_owner = owner;
982 		cur->bc_rec.r.rm_offset = offset;
983 		cur->bc_rec.r.rm_flags = flags;
984 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
985 			owner, offset, flags);
986 		error = xfs_btree_insert(cur, &i);
987 		if (error)
988 			goto out_error;
989 		if (XFS_IS_CORRUPT(mp, i != 1)) {
990 			error = -EFSCORRUPTED;
991 			goto out_error;
992 		}
993 	}
994 
995 	trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
996 			unwritten, oinfo);
997 out_error:
998 	if (error)
999 		trace_xfs_rmap_map_error(mp, cur->bc_ag.pag->pag_agno,
1000 				error, _RET_IP_);
1001 	return error;
1002 }
1003 
1004 /*
1005  * Add a reference to an extent in the rmap btree.
1006  */
1007 int
1008 xfs_rmap_alloc(
1009 	struct xfs_trans		*tp,
1010 	struct xfs_buf			*agbp,
1011 	struct xfs_perag		*pag,
1012 	xfs_agblock_t			bno,
1013 	xfs_extlen_t			len,
1014 	const struct xfs_owner_info	*oinfo)
1015 {
1016 	struct xfs_mount		*mp = tp->t_mountp;
1017 	struct xfs_btree_cur		*cur;
1018 	int				error;
1019 
1020 	if (!xfs_has_rmapbt(mp))
1021 		return 0;
1022 
1023 	cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
1024 	error = xfs_rmap_map(cur, bno, len, false, oinfo);
1025 
1026 	xfs_btree_del_cursor(cur, error);
1027 	return error;
1028 }
1029 
1030 #define RMAP_LEFT_CONTIG	(1 << 0)
1031 #define RMAP_RIGHT_CONTIG	(1 << 1)
1032 #define RMAP_LEFT_FILLING	(1 << 2)
1033 #define RMAP_RIGHT_FILLING	(1 << 3)
1034 #define RMAP_LEFT_VALID		(1 << 6)
1035 #define RMAP_RIGHT_VALID	(1 << 7)
1036 
1037 #define LEFT		r[0]
1038 #define RIGHT		r[1]
1039 #define PREV		r[2]
1040 #define NEW		r[3]
1041 
1042 /*
1043  * Convert an unwritten extent to a real extent or vice versa.
1044  * Does not handle overlapping extents.
1045  */
1046 STATIC int
1047 xfs_rmap_convert(
1048 	struct xfs_btree_cur		*cur,
1049 	xfs_agblock_t			bno,
1050 	xfs_extlen_t			len,
1051 	bool				unwritten,
1052 	const struct xfs_owner_info	*oinfo)
1053 {
1054 	struct xfs_mount		*mp = cur->bc_mp;
1055 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1056 						/* left is 0, right is 1, */
1057 						/* prev is 2, new is 3 */
1058 	uint64_t		owner;
1059 	uint64_t		offset;
1060 	uint64_t		new_endoff;
1061 	unsigned int		oldext;
1062 	unsigned int		newext;
1063 	unsigned int		flags = 0;
1064 	int			i;
1065 	int			state = 0;
1066 	int			error;
1067 
1068 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1069 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1070 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1071 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1072 	new_endoff = offset + len;
1073 	trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1074 			unwritten, oinfo);
1075 
1076 	/*
1077 	 * For the initial lookup, look for an exact match or the left-adjacent
1078 	 * record for our insertion point. This will also give us the record for
1079 	 * start block contiguity tests.
1080 	 */
1081 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
1082 	if (error)
1083 		goto done;
1084 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1085 		error = -EFSCORRUPTED;
1086 		goto done;
1087 	}
1088 
1089 	trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
1090 			cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
1091 			PREV.rm_blockcount, PREV.rm_owner,
1092 			PREV.rm_offset, PREV.rm_flags);
1093 
1094 	ASSERT(PREV.rm_offset <= offset);
1095 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1096 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1097 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1098 
1099 	/*
1100 	 * Set flags determining what part of the previous oldext allocation
1101 	 * extent is being replaced by a newext allocation.
1102 	 */
1103 	if (PREV.rm_offset == offset)
1104 		state |= RMAP_LEFT_FILLING;
1105 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1106 		state |= RMAP_RIGHT_FILLING;
1107 
1108 	/*
1109 	 * Decrement the cursor to see if we have a left-adjacent record to our
1110 	 * insertion point. This will give us the record for end block
1111 	 * contiguity tests.
1112 	 */
1113 	error = xfs_btree_decrement(cur, 0, &i);
1114 	if (error)
1115 		goto done;
1116 	if (i) {
1117 		state |= RMAP_LEFT_VALID;
1118 		error = xfs_rmap_get_rec(cur, &LEFT, &i);
1119 		if (error)
1120 			goto done;
1121 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1122 			error = -EFSCORRUPTED;
1123 			goto done;
1124 		}
1125 		if (XFS_IS_CORRUPT(mp,
1126 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1127 				   bno)) {
1128 			error = -EFSCORRUPTED;
1129 			goto done;
1130 		}
1131 		trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
1132 				cur->bc_ag.pag->pag_agno, LEFT.rm_startblock,
1133 				LEFT.rm_blockcount, LEFT.rm_owner,
1134 				LEFT.rm_offset, LEFT.rm_flags);
1135 		if (LEFT.rm_startblock + LEFT.rm_blockcount == bno &&
1136 		    LEFT.rm_offset + LEFT.rm_blockcount == offset &&
1137 		    xfs_rmap_is_mergeable(&LEFT, owner, newext))
1138 			state |= RMAP_LEFT_CONTIG;
1139 	}
1140 
1141 	/*
1142 	 * Increment the cursor to see if we have a right-adjacent record to our
1143 	 * insertion point. This will give us the record for end block
1144 	 * contiguity tests.
1145 	 */
1146 	error = xfs_btree_increment(cur, 0, &i);
1147 	if (error)
1148 		goto done;
1149 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1150 		error = -EFSCORRUPTED;
1151 		goto done;
1152 	}
1153 	error = xfs_btree_increment(cur, 0, &i);
1154 	if (error)
1155 		goto done;
1156 	if (i) {
1157 		state |= RMAP_RIGHT_VALID;
1158 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1159 		if (error)
1160 			goto done;
1161 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1162 			error = -EFSCORRUPTED;
1163 			goto done;
1164 		}
1165 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1166 			error = -EFSCORRUPTED;
1167 			goto done;
1168 		}
1169 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
1170 				cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
1171 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1172 				RIGHT.rm_offset, RIGHT.rm_flags);
1173 		if (bno + len == RIGHT.rm_startblock &&
1174 		    offset + len == RIGHT.rm_offset &&
1175 		    xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1176 			state |= RMAP_RIGHT_CONTIG;
1177 	}
1178 
1179 	/* check that left + prev + right is not too long */
1180 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1181 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1182 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1183 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1184 	    (unsigned long)LEFT.rm_blockcount + len +
1185 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1186 		state &= ~RMAP_RIGHT_CONTIG;
1187 
1188 	trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
1189 			_RET_IP_);
1190 
1191 	/* reset the cursor back to PREV */
1192 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
1193 	if (error)
1194 		goto done;
1195 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1196 		error = -EFSCORRUPTED;
1197 		goto done;
1198 	}
1199 
1200 	/*
1201 	 * Switch out based on the FILLING and CONTIG state bits.
1202 	 */
1203 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1204 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1205 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1206 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1207 		/*
1208 		 * Setting all of a previous oldext extent to newext.
1209 		 * The left and right neighbors are both contiguous with new.
1210 		 */
1211 		error = xfs_btree_increment(cur, 0, &i);
1212 		if (error)
1213 			goto done;
1214 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1215 			error = -EFSCORRUPTED;
1216 			goto done;
1217 		}
1218 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1219 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1220 				RIGHT.rm_owner, RIGHT.rm_offset,
1221 				RIGHT.rm_flags);
1222 		error = xfs_btree_delete(cur, &i);
1223 		if (error)
1224 			goto done;
1225 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1226 			error = -EFSCORRUPTED;
1227 			goto done;
1228 		}
1229 		error = xfs_btree_decrement(cur, 0, &i);
1230 		if (error)
1231 			goto done;
1232 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1233 			error = -EFSCORRUPTED;
1234 			goto done;
1235 		}
1236 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1237 				PREV.rm_startblock, PREV.rm_blockcount,
1238 				PREV.rm_owner, PREV.rm_offset,
1239 				PREV.rm_flags);
1240 		error = xfs_btree_delete(cur, &i);
1241 		if (error)
1242 			goto done;
1243 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1244 			error = -EFSCORRUPTED;
1245 			goto done;
1246 		}
1247 		error = xfs_btree_decrement(cur, 0, &i);
1248 		if (error)
1249 			goto done;
1250 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1251 			error = -EFSCORRUPTED;
1252 			goto done;
1253 		}
1254 		NEW = LEFT;
1255 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1256 		error = xfs_rmap_update(cur, &NEW);
1257 		if (error)
1258 			goto done;
1259 		break;
1260 
1261 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1262 		/*
1263 		 * Setting all of a previous oldext extent to newext.
1264 		 * The left neighbor is contiguous, the right is not.
1265 		 */
1266 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1267 				PREV.rm_startblock, PREV.rm_blockcount,
1268 				PREV.rm_owner, PREV.rm_offset,
1269 				PREV.rm_flags);
1270 		error = xfs_btree_delete(cur, &i);
1271 		if (error)
1272 			goto done;
1273 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1274 			error = -EFSCORRUPTED;
1275 			goto done;
1276 		}
1277 		error = xfs_btree_decrement(cur, 0, &i);
1278 		if (error)
1279 			goto done;
1280 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1281 			error = -EFSCORRUPTED;
1282 			goto done;
1283 		}
1284 		NEW = LEFT;
1285 		NEW.rm_blockcount += PREV.rm_blockcount;
1286 		error = xfs_rmap_update(cur, &NEW);
1287 		if (error)
1288 			goto done;
1289 		break;
1290 
1291 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1292 		/*
1293 		 * Setting all of a previous oldext extent to newext.
1294 		 * The right neighbor is contiguous, the left is not.
1295 		 */
1296 		error = xfs_btree_increment(cur, 0, &i);
1297 		if (error)
1298 			goto done;
1299 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1300 			error = -EFSCORRUPTED;
1301 			goto done;
1302 		}
1303 		trace_xfs_rmap_delete(mp, cur->bc_ag.pag->pag_agno,
1304 				RIGHT.rm_startblock, RIGHT.rm_blockcount,
1305 				RIGHT.rm_owner, RIGHT.rm_offset,
1306 				RIGHT.rm_flags);
1307 		error = xfs_btree_delete(cur, &i);
1308 		if (error)
1309 			goto done;
1310 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1311 			error = -EFSCORRUPTED;
1312 			goto done;
1313 		}
1314 		error = xfs_btree_decrement(cur, 0, &i);
1315 		if (error)
1316 			goto done;
1317 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1318 			error = -EFSCORRUPTED;
1319 			goto done;
1320 		}
1321 		NEW = PREV;
1322 		NEW.rm_blockcount = len + RIGHT.rm_blockcount;
1323 		NEW.rm_flags = newext;
1324 		error = xfs_rmap_update(cur, &NEW);
1325 		if (error)
1326 			goto done;
1327 		break;
1328 
1329 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1330 		/*
1331 		 * Setting all of a previous oldext extent to newext.
1332 		 * Neither the left nor right neighbors are contiguous with
1333 		 * the new one.
1334 		 */
1335 		NEW = PREV;
1336 		NEW.rm_flags = newext;
1337 		error = xfs_rmap_update(cur, &NEW);
1338 		if (error)
1339 			goto done;
1340 		break;
1341 
1342 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1343 		/*
1344 		 * Setting the first part of a previous oldext extent to newext.
1345 		 * The left neighbor is contiguous.
1346 		 */
1347 		NEW = PREV;
1348 		NEW.rm_offset += len;
1349 		NEW.rm_startblock += len;
1350 		NEW.rm_blockcount -= len;
1351 		error = xfs_rmap_update(cur, &NEW);
1352 		if (error)
1353 			goto done;
1354 		error = xfs_btree_decrement(cur, 0, &i);
1355 		if (error)
1356 			goto done;
1357 		NEW = LEFT;
1358 		NEW.rm_blockcount += len;
1359 		error = xfs_rmap_update(cur, &NEW);
1360 		if (error)
1361 			goto done;
1362 		break;
1363 
1364 	case RMAP_LEFT_FILLING:
1365 		/*
1366 		 * Setting the first part of a previous oldext extent to newext.
1367 		 * The left neighbor is not contiguous.
1368 		 */
1369 		NEW = PREV;
1370 		NEW.rm_startblock += len;
1371 		NEW.rm_offset += len;
1372 		NEW.rm_blockcount -= len;
1373 		error = xfs_rmap_update(cur, &NEW);
1374 		if (error)
1375 			goto done;
1376 		NEW.rm_startblock = bno;
1377 		NEW.rm_owner = owner;
1378 		NEW.rm_offset = offset;
1379 		NEW.rm_blockcount = len;
1380 		NEW.rm_flags = newext;
1381 		cur->bc_rec.r = NEW;
1382 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
1383 				len, owner, offset, newext);
1384 		error = xfs_btree_insert(cur, &i);
1385 		if (error)
1386 			goto done;
1387 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1388 			error = -EFSCORRUPTED;
1389 			goto done;
1390 		}
1391 		break;
1392 
1393 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1394 		/*
1395 		 * Setting the last part of a previous oldext extent to newext.
1396 		 * The right neighbor is contiguous with the new allocation.
1397 		 */
1398 		NEW = PREV;
1399 		NEW.rm_blockcount -= len;
1400 		error = xfs_rmap_update(cur, &NEW);
1401 		if (error)
1402 			goto done;
1403 		error = xfs_btree_increment(cur, 0, &i);
1404 		if (error)
1405 			goto done;
1406 		NEW = RIGHT;
1407 		NEW.rm_offset = offset;
1408 		NEW.rm_startblock = bno;
1409 		NEW.rm_blockcount += len;
1410 		error = xfs_rmap_update(cur, &NEW);
1411 		if (error)
1412 			goto done;
1413 		break;
1414 
1415 	case RMAP_RIGHT_FILLING:
1416 		/*
1417 		 * Setting the last part of a previous oldext extent to newext.
1418 		 * The right neighbor is not contiguous.
1419 		 */
1420 		NEW = PREV;
1421 		NEW.rm_blockcount -= len;
1422 		error = xfs_rmap_update(cur, &NEW);
1423 		if (error)
1424 			goto done;
1425 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1426 				oldext, &i);
1427 		if (error)
1428 			goto done;
1429 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1430 			error = -EFSCORRUPTED;
1431 			goto done;
1432 		}
1433 		NEW.rm_startblock = bno;
1434 		NEW.rm_owner = owner;
1435 		NEW.rm_offset = offset;
1436 		NEW.rm_blockcount = len;
1437 		NEW.rm_flags = newext;
1438 		cur->bc_rec.r = NEW;
1439 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno,
1440 				len, owner, offset, newext);
1441 		error = xfs_btree_insert(cur, &i);
1442 		if (error)
1443 			goto done;
1444 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1445 			error = -EFSCORRUPTED;
1446 			goto done;
1447 		}
1448 		break;
1449 
1450 	case 0:
1451 		/*
1452 		 * Setting the middle part of a previous oldext extent to
1453 		 * newext.  Contiguity is impossible here.
1454 		 * One extent becomes three extents.
1455 		 */
1456 		/* new right extent - oldext */
1457 		NEW.rm_startblock = bno + len;
1458 		NEW.rm_owner = owner;
1459 		NEW.rm_offset = new_endoff;
1460 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1461 				new_endoff;
1462 		NEW.rm_flags = PREV.rm_flags;
1463 		error = xfs_rmap_update(cur, &NEW);
1464 		if (error)
1465 			goto done;
1466 		/* new left extent - oldext */
1467 		NEW = PREV;
1468 		NEW.rm_blockcount = offset - PREV.rm_offset;
1469 		cur->bc_rec.r = NEW;
1470 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno,
1471 				NEW.rm_startblock, NEW.rm_blockcount,
1472 				NEW.rm_owner, NEW.rm_offset,
1473 				NEW.rm_flags);
1474 		error = xfs_btree_insert(cur, &i);
1475 		if (error)
1476 			goto done;
1477 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1478 			error = -EFSCORRUPTED;
1479 			goto done;
1480 		}
1481 		/*
1482 		 * Reset the cursor to the position of the new extent
1483 		 * we are about to insert as we can't trust it after
1484 		 * the previous insert.
1485 		 */
1486 		error = xfs_rmap_lookup_eq(cur, bno, len, owner, offset,
1487 				oldext, &i);
1488 		if (error)
1489 			goto done;
1490 		if (XFS_IS_CORRUPT(mp, i != 0)) {
1491 			error = -EFSCORRUPTED;
1492 			goto done;
1493 		}
1494 		/* new middle extent - newext */
1495 		cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
1496 		cur->bc_rec.r.rm_flags |= newext;
1497 		trace_xfs_rmap_insert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1498 				owner, offset, newext);
1499 		error = xfs_btree_insert(cur, &i);
1500 		if (error)
1501 			goto done;
1502 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1503 			error = -EFSCORRUPTED;
1504 			goto done;
1505 		}
1506 		break;
1507 
1508 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1509 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1510 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1511 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1512 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1513 	case RMAP_LEFT_CONTIG:
1514 	case RMAP_RIGHT_CONTIG:
1515 		/*
1516 		 * These cases are all impossible.
1517 		 */
1518 		ASSERT(0);
1519 	}
1520 
1521 	trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
1522 			unwritten, oinfo);
1523 done:
1524 	if (error)
1525 		trace_xfs_rmap_convert_error(cur->bc_mp,
1526 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1527 	return error;
1528 }
1529 
1530 /*
1531  * Convert an unwritten extent to a real extent or vice versa.  If there is no
1532  * possibility of overlapping extents, delegate to the simpler convert
1533  * function.
1534  */
1535 STATIC int
1536 xfs_rmap_convert_shared(
1537 	struct xfs_btree_cur		*cur,
1538 	xfs_agblock_t			bno,
1539 	xfs_extlen_t			len,
1540 	bool				unwritten,
1541 	const struct xfs_owner_info	*oinfo)
1542 {
1543 	struct xfs_mount		*mp = cur->bc_mp;
1544 	struct xfs_rmap_irec		r[4];	/* neighbor extent entries */
1545 						/* left is 0, right is 1, */
1546 						/* prev is 2, new is 3 */
1547 	uint64_t		owner;
1548 	uint64_t		offset;
1549 	uint64_t		new_endoff;
1550 	unsigned int		oldext;
1551 	unsigned int		newext;
1552 	unsigned int		flags = 0;
1553 	int			i;
1554 	int			state = 0;
1555 	int			error;
1556 
1557 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1558 	ASSERT(!(XFS_RMAP_NON_INODE_OWNER(owner) ||
1559 			(flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))));
1560 	oldext = unwritten ? XFS_RMAP_UNWRITTEN : 0;
1561 	new_endoff = offset + len;
1562 	trace_xfs_rmap_convert(mp, cur->bc_ag.pag->pag_agno, bno, len,
1563 			unwritten, oinfo);
1564 
1565 	/*
1566 	 * For the initial lookup, look for and exact match or the left-adjacent
1567 	 * record for our insertion point. This will also give us the record for
1568 	 * start block contiguity tests.
1569 	 */
1570 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
1571 			&PREV, &i);
1572 	if (error)
1573 		goto done;
1574 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1575 		error = -EFSCORRUPTED;
1576 		goto done;
1577 	}
1578 
1579 	ASSERT(PREV.rm_offset <= offset);
1580 	ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
1581 	ASSERT((PREV.rm_flags & XFS_RMAP_UNWRITTEN) == oldext);
1582 	newext = ~oldext & XFS_RMAP_UNWRITTEN;
1583 
1584 	/*
1585 	 * Set flags determining what part of the previous oldext allocation
1586 	 * extent is being replaced by a newext allocation.
1587 	 */
1588 	if (PREV.rm_offset == offset)
1589 		state |= RMAP_LEFT_FILLING;
1590 	if (PREV.rm_offset + PREV.rm_blockcount == new_endoff)
1591 		state |= RMAP_RIGHT_FILLING;
1592 
1593 	/* Is there a left record that abuts our range? */
1594 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, newext,
1595 			&LEFT, &i);
1596 	if (error)
1597 		goto done;
1598 	if (i) {
1599 		state |= RMAP_LEFT_VALID;
1600 		if (XFS_IS_CORRUPT(mp,
1601 				   LEFT.rm_startblock + LEFT.rm_blockcount >
1602 				   bno)) {
1603 			error = -EFSCORRUPTED;
1604 			goto done;
1605 		}
1606 		if (xfs_rmap_is_mergeable(&LEFT, owner, newext))
1607 			state |= RMAP_LEFT_CONTIG;
1608 	}
1609 
1610 	/* Is there a right record that abuts our range? */
1611 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
1612 			newext, &i);
1613 	if (error)
1614 		goto done;
1615 	if (i) {
1616 		state |= RMAP_RIGHT_VALID;
1617 		error = xfs_rmap_get_rec(cur, &RIGHT, &i);
1618 		if (error)
1619 			goto done;
1620 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1621 			error = -EFSCORRUPTED;
1622 			goto done;
1623 		}
1624 		if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
1625 			error = -EFSCORRUPTED;
1626 			goto done;
1627 		}
1628 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
1629 				cur->bc_ag.pag->pag_agno, RIGHT.rm_startblock,
1630 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1631 				RIGHT.rm_offset, RIGHT.rm_flags);
1632 		if (xfs_rmap_is_mergeable(&RIGHT, owner, newext))
1633 			state |= RMAP_RIGHT_CONTIG;
1634 	}
1635 
1636 	/* check that left + prev + right is not too long */
1637 	if ((state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1638 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) ==
1639 	    (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1640 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG) &&
1641 	    (unsigned long)LEFT.rm_blockcount + len +
1642 	     RIGHT.rm_blockcount > XFS_RMAP_LEN_MAX)
1643 		state &= ~RMAP_RIGHT_CONTIG;
1644 
1645 	trace_xfs_rmap_convert_state(mp, cur->bc_ag.pag->pag_agno, state,
1646 			_RET_IP_);
1647 	/*
1648 	 * Switch out based on the FILLING and CONTIG state bits.
1649 	 */
1650 	switch (state & (RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1651 			 RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG)) {
1652 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG |
1653 	     RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1654 		/*
1655 		 * Setting all of a previous oldext extent to newext.
1656 		 * The left and right neighbors are both contiguous with new.
1657 		 */
1658 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1659 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1660 				RIGHT.rm_offset, RIGHT.rm_flags);
1661 		if (error)
1662 			goto done;
1663 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1664 				PREV.rm_blockcount, PREV.rm_owner,
1665 				PREV.rm_offset, PREV.rm_flags);
1666 		if (error)
1667 			goto done;
1668 		NEW = LEFT;
1669 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1670 				NEW.rm_blockcount, NEW.rm_owner,
1671 				NEW.rm_offset, NEW.rm_flags, &i);
1672 		if (error)
1673 			goto done;
1674 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1675 			error = -EFSCORRUPTED;
1676 			goto done;
1677 		}
1678 		NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
1679 		error = xfs_rmap_update(cur, &NEW);
1680 		if (error)
1681 			goto done;
1682 		break;
1683 
1684 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1685 		/*
1686 		 * Setting all of a previous oldext extent to newext.
1687 		 * The left neighbor is contiguous, the right is not.
1688 		 */
1689 		error = xfs_rmap_delete(cur, PREV.rm_startblock,
1690 				PREV.rm_blockcount, PREV.rm_owner,
1691 				PREV.rm_offset, PREV.rm_flags);
1692 		if (error)
1693 			goto done;
1694 		NEW = LEFT;
1695 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1696 				NEW.rm_blockcount, NEW.rm_owner,
1697 				NEW.rm_offset, NEW.rm_flags, &i);
1698 		if (error)
1699 			goto done;
1700 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1701 			error = -EFSCORRUPTED;
1702 			goto done;
1703 		}
1704 		NEW.rm_blockcount += PREV.rm_blockcount;
1705 		error = xfs_rmap_update(cur, &NEW);
1706 		if (error)
1707 			goto done;
1708 		break;
1709 
1710 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1711 		/*
1712 		 * Setting all of a previous oldext extent to newext.
1713 		 * The right neighbor is contiguous, the left is not.
1714 		 */
1715 		error = xfs_rmap_delete(cur, RIGHT.rm_startblock,
1716 				RIGHT.rm_blockcount, RIGHT.rm_owner,
1717 				RIGHT.rm_offset, RIGHT.rm_flags);
1718 		if (error)
1719 			goto done;
1720 		NEW = PREV;
1721 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1722 				NEW.rm_blockcount, NEW.rm_owner,
1723 				NEW.rm_offset, NEW.rm_flags, &i);
1724 		if (error)
1725 			goto done;
1726 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1727 			error = -EFSCORRUPTED;
1728 			goto done;
1729 		}
1730 		NEW.rm_blockcount += RIGHT.rm_blockcount;
1731 		NEW.rm_flags = RIGHT.rm_flags;
1732 		error = xfs_rmap_update(cur, &NEW);
1733 		if (error)
1734 			goto done;
1735 		break;
1736 
1737 	case RMAP_LEFT_FILLING | RMAP_RIGHT_FILLING:
1738 		/*
1739 		 * Setting all of a previous oldext extent to newext.
1740 		 * Neither the left nor right neighbors are contiguous with
1741 		 * the new one.
1742 		 */
1743 		NEW = PREV;
1744 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1745 				NEW.rm_blockcount, NEW.rm_owner,
1746 				NEW.rm_offset, NEW.rm_flags, &i);
1747 		if (error)
1748 			goto done;
1749 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1750 			error = -EFSCORRUPTED;
1751 			goto done;
1752 		}
1753 		NEW.rm_flags = newext;
1754 		error = xfs_rmap_update(cur, &NEW);
1755 		if (error)
1756 			goto done;
1757 		break;
1758 
1759 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG:
1760 		/*
1761 		 * Setting the first part of a previous oldext extent to newext.
1762 		 * The left neighbor is contiguous.
1763 		 */
1764 		NEW = PREV;
1765 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1766 				NEW.rm_blockcount, NEW.rm_owner,
1767 				NEW.rm_offset, NEW.rm_flags);
1768 		if (error)
1769 			goto done;
1770 		NEW.rm_offset += len;
1771 		NEW.rm_startblock += len;
1772 		NEW.rm_blockcount -= len;
1773 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1774 				NEW.rm_blockcount, NEW.rm_owner,
1775 				NEW.rm_offset, NEW.rm_flags);
1776 		if (error)
1777 			goto done;
1778 		NEW = LEFT;
1779 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1780 				NEW.rm_blockcount, NEW.rm_owner,
1781 				NEW.rm_offset, NEW.rm_flags, &i);
1782 		if (error)
1783 			goto done;
1784 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1785 			error = -EFSCORRUPTED;
1786 			goto done;
1787 		}
1788 		NEW.rm_blockcount += len;
1789 		error = xfs_rmap_update(cur, &NEW);
1790 		if (error)
1791 			goto done;
1792 		break;
1793 
1794 	case RMAP_LEFT_FILLING:
1795 		/*
1796 		 * Setting the first part of a previous oldext extent to newext.
1797 		 * The left neighbor is not contiguous.
1798 		 */
1799 		NEW = PREV;
1800 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1801 				NEW.rm_blockcount, NEW.rm_owner,
1802 				NEW.rm_offset, NEW.rm_flags);
1803 		if (error)
1804 			goto done;
1805 		NEW.rm_offset += len;
1806 		NEW.rm_startblock += len;
1807 		NEW.rm_blockcount -= len;
1808 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1809 				NEW.rm_blockcount, NEW.rm_owner,
1810 				NEW.rm_offset, NEW.rm_flags);
1811 		if (error)
1812 			goto done;
1813 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1814 		if (error)
1815 			goto done;
1816 		break;
1817 
1818 	case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
1819 		/*
1820 		 * Setting the last part of a previous oldext extent to newext.
1821 		 * The right neighbor is contiguous with the new allocation.
1822 		 */
1823 		NEW = PREV;
1824 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1825 				NEW.rm_blockcount, NEW.rm_owner,
1826 				NEW.rm_offset, NEW.rm_flags, &i);
1827 		if (error)
1828 			goto done;
1829 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1830 			error = -EFSCORRUPTED;
1831 			goto done;
1832 		}
1833 		NEW.rm_blockcount = offset - NEW.rm_offset;
1834 		error = xfs_rmap_update(cur, &NEW);
1835 		if (error)
1836 			goto done;
1837 		NEW = RIGHT;
1838 		error = xfs_rmap_delete(cur, NEW.rm_startblock,
1839 				NEW.rm_blockcount, NEW.rm_owner,
1840 				NEW.rm_offset, NEW.rm_flags);
1841 		if (error)
1842 			goto done;
1843 		NEW.rm_offset = offset;
1844 		NEW.rm_startblock = bno;
1845 		NEW.rm_blockcount += len;
1846 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1847 				NEW.rm_blockcount, NEW.rm_owner,
1848 				NEW.rm_offset, NEW.rm_flags);
1849 		if (error)
1850 			goto done;
1851 		break;
1852 
1853 	case RMAP_RIGHT_FILLING:
1854 		/*
1855 		 * Setting the last part of a previous oldext extent to newext.
1856 		 * The right neighbor is not contiguous.
1857 		 */
1858 		NEW = PREV;
1859 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1860 				NEW.rm_blockcount, NEW.rm_owner,
1861 				NEW.rm_offset, NEW.rm_flags, &i);
1862 		if (error)
1863 			goto done;
1864 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1865 			error = -EFSCORRUPTED;
1866 			goto done;
1867 		}
1868 		NEW.rm_blockcount -= len;
1869 		error = xfs_rmap_update(cur, &NEW);
1870 		if (error)
1871 			goto done;
1872 		error = xfs_rmap_insert(cur, bno, len, owner, offset, newext);
1873 		if (error)
1874 			goto done;
1875 		break;
1876 
1877 	case 0:
1878 		/*
1879 		 * Setting the middle part of a previous oldext extent to
1880 		 * newext.  Contiguity is impossible here.
1881 		 * One extent becomes three extents.
1882 		 */
1883 		/* new right extent - oldext */
1884 		NEW.rm_startblock = bno + len;
1885 		NEW.rm_owner = owner;
1886 		NEW.rm_offset = new_endoff;
1887 		NEW.rm_blockcount = PREV.rm_offset + PREV.rm_blockcount -
1888 				new_endoff;
1889 		NEW.rm_flags = PREV.rm_flags;
1890 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1891 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
1892 				NEW.rm_flags);
1893 		if (error)
1894 			goto done;
1895 		/* new left extent - oldext */
1896 		NEW = PREV;
1897 		error = xfs_rmap_lookup_eq(cur, NEW.rm_startblock,
1898 				NEW.rm_blockcount, NEW.rm_owner,
1899 				NEW.rm_offset, NEW.rm_flags, &i);
1900 		if (error)
1901 			goto done;
1902 		if (XFS_IS_CORRUPT(mp, i != 1)) {
1903 			error = -EFSCORRUPTED;
1904 			goto done;
1905 		}
1906 		NEW.rm_blockcount = offset - NEW.rm_offset;
1907 		error = xfs_rmap_update(cur, &NEW);
1908 		if (error)
1909 			goto done;
1910 		/* new middle extent - newext */
1911 		NEW.rm_startblock = bno;
1912 		NEW.rm_blockcount = len;
1913 		NEW.rm_owner = owner;
1914 		NEW.rm_offset = offset;
1915 		NEW.rm_flags = newext;
1916 		error = xfs_rmap_insert(cur, NEW.rm_startblock,
1917 				NEW.rm_blockcount, NEW.rm_owner, NEW.rm_offset,
1918 				NEW.rm_flags);
1919 		if (error)
1920 			goto done;
1921 		break;
1922 
1923 	case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1924 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1925 	case RMAP_LEFT_FILLING | RMAP_RIGHT_CONTIG:
1926 	case RMAP_RIGHT_FILLING | RMAP_LEFT_CONTIG:
1927 	case RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
1928 	case RMAP_LEFT_CONTIG:
1929 	case RMAP_RIGHT_CONTIG:
1930 		/*
1931 		 * These cases are all impossible.
1932 		 */
1933 		ASSERT(0);
1934 	}
1935 
1936 	trace_xfs_rmap_convert_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
1937 			unwritten, oinfo);
1938 done:
1939 	if (error)
1940 		trace_xfs_rmap_convert_error(cur->bc_mp,
1941 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1942 	return error;
1943 }
1944 
1945 #undef	NEW
1946 #undef	LEFT
1947 #undef	RIGHT
1948 #undef	PREV
1949 
1950 /*
1951  * Find an extent in the rmap btree and unmap it.  For rmap extent types that
1952  * can overlap (data fork rmaps on reflink filesystems) we must be careful
1953  * that the prev/next records in the btree might belong to another owner.
1954  * Therefore we must use delete+insert to alter any of the key fields.
1955  *
1956  * For every other situation there can only be one owner for a given extent,
1957  * so we can call the regular _free function.
1958  */
1959 STATIC int
1960 xfs_rmap_unmap_shared(
1961 	struct xfs_btree_cur		*cur,
1962 	xfs_agblock_t			bno,
1963 	xfs_extlen_t			len,
1964 	bool				unwritten,
1965 	const struct xfs_owner_info	*oinfo)
1966 {
1967 	struct xfs_mount		*mp = cur->bc_mp;
1968 	struct xfs_rmap_irec		ltrec;
1969 	uint64_t			ltoff;
1970 	int				error = 0;
1971 	int				i;
1972 	uint64_t			owner;
1973 	uint64_t			offset;
1974 	unsigned int			flags;
1975 
1976 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
1977 	if (unwritten)
1978 		flags |= XFS_RMAP_UNWRITTEN;
1979 	trace_xfs_rmap_unmap(mp, cur->bc_ag.pag->pag_agno, bno, len,
1980 			unwritten, oinfo);
1981 
1982 	/*
1983 	 * We should always have a left record because there's a static record
1984 	 * for the AG headers at rm_startblock == 0 created by mkfs/growfs that
1985 	 * will not ever be removed from the tree.
1986 	 */
1987 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
1988 			&ltrec, &i);
1989 	if (error)
1990 		goto out_error;
1991 	if (XFS_IS_CORRUPT(mp, i != 1)) {
1992 		error = -EFSCORRUPTED;
1993 		goto out_error;
1994 	}
1995 	ltoff = ltrec.rm_offset;
1996 
1997 	/* Make sure the extent we found covers the entire freeing range. */
1998 	if (XFS_IS_CORRUPT(mp,
1999 			   ltrec.rm_startblock > bno ||
2000 			   ltrec.rm_startblock + ltrec.rm_blockcount <
2001 			   bno + len)) {
2002 		error = -EFSCORRUPTED;
2003 		goto out_error;
2004 	}
2005 
2006 	/* Make sure the owner matches what we expect to find in the tree. */
2007 	if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) {
2008 		error = -EFSCORRUPTED;
2009 		goto out_error;
2010 	}
2011 
2012 	/* Make sure the unwritten flag matches. */
2013 	if (XFS_IS_CORRUPT(mp,
2014 			   (flags & XFS_RMAP_UNWRITTEN) !=
2015 			   (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) {
2016 		error = -EFSCORRUPTED;
2017 		goto out_error;
2018 	}
2019 
2020 	/* Check the offset. */
2021 	if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) {
2022 		error = -EFSCORRUPTED;
2023 		goto out_error;
2024 	}
2025 	if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) {
2026 		error = -EFSCORRUPTED;
2027 		goto out_error;
2028 	}
2029 
2030 	if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
2031 		/* Exact match, simply remove the record from rmap tree. */
2032 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2033 				ltrec.rm_blockcount, ltrec.rm_owner,
2034 				ltrec.rm_offset, ltrec.rm_flags);
2035 		if (error)
2036 			goto out_error;
2037 	} else if (ltrec.rm_startblock == bno) {
2038 		/*
2039 		 * Overlap left hand side of extent: move the start, trim the
2040 		 * length and update the current record.
2041 		 *
2042 		 *       ltbno                ltlen
2043 		 * Orig:    |oooooooooooooooooooo|
2044 		 * Freeing: |fffffffff|
2045 		 * Result:            |rrrrrrrrrr|
2046 		 *         bno       len
2047 		 */
2048 
2049 		/* Delete prev rmap. */
2050 		error = xfs_rmap_delete(cur, ltrec.rm_startblock,
2051 				ltrec.rm_blockcount, ltrec.rm_owner,
2052 				ltrec.rm_offset, ltrec.rm_flags);
2053 		if (error)
2054 			goto out_error;
2055 
2056 		/* Add an rmap at the new offset. */
2057 		ltrec.rm_startblock += len;
2058 		ltrec.rm_blockcount -= len;
2059 		ltrec.rm_offset += len;
2060 		error = xfs_rmap_insert(cur, ltrec.rm_startblock,
2061 				ltrec.rm_blockcount, ltrec.rm_owner,
2062 				ltrec.rm_offset, ltrec.rm_flags);
2063 		if (error)
2064 			goto out_error;
2065 	} else if (ltrec.rm_startblock + ltrec.rm_blockcount == bno + len) {
2066 		/*
2067 		 * Overlap right hand side of extent: trim the length and
2068 		 * update the current record.
2069 		 *
2070 		 *       ltbno                ltlen
2071 		 * Orig:    |oooooooooooooooooooo|
2072 		 * Freeing:            |fffffffff|
2073 		 * Result:  |rrrrrrrrrr|
2074 		 *                    bno       len
2075 		 */
2076 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2077 				ltrec.rm_blockcount, ltrec.rm_owner,
2078 				ltrec.rm_offset, ltrec.rm_flags, &i);
2079 		if (error)
2080 			goto out_error;
2081 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2082 			error = -EFSCORRUPTED;
2083 			goto out_error;
2084 		}
2085 		ltrec.rm_blockcount -= len;
2086 		error = xfs_rmap_update(cur, &ltrec);
2087 		if (error)
2088 			goto out_error;
2089 	} else {
2090 		/*
2091 		 * Overlap middle of extent: trim the length of the existing
2092 		 * record to the length of the new left-extent size, increment
2093 		 * the insertion position so we can insert a new record
2094 		 * containing the remaining right-extent space.
2095 		 *
2096 		 *       ltbno                ltlen
2097 		 * Orig:    |oooooooooooooooooooo|
2098 		 * Freeing:       |fffffffff|
2099 		 * Result:  |rrrrr|         |rrrr|
2100 		 *               bno       len
2101 		 */
2102 		xfs_extlen_t	orig_len = ltrec.rm_blockcount;
2103 
2104 		/* Shrink the left side of the rmap */
2105 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2106 				ltrec.rm_blockcount, ltrec.rm_owner,
2107 				ltrec.rm_offset, ltrec.rm_flags, &i);
2108 		if (error)
2109 			goto out_error;
2110 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2111 			error = -EFSCORRUPTED;
2112 			goto out_error;
2113 		}
2114 		ltrec.rm_blockcount = bno - ltrec.rm_startblock;
2115 		error = xfs_rmap_update(cur, &ltrec);
2116 		if (error)
2117 			goto out_error;
2118 
2119 		/* Add an rmap at the new offset */
2120 		error = xfs_rmap_insert(cur, bno + len,
2121 				orig_len - len - ltrec.rm_blockcount,
2122 				ltrec.rm_owner, offset + len,
2123 				ltrec.rm_flags);
2124 		if (error)
2125 			goto out_error;
2126 	}
2127 
2128 	trace_xfs_rmap_unmap_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
2129 			unwritten, oinfo);
2130 out_error:
2131 	if (error)
2132 		trace_xfs_rmap_unmap_error(cur->bc_mp,
2133 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
2134 	return error;
2135 }
2136 
2137 /*
2138  * Find an extent in the rmap btree and map it.  For rmap extent types that
2139  * can overlap (data fork rmaps on reflink filesystems) we must be careful
2140  * that the prev/next records in the btree might belong to another owner.
2141  * Therefore we must use delete+insert to alter any of the key fields.
2142  *
2143  * For every other situation there can only be one owner for a given extent,
2144  * so we can call the regular _alloc function.
2145  */
2146 STATIC int
2147 xfs_rmap_map_shared(
2148 	struct xfs_btree_cur		*cur,
2149 	xfs_agblock_t			bno,
2150 	xfs_extlen_t			len,
2151 	bool				unwritten,
2152 	const struct xfs_owner_info	*oinfo)
2153 {
2154 	struct xfs_mount		*mp = cur->bc_mp;
2155 	struct xfs_rmap_irec		ltrec;
2156 	struct xfs_rmap_irec		gtrec;
2157 	int				have_gt;
2158 	int				have_lt;
2159 	int				error = 0;
2160 	int				i;
2161 	uint64_t			owner;
2162 	uint64_t			offset;
2163 	unsigned int			flags = 0;
2164 
2165 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2166 	if (unwritten)
2167 		flags |= XFS_RMAP_UNWRITTEN;
2168 	trace_xfs_rmap_map(mp, cur->bc_ag.pag->pag_agno, bno, len,
2169 			unwritten, oinfo);
2170 
2171 	/* Is there a left record that abuts our range? */
2172 	error = xfs_rmap_find_left_neighbor(cur, bno, owner, offset, flags,
2173 			&ltrec, &have_lt);
2174 	if (error)
2175 		goto out_error;
2176 	if (have_lt &&
2177 	    !xfs_rmap_is_mergeable(&ltrec, owner, flags))
2178 		have_lt = 0;
2179 
2180 	/* Is there a right record that abuts our range? */
2181 	error = xfs_rmap_lookup_eq(cur, bno + len, len, owner, offset + len,
2182 			flags, &have_gt);
2183 	if (error)
2184 		goto out_error;
2185 	if (have_gt) {
2186 		error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
2187 		if (error)
2188 			goto out_error;
2189 		if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
2190 			error = -EFSCORRUPTED;
2191 			goto out_error;
2192 		}
2193 		trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
2194 			cur->bc_ag.pag->pag_agno, gtrec.rm_startblock,
2195 			gtrec.rm_blockcount, gtrec.rm_owner,
2196 			gtrec.rm_offset, gtrec.rm_flags);
2197 
2198 		if (!xfs_rmap_is_mergeable(&gtrec, owner, flags))
2199 			have_gt = 0;
2200 	}
2201 
2202 	if (have_lt &&
2203 	    ltrec.rm_startblock + ltrec.rm_blockcount == bno &&
2204 	    ltrec.rm_offset + ltrec.rm_blockcount == offset) {
2205 		/*
2206 		 * Left edge contiguous, merge into left record.
2207 		 *
2208 		 *       ltbno     ltlen
2209 		 * orig:   |ooooooooo|
2210 		 * adding:           |aaaaaaaaa|
2211 		 * result: |rrrrrrrrrrrrrrrrrrr|
2212 		 *                  bno       len
2213 		 */
2214 		ltrec.rm_blockcount += len;
2215 		if (have_gt &&
2216 		    bno + len == gtrec.rm_startblock &&
2217 		    offset + len == gtrec.rm_offset) {
2218 			/*
2219 			 * Right edge also contiguous, delete right record
2220 			 * and merge into left record.
2221 			 *
2222 			 *       ltbno     ltlen    gtbno     gtlen
2223 			 * orig:   |ooooooooo|         |ooooooooo|
2224 			 * adding:           |aaaaaaaaa|
2225 			 * result: |rrrrrrrrrrrrrrrrrrrrrrrrrrrrr|
2226 			 */
2227 			ltrec.rm_blockcount += gtrec.rm_blockcount;
2228 			error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2229 					gtrec.rm_blockcount, gtrec.rm_owner,
2230 					gtrec.rm_offset, gtrec.rm_flags);
2231 			if (error)
2232 				goto out_error;
2233 		}
2234 
2235 		/* Point the cursor back to the left record and update. */
2236 		error = xfs_rmap_lookup_eq(cur, ltrec.rm_startblock,
2237 				ltrec.rm_blockcount, ltrec.rm_owner,
2238 				ltrec.rm_offset, ltrec.rm_flags, &i);
2239 		if (error)
2240 			goto out_error;
2241 		if (XFS_IS_CORRUPT(mp, i != 1)) {
2242 			error = -EFSCORRUPTED;
2243 			goto out_error;
2244 		}
2245 
2246 		error = xfs_rmap_update(cur, &ltrec);
2247 		if (error)
2248 			goto out_error;
2249 	} else if (have_gt &&
2250 		   bno + len == gtrec.rm_startblock &&
2251 		   offset + len == gtrec.rm_offset) {
2252 		/*
2253 		 * Right edge contiguous, merge into right record.
2254 		 *
2255 		 *                 gtbno     gtlen
2256 		 * Orig:             |ooooooooo|
2257 		 * adding: |aaaaaaaaa|
2258 		 * Result: |rrrrrrrrrrrrrrrrrrr|
2259 		 *        bno       len
2260 		 */
2261 		/* Delete the old record. */
2262 		error = xfs_rmap_delete(cur, gtrec.rm_startblock,
2263 				gtrec.rm_blockcount, gtrec.rm_owner,
2264 				gtrec.rm_offset, gtrec.rm_flags);
2265 		if (error)
2266 			goto out_error;
2267 
2268 		/* Move the start and re-add it. */
2269 		gtrec.rm_startblock = bno;
2270 		gtrec.rm_blockcount += len;
2271 		gtrec.rm_offset = offset;
2272 		error = xfs_rmap_insert(cur, gtrec.rm_startblock,
2273 				gtrec.rm_blockcount, gtrec.rm_owner,
2274 				gtrec.rm_offset, gtrec.rm_flags);
2275 		if (error)
2276 			goto out_error;
2277 	} else {
2278 		/*
2279 		 * No contiguous edge with identical owner, insert
2280 		 * new record at current cursor position.
2281 		 */
2282 		error = xfs_rmap_insert(cur, bno, len, owner, offset, flags);
2283 		if (error)
2284 			goto out_error;
2285 	}
2286 
2287 	trace_xfs_rmap_map_done(mp, cur->bc_ag.pag->pag_agno, bno, len,
2288 			unwritten, oinfo);
2289 out_error:
2290 	if (error)
2291 		trace_xfs_rmap_map_error(cur->bc_mp,
2292 				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
2293 	return error;
2294 }
2295 
2296 /* Insert a raw rmap into the rmapbt. */
2297 int
2298 xfs_rmap_map_raw(
2299 	struct xfs_btree_cur	*cur,
2300 	struct xfs_rmap_irec	*rmap)
2301 {
2302 	struct xfs_owner_info	oinfo;
2303 
2304 	oinfo.oi_owner = rmap->rm_owner;
2305 	oinfo.oi_offset = rmap->rm_offset;
2306 	oinfo.oi_flags = 0;
2307 	if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
2308 		oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
2309 	if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
2310 		oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
2311 
2312 	if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
2313 		return xfs_rmap_map(cur, rmap->rm_startblock,
2314 				rmap->rm_blockcount,
2315 				rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2316 				&oinfo);
2317 
2318 	return xfs_rmap_map_shared(cur, rmap->rm_startblock,
2319 			rmap->rm_blockcount,
2320 			rmap->rm_flags & XFS_RMAP_UNWRITTEN,
2321 			&oinfo);
2322 }
2323 
2324 struct xfs_rmap_query_range_info {
2325 	xfs_rmap_query_range_fn	fn;
2326 	void				*priv;
2327 };
2328 
2329 /* Format btree record and pass to our callback. */
2330 STATIC int
2331 xfs_rmap_query_range_helper(
2332 	struct xfs_btree_cur		*cur,
2333 	const union xfs_btree_rec	*rec,
2334 	void				*priv)
2335 {
2336 	struct xfs_rmap_query_range_info	*query = priv;
2337 	struct xfs_rmap_irec			irec;
2338 
2339 	if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
2340 	    xfs_rmap_check_irec(cur, &irec) != NULL)
2341 		return -EFSCORRUPTED;
2342 
2343 	return query->fn(cur, &irec, query->priv);
2344 }
2345 
2346 /* Find all rmaps between two keys. */
2347 int
2348 xfs_rmap_query_range(
2349 	struct xfs_btree_cur			*cur,
2350 	const struct xfs_rmap_irec		*low_rec,
2351 	const struct xfs_rmap_irec		*high_rec,
2352 	xfs_rmap_query_range_fn			fn,
2353 	void					*priv)
2354 {
2355 	union xfs_btree_irec			low_brec;
2356 	union xfs_btree_irec			high_brec;
2357 	struct xfs_rmap_query_range_info	query;
2358 
2359 	low_brec.r = *low_rec;
2360 	high_brec.r = *high_rec;
2361 	query.priv = priv;
2362 	query.fn = fn;
2363 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
2364 			xfs_rmap_query_range_helper, &query);
2365 }
2366 
2367 /* Find all rmaps. */
2368 int
2369 xfs_rmap_query_all(
2370 	struct xfs_btree_cur			*cur,
2371 	xfs_rmap_query_range_fn			fn,
2372 	void					*priv)
2373 {
2374 	struct xfs_rmap_query_range_info	query;
2375 
2376 	query.priv = priv;
2377 	query.fn = fn;
2378 	return xfs_btree_query_all(cur, xfs_rmap_query_range_helper, &query);
2379 }
2380 
2381 /* Clean up after calling xfs_rmap_finish_one. */
2382 void
2383 xfs_rmap_finish_one_cleanup(
2384 	struct xfs_trans	*tp,
2385 	struct xfs_btree_cur	*rcur,
2386 	int			error)
2387 {
2388 	struct xfs_buf		*agbp;
2389 
2390 	if (rcur == NULL)
2391 		return;
2392 	agbp = rcur->bc_ag.agbp;
2393 	xfs_btree_del_cursor(rcur, error);
2394 	if (error)
2395 		xfs_trans_brelse(tp, agbp);
2396 }
2397 
2398 /*
2399  * Process one of the deferred rmap operations.  We pass back the
2400  * btree cursor to maintain our lock on the rmapbt between calls.
2401  * This saves time and eliminates a buffer deadlock between the
2402  * superblock and the AGF because we'll always grab them in the same
2403  * order.
2404  */
2405 int
2406 xfs_rmap_finish_one(
2407 	struct xfs_trans		*tp,
2408 	struct xfs_rmap_intent		*ri,
2409 	struct xfs_btree_cur		**pcur)
2410 {
2411 	struct xfs_mount		*mp = tp->t_mountp;
2412 	struct xfs_btree_cur		*rcur;
2413 	struct xfs_buf			*agbp = NULL;
2414 	int				error = 0;
2415 	struct xfs_owner_info		oinfo;
2416 	xfs_agblock_t			bno;
2417 	bool				unwritten;
2418 
2419 	bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock);
2420 
2421 	trace_xfs_rmap_deferred(mp, ri->ri_pag->pag_agno, ri->ri_type, bno,
2422 			ri->ri_owner, ri->ri_whichfork,
2423 			ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount,
2424 			ri->ri_bmap.br_state);
2425 
2426 	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE))
2427 		return -EIO;
2428 
2429 	/*
2430 	 * If we haven't gotten a cursor or the cursor AG doesn't match
2431 	 * the startblock, get one now.
2432 	 */
2433 	rcur = *pcur;
2434 	if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
2435 		xfs_rmap_finish_one_cleanup(tp, rcur, 0);
2436 		rcur = NULL;
2437 		*pcur = NULL;
2438 	}
2439 	if (rcur == NULL) {
2440 		/*
2441 		 * Refresh the freelist before we start changing the
2442 		 * rmapbt, because a shape change could cause us to
2443 		 * allocate blocks.
2444 		 */
2445 		error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp);
2446 		if (error)
2447 			return error;
2448 		if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
2449 			return -EFSCORRUPTED;
2450 
2451 		rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
2452 	}
2453 	*pcur = rcur;
2454 
2455 	xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
2456 			ri->ri_bmap.br_startoff);
2457 	unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
2458 	bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock);
2459 
2460 	switch (ri->ri_type) {
2461 	case XFS_RMAP_ALLOC:
2462 	case XFS_RMAP_MAP:
2463 		error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount,
2464 				unwritten, &oinfo);
2465 		break;
2466 	case XFS_RMAP_MAP_SHARED:
2467 		error = xfs_rmap_map_shared(rcur, bno,
2468 				ri->ri_bmap.br_blockcount, unwritten, &oinfo);
2469 		break;
2470 	case XFS_RMAP_FREE:
2471 	case XFS_RMAP_UNMAP:
2472 		error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount,
2473 				unwritten, &oinfo);
2474 		break;
2475 	case XFS_RMAP_UNMAP_SHARED:
2476 		error = xfs_rmap_unmap_shared(rcur, bno,
2477 				ri->ri_bmap.br_blockcount, unwritten, &oinfo);
2478 		break;
2479 	case XFS_RMAP_CONVERT:
2480 		error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount,
2481 				!unwritten, &oinfo);
2482 		break;
2483 	case XFS_RMAP_CONVERT_SHARED:
2484 		error = xfs_rmap_convert_shared(rcur, bno,
2485 				ri->ri_bmap.br_blockcount, !unwritten, &oinfo);
2486 		break;
2487 	default:
2488 		ASSERT(0);
2489 		error = -EFSCORRUPTED;
2490 	}
2491 
2492 	return error;
2493 }
2494 
2495 /*
2496  * Don't defer an rmap if we aren't an rmap filesystem.
2497  */
2498 static bool
2499 xfs_rmap_update_is_needed(
2500 	struct xfs_mount	*mp,
2501 	int			whichfork)
2502 {
2503 	return xfs_has_rmapbt(mp) && whichfork != XFS_COW_FORK;
2504 }
2505 
2506 /*
2507  * Record a rmap intent; the list is kept sorted first by AG and then by
2508  * increasing age.
2509  */
2510 static void
2511 __xfs_rmap_add(
2512 	struct xfs_trans		*tp,
2513 	enum xfs_rmap_intent_type	type,
2514 	uint64_t			owner,
2515 	int				whichfork,
2516 	struct xfs_bmbt_irec		*bmap)
2517 {
2518 	struct xfs_rmap_intent		*ri;
2519 
2520 	trace_xfs_rmap_defer(tp->t_mountp,
2521 			XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
2522 			type,
2523 			XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
2524 			owner, whichfork,
2525 			bmap->br_startoff,
2526 			bmap->br_blockcount,
2527 			bmap->br_state);
2528 
2529 	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
2530 	INIT_LIST_HEAD(&ri->ri_list);
2531 	ri->ri_type = type;
2532 	ri->ri_owner = owner;
2533 	ri->ri_whichfork = whichfork;
2534 	ri->ri_bmap = *bmap;
2535 
2536 	xfs_rmap_update_get_group(tp->t_mountp, ri);
2537 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
2538 }
2539 
2540 /* Map an extent into a file. */
2541 void
2542 xfs_rmap_map_extent(
2543 	struct xfs_trans	*tp,
2544 	struct xfs_inode	*ip,
2545 	int			whichfork,
2546 	struct xfs_bmbt_irec	*PREV)
2547 {
2548 	enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
2549 
2550 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2551 		return;
2552 
2553 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2554 		type = XFS_RMAP_MAP_SHARED;
2555 
2556 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2557 }
2558 
2559 /* Unmap an extent out of a file. */
2560 void
2561 xfs_rmap_unmap_extent(
2562 	struct xfs_trans	*tp,
2563 	struct xfs_inode	*ip,
2564 	int			whichfork,
2565 	struct xfs_bmbt_irec	*PREV)
2566 {
2567 	enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
2568 
2569 	if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
2570 		return;
2571 
2572 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2573 		type = XFS_RMAP_UNMAP_SHARED;
2574 
2575 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2576 }
2577 
2578 /*
2579  * Convert a data fork extent from unwritten to real or vice versa.
2580  *
2581  * Note that tp can be NULL here as no transaction is used for COW fork
2582  * unwritten conversion.
2583  */
2584 void
2585 xfs_rmap_convert_extent(
2586 	struct xfs_mount	*mp,
2587 	struct xfs_trans	*tp,
2588 	struct xfs_inode	*ip,
2589 	int			whichfork,
2590 	struct xfs_bmbt_irec	*PREV)
2591 {
2592 	enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
2593 
2594 	if (!xfs_rmap_update_is_needed(mp, whichfork))
2595 		return;
2596 
2597 	if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
2598 		type = XFS_RMAP_CONVERT_SHARED;
2599 
2600 	__xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
2601 }
2602 
2603 /* Schedule the creation of an rmap for non-file data. */
2604 void
2605 xfs_rmap_alloc_extent(
2606 	struct xfs_trans	*tp,
2607 	xfs_agnumber_t		agno,
2608 	xfs_agblock_t		bno,
2609 	xfs_extlen_t		len,
2610 	uint64_t		owner)
2611 {
2612 	struct xfs_bmbt_irec	bmap;
2613 
2614 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2615 		return;
2616 
2617 	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
2618 	bmap.br_blockcount = len;
2619 	bmap.br_startoff = 0;
2620 	bmap.br_state = XFS_EXT_NORM;
2621 
2622 	__xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
2623 }
2624 
2625 /* Schedule the deletion of an rmap for non-file data. */
2626 void
2627 xfs_rmap_free_extent(
2628 	struct xfs_trans	*tp,
2629 	xfs_agnumber_t		agno,
2630 	xfs_agblock_t		bno,
2631 	xfs_extlen_t		len,
2632 	uint64_t		owner)
2633 {
2634 	struct xfs_bmbt_irec	bmap;
2635 
2636 	if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
2637 		return;
2638 
2639 	bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
2640 	bmap.br_blockcount = len;
2641 	bmap.br_startoff = 0;
2642 	bmap.br_state = XFS_EXT_NORM;
2643 
2644 	__xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
2645 }
2646 
2647 /* Compare rmap records.  Returns -1 if a < b, 1 if a > b, and 0 if equal. */
2648 int
2649 xfs_rmap_compare(
2650 	const struct xfs_rmap_irec	*a,
2651 	const struct xfs_rmap_irec	*b)
2652 {
2653 	__u64				oa;
2654 	__u64				ob;
2655 
2656 	oa = xfs_rmap_irec_offset_pack(a);
2657 	ob = xfs_rmap_irec_offset_pack(b);
2658 
2659 	if (a->rm_startblock < b->rm_startblock)
2660 		return -1;
2661 	else if (a->rm_startblock > b->rm_startblock)
2662 		return 1;
2663 	else if (a->rm_owner < b->rm_owner)
2664 		return -1;
2665 	else if (a->rm_owner > b->rm_owner)
2666 		return 1;
2667 	else if (oa < ob)
2668 		return -1;
2669 	else if (oa > ob)
2670 		return 1;
2671 	else
2672 		return 0;
2673 }
2674 
2675 /* Is there a record covering a given extent? */
2676 int
2677 xfs_rmap_has_record(
2678 	struct xfs_btree_cur	*cur,
2679 	xfs_agblock_t		bno,
2680 	xfs_extlen_t		len,
2681 	bool			*exists)
2682 {
2683 	union xfs_btree_irec	low;
2684 	union xfs_btree_irec	high;
2685 
2686 	memset(&low, 0, sizeof(low));
2687 	low.r.rm_startblock = bno;
2688 	memset(&high, 0xFF, sizeof(high));
2689 	high.r.rm_startblock = bno + len - 1;
2690 
2691 	return xfs_btree_has_record(cur, &low, &high, exists);
2692 }
2693 
2694 /*
2695  * Is there a record for this owner completely covering a given physical
2696  * extent?  If so, *has_rmap will be set to true.  If there is no record
2697  * or the record only covers part of the range, we set *has_rmap to false.
2698  * This function doesn't perform range lookups or offset checks, so it is
2699  * not suitable for checking data fork blocks.
2700  */
2701 int
2702 xfs_rmap_record_exists(
2703 	struct xfs_btree_cur		*cur,
2704 	xfs_agblock_t			bno,
2705 	xfs_extlen_t			len,
2706 	const struct xfs_owner_info	*oinfo,
2707 	bool				*has_rmap)
2708 {
2709 	uint64_t			owner;
2710 	uint64_t			offset;
2711 	unsigned int			flags;
2712 	int				has_record;
2713 	struct xfs_rmap_irec		irec;
2714 	int				error;
2715 
2716 	xfs_owner_info_unpack(oinfo, &owner, &offset, &flags);
2717 	ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
2718 	       (flags & XFS_RMAP_BMBT_BLOCK));
2719 
2720 	error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
2721 			&has_record);
2722 	if (error)
2723 		return error;
2724 	if (!has_record) {
2725 		*has_rmap = false;
2726 		return 0;
2727 	}
2728 
2729 	*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
2730 		     irec.rm_startblock + irec.rm_blockcount >= bno + len);
2731 	return 0;
2732 }
2733 
2734 struct xfs_rmap_key_state {
2735 	uint64_t			owner;
2736 	uint64_t			offset;
2737 	unsigned int			flags;
2738 };
2739 
2740 /* For each rmap given, figure out if it doesn't match the key we want. */
2741 STATIC int
2742 xfs_rmap_has_other_keys_helper(
2743 	struct xfs_btree_cur		*cur,
2744 	const struct xfs_rmap_irec	*rec,
2745 	void				*priv)
2746 {
2747 	struct xfs_rmap_key_state	*rks = priv;
2748 
2749 	if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset &&
2750 	    ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags)
2751 		return 0;
2752 	return -ECANCELED;
2753 }
2754 
2755 /*
2756  * Given an extent and some owner info, can we find records overlapping
2757  * the extent whose owner info does not match the given owner?
2758  */
2759 int
2760 xfs_rmap_has_other_keys(
2761 	struct xfs_btree_cur		*cur,
2762 	xfs_agblock_t			bno,
2763 	xfs_extlen_t			len,
2764 	const struct xfs_owner_info	*oinfo,
2765 	bool				*has_rmap)
2766 {
2767 	struct xfs_rmap_irec		low = {0};
2768 	struct xfs_rmap_irec		high;
2769 	struct xfs_rmap_key_state	rks;
2770 	int				error;
2771 
2772 	xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags);
2773 	*has_rmap = false;
2774 
2775 	low.rm_startblock = bno;
2776 	memset(&high, 0xFF, sizeof(high));
2777 	high.rm_startblock = bno + len - 1;
2778 
2779 	error = xfs_rmap_query_range(cur, &low, &high,
2780 			xfs_rmap_has_other_keys_helper, &rks);
2781 	if (error == -ECANCELED) {
2782 		*has_rmap = true;
2783 		return 0;
2784 	}
2785 
2786 	return error;
2787 }
2788 
2789 const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
2790 	.oi_owner = XFS_RMAP_OWN_NULL,
2791 };
2792 const struct xfs_owner_info XFS_RMAP_OINFO_ANY_OWNER = {
2793 	.oi_owner = XFS_RMAP_OWN_UNKNOWN,
2794 };
2795 const struct xfs_owner_info XFS_RMAP_OINFO_FS = {
2796 	.oi_owner = XFS_RMAP_OWN_FS,
2797 };
2798 const struct xfs_owner_info XFS_RMAP_OINFO_LOG = {
2799 	.oi_owner = XFS_RMAP_OWN_LOG,
2800 };
2801 const struct xfs_owner_info XFS_RMAP_OINFO_AG = {
2802 	.oi_owner = XFS_RMAP_OWN_AG,
2803 };
2804 const struct xfs_owner_info XFS_RMAP_OINFO_INOBT = {
2805 	.oi_owner = XFS_RMAP_OWN_INOBT,
2806 };
2807 const struct xfs_owner_info XFS_RMAP_OINFO_INODES = {
2808 	.oi_owner = XFS_RMAP_OWN_INODES,
2809 };
2810 const struct xfs_owner_info XFS_RMAP_OINFO_REFC = {
2811 	.oi_owner = XFS_RMAP_OWN_REFC,
2812 };
2813 const struct xfs_owner_info XFS_RMAP_OINFO_COW = {
2814 	.oi_owner = XFS_RMAP_OWN_COW,
2815 };
2816 
2817 int __init
2818 xfs_rmap_intent_init_cache(void)
2819 {
2820 	xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent",
2821 			sizeof(struct xfs_rmap_intent),
2822 			0, 0, NULL);
2823 
2824 	return xfs_rmap_intent_cache != NULL ? 0 : -ENOMEM;
2825 }
2826 
2827 void
2828 xfs_rmap_intent_destroy_cache(void)
2829 {
2830 	kmem_cache_destroy(xfs_rmap_intent_cache);
2831 	xfs_rmap_intent_cache = NULL;
2832 }
2833