xref: /openbmc/linux/fs/xfs/libxfs/xfs_dir2_leaf.c (revision 4f205687)
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * Copyright (c) 2013 Red Hat, Inc.
4  * All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write the Free Software Foundation,
17  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_mount.h"
25 #include "xfs_da_format.h"
26 #include "xfs_da_btree.h"
27 #include "xfs_inode.h"
28 #include "xfs_bmap.h"
29 #include "xfs_dir2.h"
30 #include "xfs_dir2_priv.h"
31 #include "xfs_error.h"
32 #include "xfs_trace.h"
33 #include "xfs_trans.h"
34 #include "xfs_buf_item.h"
35 #include "xfs_cksum.h"
36 #include "xfs_log.h"
37 
38 /*
39  * Local function declarations.
40  */
41 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
42 				    int *indexp, struct xfs_buf **dbpp);
43 static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
44 				    struct xfs_buf *bp, int first, int last);
45 static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
46 				   struct xfs_buf *bp);
47 
48 /*
49  * Check the internal consistency of a leaf1 block.
50  * Pop an assert if something is wrong.
51  */
52 #ifdef DEBUG
53 #define	xfs_dir3_leaf_check(dp, bp) \
54 do { \
55 	if (!xfs_dir3_leaf1_check((dp), (bp))) \
56 		ASSERT(0); \
57 } while (0);
58 
59 STATIC bool
60 xfs_dir3_leaf1_check(
61 	struct xfs_inode	*dp,
62 	struct xfs_buf		*bp)
63 {
64 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
65 	struct xfs_dir3_icleaf_hdr leafhdr;
66 
67 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
68 
69 	if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
70 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
71 		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
72 			return false;
73 	} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
74 		return false;
75 
76 	return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
77 }
78 #else
79 #define	xfs_dir3_leaf_check(dp, bp)
80 #endif
81 
82 bool
83 xfs_dir3_leaf_check_int(
84 	struct xfs_mount	*mp,
85 	struct xfs_inode	*dp,
86 	struct xfs_dir3_icleaf_hdr *hdr,
87 	struct xfs_dir2_leaf	*leaf)
88 {
89 	struct xfs_dir2_leaf_entry *ents;
90 	xfs_dir2_leaf_tail_t	*ltp;
91 	int			stale;
92 	int			i;
93 	const struct xfs_dir_ops *ops;
94 	struct xfs_dir3_icleaf_hdr leafhdr;
95 	struct xfs_da_geometry	*geo = mp->m_dir_geo;
96 
97 	/*
98 	 * we can be passed a null dp here from a verifier, so we need to go the
99 	 * hard way to get them.
100 	 */
101 	ops = xfs_dir_get_ops(mp, dp);
102 
103 	if (!hdr) {
104 		ops->leaf_hdr_from_disk(&leafhdr, leaf);
105 		hdr = &leafhdr;
106 	}
107 
108 	ents = ops->leaf_ents_p(leaf);
109 	ltp = xfs_dir2_leaf_tail_p(geo, leaf);
110 
111 	/*
112 	 * XXX (dgc): This value is not restrictive enough.
113 	 * Should factor in the size of the bests table as well.
114 	 * We can deduce a value for that from di_size.
115 	 */
116 	if (hdr->count > ops->leaf_max_ents(geo))
117 		return false;
118 
119 	/* Leaves and bests don't overlap in leaf format. */
120 	if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
121 	     hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
122 	    (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
123 		return false;
124 
125 	/* Check hash value order, count stale entries.  */
126 	for (i = stale = 0; i < hdr->count; i++) {
127 		if (i + 1 < hdr->count) {
128 			if (be32_to_cpu(ents[i].hashval) >
129 					be32_to_cpu(ents[i + 1].hashval))
130 				return false;
131 		}
132 		if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
133 			stale++;
134 	}
135 	if (hdr->stale != stale)
136 		return false;
137 	return true;
138 }
139 
140 /*
141  * We verify the magic numbers before decoding the leaf header so that on debug
142  * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
143  * to incorrect magic numbers.
144  */
145 static bool
146 xfs_dir3_leaf_verify(
147 	struct xfs_buf		*bp,
148 	__uint16_t		magic)
149 {
150 	struct xfs_mount	*mp = bp->b_target->bt_mount;
151 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
152 
153 	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
154 
155 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
156 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
157 		__uint16_t		magic3;
158 
159 		magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
160 							 : XFS_DIR3_LEAFN_MAGIC;
161 
162 		if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
163 			return false;
164 		if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
165 			return false;
166 		if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
167 			return false;
168 		if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
169 			return false;
170 	} else {
171 		if (leaf->hdr.info.magic != cpu_to_be16(magic))
172 			return false;
173 	}
174 
175 	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
176 }
177 
178 static void
179 __read_verify(
180 	struct xfs_buf  *bp,
181 	__uint16_t	magic)
182 {
183 	struct xfs_mount	*mp = bp->b_target->bt_mount;
184 
185 	if (xfs_sb_version_hascrc(&mp->m_sb) &&
186 	     !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
187 		xfs_buf_ioerror(bp, -EFSBADCRC);
188 	else if (!xfs_dir3_leaf_verify(bp, magic))
189 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
190 
191 	if (bp->b_error)
192 		xfs_verifier_error(bp);
193 }
194 
195 static void
196 __write_verify(
197 	struct xfs_buf  *bp,
198 	__uint16_t	magic)
199 {
200 	struct xfs_mount	*mp = bp->b_target->bt_mount;
201 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
202 	struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
203 
204 	if (!xfs_dir3_leaf_verify(bp, magic)) {
205 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
206 		xfs_verifier_error(bp);
207 		return;
208 	}
209 
210 	if (!xfs_sb_version_hascrc(&mp->m_sb))
211 		return;
212 
213 	if (bip)
214 		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
215 
216 	xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
217 }
218 
219 static void
220 xfs_dir3_leaf1_read_verify(
221 	struct xfs_buf	*bp)
222 {
223 	__read_verify(bp, XFS_DIR2_LEAF1_MAGIC);
224 }
225 
226 static void
227 xfs_dir3_leaf1_write_verify(
228 	struct xfs_buf	*bp)
229 {
230 	__write_verify(bp, XFS_DIR2_LEAF1_MAGIC);
231 }
232 
233 static void
234 xfs_dir3_leafn_read_verify(
235 	struct xfs_buf	*bp)
236 {
237 	__read_verify(bp, XFS_DIR2_LEAFN_MAGIC);
238 }
239 
240 static void
241 xfs_dir3_leafn_write_verify(
242 	struct xfs_buf	*bp)
243 {
244 	__write_verify(bp, XFS_DIR2_LEAFN_MAGIC);
245 }
246 
247 const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
248 	.name = "xfs_dir3_leaf1",
249 	.verify_read = xfs_dir3_leaf1_read_verify,
250 	.verify_write = xfs_dir3_leaf1_write_verify,
251 };
252 
253 const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
254 	.name = "xfs_dir3_leafn",
255 	.verify_read = xfs_dir3_leafn_read_verify,
256 	.verify_write = xfs_dir3_leafn_write_verify,
257 };
258 
259 static int
260 xfs_dir3_leaf_read(
261 	struct xfs_trans	*tp,
262 	struct xfs_inode	*dp,
263 	xfs_dablk_t		fbno,
264 	xfs_daddr_t		mappedbno,
265 	struct xfs_buf		**bpp)
266 {
267 	int			err;
268 
269 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
270 				XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
271 	if (!err && tp)
272 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
273 	return err;
274 }
275 
276 int
277 xfs_dir3_leafn_read(
278 	struct xfs_trans	*tp,
279 	struct xfs_inode	*dp,
280 	xfs_dablk_t		fbno,
281 	xfs_daddr_t		mappedbno,
282 	struct xfs_buf		**bpp)
283 {
284 	int			err;
285 
286 	err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
287 				XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
288 	if (!err && tp)
289 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
290 	return err;
291 }
292 
293 /*
294  * Initialize a new leaf block, leaf1 or leafn magic accepted.
295  */
296 static void
297 xfs_dir3_leaf_init(
298 	struct xfs_mount	*mp,
299 	struct xfs_trans	*tp,
300 	struct xfs_buf		*bp,
301 	xfs_ino_t		owner,
302 	__uint16_t		type)
303 {
304 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
305 
306 	ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
307 
308 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
309 		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
310 
311 		memset(leaf3, 0, sizeof(*leaf3));
312 
313 		leaf3->info.hdr.magic = (type == XFS_DIR2_LEAF1_MAGIC)
314 					 ? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
315 					 : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
316 		leaf3->info.blkno = cpu_to_be64(bp->b_bn);
317 		leaf3->info.owner = cpu_to_be64(owner);
318 		uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
319 	} else {
320 		memset(leaf, 0, sizeof(*leaf));
321 		leaf->hdr.info.magic = cpu_to_be16(type);
322 	}
323 
324 	/*
325 	 * If it's a leaf-format directory initialize the tail.
326 	 * Caller is responsible for initialising the bests table.
327 	 */
328 	if (type == XFS_DIR2_LEAF1_MAGIC) {
329 		struct xfs_dir2_leaf_tail *ltp;
330 
331 		ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);
332 		ltp->bestcount = 0;
333 		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
334 		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF);
335 	} else {
336 		bp->b_ops = &xfs_dir3_leafn_buf_ops;
337 		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
338 	}
339 }
340 
341 int
342 xfs_dir3_leaf_get_buf(
343 	xfs_da_args_t		*args,
344 	xfs_dir2_db_t		bno,
345 	struct xfs_buf		**bpp,
346 	__uint16_t		magic)
347 {
348 	struct xfs_inode	*dp = args->dp;
349 	struct xfs_trans	*tp = args->trans;
350 	struct xfs_mount	*mp = dp->i_mount;
351 	struct xfs_buf		*bp;
352 	int			error;
353 
354 	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
355 	ASSERT(bno >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) &&
356 	       bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
357 
358 	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
359 			       -1, &bp, XFS_DATA_FORK);
360 	if (error)
361 		return error;
362 
363 	xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
364 	xfs_dir3_leaf_log_header(args, bp);
365 	if (magic == XFS_DIR2_LEAF1_MAGIC)
366 		xfs_dir3_leaf_log_tail(args, bp);
367 	*bpp = bp;
368 	return 0;
369 }
370 
371 /*
372  * Convert a block form directory to a leaf form directory.
373  */
374 int						/* error */
375 xfs_dir2_block_to_leaf(
376 	xfs_da_args_t		*args,		/* operation arguments */
377 	struct xfs_buf		*dbp)		/* input block's buffer */
378 {
379 	__be16			*bestsp;	/* leaf's bestsp entries */
380 	xfs_dablk_t		blkno;		/* leaf block's bno */
381 	xfs_dir2_data_hdr_t	*hdr;		/* block header */
382 	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
383 	xfs_dir2_block_tail_t	*btp;		/* block's tail */
384 	xfs_inode_t		*dp;		/* incore directory inode */
385 	int			error;		/* error return code */
386 	struct xfs_buf		*lbp;		/* leaf block's buffer */
387 	xfs_dir2_db_t		ldb;		/* leaf block's bno */
388 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
389 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
390 	int			needlog;	/* need to log block header */
391 	int			needscan;	/* need to rescan bestfree */
392 	xfs_trans_t		*tp;		/* transaction pointer */
393 	struct xfs_dir2_data_free *bf;
394 	struct xfs_dir2_leaf_entry *ents;
395 	struct xfs_dir3_icleaf_hdr leafhdr;
396 
397 	trace_xfs_dir2_block_to_leaf(args);
398 
399 	dp = args->dp;
400 	tp = args->trans;
401 	/*
402 	 * Add the leaf block to the inode.
403 	 * This interface will only put blocks in the leaf/node range.
404 	 * Since that's empty now, we'll get the root (block 0 in range).
405 	 */
406 	if ((error = xfs_da_grow_inode(args, &blkno))) {
407 		return error;
408 	}
409 	ldb = xfs_dir2_da_to_db(args->geo, blkno);
410 	ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));
411 	/*
412 	 * Initialize the leaf block, get a buffer for it.
413 	 */
414 	error = xfs_dir3_leaf_get_buf(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC);
415 	if (error)
416 		return error;
417 
418 	leaf = lbp->b_addr;
419 	hdr = dbp->b_addr;
420 	xfs_dir3_data_check(dp, dbp);
421 	btp = xfs_dir2_block_tail_p(args->geo, hdr);
422 	blp = xfs_dir2_block_leaf_p(btp);
423 	bf = dp->d_ops->data_bestfree_p(hdr);
424 	ents = dp->d_ops->leaf_ents_p(leaf);
425 
426 	/*
427 	 * Set the counts in the leaf header.
428 	 */
429 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
430 	leafhdr.count = be32_to_cpu(btp->count);
431 	leafhdr.stale = be32_to_cpu(btp->stale);
432 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
433 	xfs_dir3_leaf_log_header(args, lbp);
434 
435 	/*
436 	 * Could compact these but I think we always do the conversion
437 	 * after squeezing out stale entries.
438 	 */
439 	memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
440 	xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
441 	needscan = 0;
442 	needlog = 1;
443 	/*
444 	 * Make the space formerly occupied by the leaf entries and block
445 	 * tail be free.
446 	 */
447 	xfs_dir2_data_make_free(args, dbp,
448 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
449 		(xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -
450 				       (char *)blp),
451 		&needlog, &needscan);
452 	/*
453 	 * Fix up the block header, make it a data block.
454 	 */
455 	dbp->b_ops = &xfs_dir3_data_buf_ops;
456 	xfs_trans_buf_set_type(tp, dbp, XFS_BLFT_DIR_DATA_BUF);
457 	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
458 		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
459 	else
460 		hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
461 
462 	if (needscan)
463 		xfs_dir2_data_freescan(dp, hdr, &needlog);
464 	/*
465 	 * Set up leaf tail and bests table.
466 	 */
467 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
468 	ltp->bestcount = cpu_to_be32(1);
469 	bestsp = xfs_dir2_leaf_bests_p(ltp);
470 	bestsp[0] =  bf[0].length;
471 	/*
472 	 * Log the data header and leaf bests table.
473 	 */
474 	if (needlog)
475 		xfs_dir2_data_log_header(args, dbp);
476 	xfs_dir3_leaf_check(dp, lbp);
477 	xfs_dir3_data_check(dp, dbp);
478 	xfs_dir3_leaf_log_bests(args, lbp, 0, 0);
479 	return 0;
480 }
481 
482 STATIC void
483 xfs_dir3_leaf_find_stale(
484 	struct xfs_dir3_icleaf_hdr *leafhdr,
485 	struct xfs_dir2_leaf_entry *ents,
486 	int			index,
487 	int			*lowstale,
488 	int			*highstale)
489 {
490 	/*
491 	 * Find the first stale entry before our index, if any.
492 	 */
493 	for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
494 		if (ents[*lowstale].address ==
495 		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
496 			break;
497 	}
498 
499 	/*
500 	 * Find the first stale entry at or after our index, if any.
501 	 * Stop if the result would require moving more entries than using
502 	 * lowstale.
503 	 */
504 	for (*highstale = index; *highstale < leafhdr->count; ++*highstale) {
505 		if (ents[*highstale].address ==
506 		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
507 			break;
508 		if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
509 			break;
510 	}
511 }
512 
513 struct xfs_dir2_leaf_entry *
514 xfs_dir3_leaf_find_entry(
515 	struct xfs_dir3_icleaf_hdr *leafhdr,
516 	struct xfs_dir2_leaf_entry *ents,
517 	int			index,		/* leaf table position */
518 	int			compact,	/* need to compact leaves */
519 	int			lowstale,	/* index of prev stale leaf */
520 	int			highstale,	/* index of next stale leaf */
521 	int			*lfloglow,	/* low leaf logging index */
522 	int			*lfloghigh)	/* high leaf logging index */
523 {
524 	if (!leafhdr->stale) {
525 		xfs_dir2_leaf_entry_t	*lep;	/* leaf entry table pointer */
526 
527 		/*
528 		 * Now we need to make room to insert the leaf entry.
529 		 *
530 		 * If there are no stale entries, just insert a hole at index.
531 		 */
532 		lep = &ents[index];
533 		if (index < leafhdr->count)
534 			memmove(lep + 1, lep,
535 				(leafhdr->count - index) * sizeof(*lep));
536 
537 		/*
538 		 * Record low and high logging indices for the leaf.
539 		 */
540 		*lfloglow = index;
541 		*lfloghigh = leafhdr->count++;
542 		return lep;
543 	}
544 
545 	/*
546 	 * There are stale entries.
547 	 *
548 	 * We will use one of them for the new entry.  It's probably not at
549 	 * the right location, so we'll have to shift some up or down first.
550 	 *
551 	 * If we didn't compact before, we need to find the nearest stale
552 	 * entries before and after our insertion point.
553 	 */
554 	if (compact == 0)
555 		xfs_dir3_leaf_find_stale(leafhdr, ents, index,
556 					 &lowstale, &highstale);
557 
558 	/*
559 	 * If the low one is better, use it.
560 	 */
561 	if (lowstale >= 0 &&
562 	    (highstale == leafhdr->count ||
563 	     index - lowstale - 1 < highstale - index)) {
564 		ASSERT(index - lowstale - 1 >= 0);
565 		ASSERT(ents[lowstale].address ==
566 		       cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
567 
568 		/*
569 		 * Copy entries up to cover the stale entry and make room
570 		 * for the new entry.
571 		 */
572 		if (index - lowstale - 1 > 0) {
573 			memmove(&ents[lowstale], &ents[lowstale + 1],
574 				(index - lowstale - 1) *
575 					sizeof(xfs_dir2_leaf_entry_t));
576 		}
577 		*lfloglow = MIN(lowstale, *lfloglow);
578 		*lfloghigh = MAX(index - 1, *lfloghigh);
579 		leafhdr->stale--;
580 		return &ents[index - 1];
581 	}
582 
583 	/*
584 	 * The high one is better, so use that one.
585 	 */
586 	ASSERT(highstale - index >= 0);
587 	ASSERT(ents[highstale].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
588 
589 	/*
590 	 * Copy entries down to cover the stale entry and make room for the
591 	 * new entry.
592 	 */
593 	if (highstale - index > 0) {
594 		memmove(&ents[index + 1], &ents[index],
595 			(highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
596 	}
597 	*lfloglow = MIN(index, *lfloglow);
598 	*lfloghigh = MAX(highstale, *lfloghigh);
599 	leafhdr->stale--;
600 	return &ents[index];
601 }
602 
603 /*
604  * Add an entry to a leaf form directory.
605  */
606 int						/* error */
607 xfs_dir2_leaf_addname(
608 	xfs_da_args_t		*args)		/* operation arguments */
609 {
610 	__be16			*bestsp;	/* freespace table in leaf */
611 	int			compact;	/* need to compact leaves */
612 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
613 	struct xfs_buf		*dbp;		/* data block buffer */
614 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
615 	xfs_inode_t		*dp;		/* incore directory inode */
616 	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
617 	int			error;		/* error return value */
618 	int			grown;		/* allocated new data block */
619 	int			highstale;	/* index of next stale leaf */
620 	int			i;		/* temporary, index */
621 	int			index;		/* leaf table position */
622 	struct xfs_buf		*lbp;		/* leaf's buffer */
623 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
624 	int			length;		/* length of new entry */
625 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
626 	int			lfloglow;	/* low leaf logging index */
627 	int			lfloghigh;	/* high leaf logging index */
628 	int			lowstale;	/* index of prev stale leaf */
629 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
630 	int			needbytes;	/* leaf block bytes needed */
631 	int			needlog;	/* need to log data header */
632 	int			needscan;	/* need to rescan data free */
633 	__be16			*tagp;		/* end of data entry */
634 	xfs_trans_t		*tp;		/* transaction pointer */
635 	xfs_dir2_db_t		use_block;	/* data block number */
636 	struct xfs_dir2_data_free *bf;		/* bestfree table */
637 	struct xfs_dir2_leaf_entry *ents;
638 	struct xfs_dir3_icleaf_hdr leafhdr;
639 
640 	trace_xfs_dir2_leaf_addname(args);
641 
642 	dp = args->dp;
643 	tp = args->trans;
644 
645 	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
646 	if (error)
647 		return error;
648 
649 	/*
650 	 * Look up the entry by hash value and name.
651 	 * We know it's not there, our caller has already done a lookup.
652 	 * So the index is of the entry to insert in front of.
653 	 * But if there are dup hash values the index is of the first of those.
654 	 */
655 	index = xfs_dir2_leaf_search_hash(args, lbp);
656 	leaf = lbp->b_addr;
657 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
658 	ents = dp->d_ops->leaf_ents_p(leaf);
659 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
660 	bestsp = xfs_dir2_leaf_bests_p(ltp);
661 	length = dp->d_ops->data_entsize(args->namelen);
662 
663 	/*
664 	 * See if there are any entries with the same hash value
665 	 * and space in their block for the new entry.
666 	 * This is good because it puts multiple same-hash value entries
667 	 * in a data block, improving the lookup of those entries.
668 	 */
669 	for (use_block = -1, lep = &ents[index];
670 	     index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
671 	     index++, lep++) {
672 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
673 			continue;
674 		i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
675 		ASSERT(i < be32_to_cpu(ltp->bestcount));
676 		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
677 		if (be16_to_cpu(bestsp[i]) >= length) {
678 			use_block = i;
679 			break;
680 		}
681 	}
682 	/*
683 	 * Didn't find a block yet, linear search all the data blocks.
684 	 */
685 	if (use_block == -1) {
686 		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
687 			/*
688 			 * Remember a block we see that's missing.
689 			 */
690 			if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
691 			    use_block == -1)
692 				use_block = i;
693 			else if (be16_to_cpu(bestsp[i]) >= length) {
694 				use_block = i;
695 				break;
696 			}
697 		}
698 	}
699 	/*
700 	 * How many bytes do we need in the leaf block?
701 	 */
702 	needbytes = 0;
703 	if (!leafhdr.stale)
704 		needbytes += sizeof(xfs_dir2_leaf_entry_t);
705 	if (use_block == -1)
706 		needbytes += sizeof(xfs_dir2_data_off_t);
707 
708 	/*
709 	 * Now kill use_block if it refers to a missing block, so we
710 	 * can use it as an indication of allocation needed.
711 	 */
712 	if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
713 		use_block = -1;
714 	/*
715 	 * If we don't have enough free bytes but we can make enough
716 	 * by compacting out stale entries, we'll do that.
717 	 */
718 	if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes &&
719 	    leafhdr.stale > 1)
720 		compact = 1;
721 
722 	/*
723 	 * Otherwise if we don't have enough free bytes we need to
724 	 * convert to node form.
725 	 */
726 	else if ((char *)bestsp - (char *)&ents[leafhdr.count] < needbytes) {
727 		/*
728 		 * Just checking or no space reservation, give up.
729 		 */
730 		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
731 							args->total == 0) {
732 			xfs_trans_brelse(tp, lbp);
733 			return -ENOSPC;
734 		}
735 		/*
736 		 * Convert to node form.
737 		 */
738 		error = xfs_dir2_leaf_to_node(args, lbp);
739 		if (error)
740 			return error;
741 		/*
742 		 * Then add the new entry.
743 		 */
744 		return xfs_dir2_node_addname(args);
745 	}
746 	/*
747 	 * Otherwise it will fit without compaction.
748 	 */
749 	else
750 		compact = 0;
751 	/*
752 	 * If just checking, then it will fit unless we needed to allocate
753 	 * a new data block.
754 	 */
755 	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
756 		xfs_trans_brelse(tp, lbp);
757 		return use_block == -1 ? -ENOSPC : 0;
758 	}
759 	/*
760 	 * If no allocations are allowed, return now before we've
761 	 * changed anything.
762 	 */
763 	if (args->total == 0 && use_block == -1) {
764 		xfs_trans_brelse(tp, lbp);
765 		return -ENOSPC;
766 	}
767 	/*
768 	 * Need to compact the leaf entries, removing stale ones.
769 	 * Leave one stale entry behind - the one closest to our
770 	 * insertion index - and we'll shift that one to our insertion
771 	 * point later.
772 	 */
773 	if (compact) {
774 		xfs_dir3_leaf_compact_x1(&leafhdr, ents, &index, &lowstale,
775 			&highstale, &lfloglow, &lfloghigh);
776 	}
777 	/*
778 	 * There are stale entries, so we'll need log-low and log-high
779 	 * impossibly bad values later.
780 	 */
781 	else if (leafhdr.stale) {
782 		lfloglow = leafhdr.count;
783 		lfloghigh = -1;
784 	}
785 	/*
786 	 * If there was no data block space found, we need to allocate
787 	 * a new one.
788 	 */
789 	if (use_block == -1) {
790 		/*
791 		 * Add the new data block.
792 		 */
793 		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
794 				&use_block))) {
795 			xfs_trans_brelse(tp, lbp);
796 			return error;
797 		}
798 		/*
799 		 * Initialize the block.
800 		 */
801 		if ((error = xfs_dir3_data_init(args, use_block, &dbp))) {
802 			xfs_trans_brelse(tp, lbp);
803 			return error;
804 		}
805 		/*
806 		 * If we're adding a new data block on the end we need to
807 		 * extend the bests table.  Copy it up one entry.
808 		 */
809 		if (use_block >= be32_to_cpu(ltp->bestcount)) {
810 			bestsp--;
811 			memmove(&bestsp[0], &bestsp[1],
812 				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
813 			be32_add_cpu(&ltp->bestcount, 1);
814 			xfs_dir3_leaf_log_tail(args, lbp);
815 			xfs_dir3_leaf_log_bests(args, lbp, 0,
816 						be32_to_cpu(ltp->bestcount) - 1);
817 		}
818 		/*
819 		 * If we're filling in a previously empty block just log it.
820 		 */
821 		else
822 			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
823 		hdr = dbp->b_addr;
824 		bf = dp->d_ops->data_bestfree_p(hdr);
825 		bestsp[use_block] = bf[0].length;
826 		grown = 1;
827 	} else {
828 		/*
829 		 * Already had space in some data block.
830 		 * Just read that one in.
831 		 */
832 		error = xfs_dir3_data_read(tp, dp,
833 				   xfs_dir2_db_to_da(args->geo, use_block),
834 				   -1, &dbp);
835 		if (error) {
836 			xfs_trans_brelse(tp, lbp);
837 			return error;
838 		}
839 		hdr = dbp->b_addr;
840 		bf = dp->d_ops->data_bestfree_p(hdr);
841 		grown = 0;
842 	}
843 	/*
844 	 * Point to the biggest freespace in our data block.
845 	 */
846 	dup = (xfs_dir2_data_unused_t *)
847 	      ((char *)hdr + be16_to_cpu(bf[0].offset));
848 	ASSERT(be16_to_cpu(dup->length) >= length);
849 	needscan = needlog = 0;
850 	/*
851 	 * Mark the initial part of our freespace in use for the new entry.
852 	 */
853 	xfs_dir2_data_use_free(args, dbp, dup,
854 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
855 		&needlog, &needscan);
856 	/*
857 	 * Initialize our new entry (at last).
858 	 */
859 	dep = (xfs_dir2_data_entry_t *)dup;
860 	dep->inumber = cpu_to_be64(args->inumber);
861 	dep->namelen = args->namelen;
862 	memcpy(dep->name, args->name, dep->namelen);
863 	dp->d_ops->data_put_ftype(dep, args->filetype);
864 	tagp = dp->d_ops->data_entry_tag_p(dep);
865 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
866 	/*
867 	 * Need to scan fix up the bestfree table.
868 	 */
869 	if (needscan)
870 		xfs_dir2_data_freescan(dp, hdr, &needlog);
871 	/*
872 	 * Need to log the data block's header.
873 	 */
874 	if (needlog)
875 		xfs_dir2_data_log_header(args, dbp);
876 	xfs_dir2_data_log_entry(args, dbp, dep);
877 	/*
878 	 * If the bests table needs to be changed, do it.
879 	 * Log the change unless we've already done that.
880 	 */
881 	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {
882 		bestsp[use_block] = bf[0].length;
883 		if (!grown)
884 			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
885 	}
886 
887 	lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale,
888 				       highstale, &lfloglow, &lfloghigh);
889 
890 	/*
891 	 * Fill in the new leaf entry.
892 	 */
893 	lep->hashval = cpu_to_be32(args->hashval);
894 	lep->address = cpu_to_be32(
895 				xfs_dir2_db_off_to_dataptr(args->geo, use_block,
896 				be16_to_cpu(*tagp)));
897 	/*
898 	 * Log the leaf fields and give up the buffers.
899 	 */
900 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
901 	xfs_dir3_leaf_log_header(args, lbp);
902 	xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
903 	xfs_dir3_leaf_check(dp, lbp);
904 	xfs_dir3_data_check(dp, dbp);
905 	return 0;
906 }
907 
908 /*
909  * Compact out any stale entries in the leaf.
910  * Log the header and changed leaf entries, if any.
911  */
912 void
913 xfs_dir3_leaf_compact(
914 	xfs_da_args_t	*args,		/* operation arguments */
915 	struct xfs_dir3_icleaf_hdr *leafhdr,
916 	struct xfs_buf	*bp)		/* leaf buffer */
917 {
918 	int		from;		/* source leaf index */
919 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
920 	int		loglow;		/* first leaf entry to log */
921 	int		to;		/* target leaf index */
922 	struct xfs_dir2_leaf_entry *ents;
923 	struct xfs_inode *dp = args->dp;
924 
925 	leaf = bp->b_addr;
926 	if (!leafhdr->stale)
927 		return;
928 
929 	/*
930 	 * Compress out the stale entries in place.
931 	 */
932 	ents = dp->d_ops->leaf_ents_p(leaf);
933 	for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
934 		if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
935 			continue;
936 		/*
937 		 * Only actually copy the entries that are different.
938 		 */
939 		if (from > to) {
940 			if (loglow == -1)
941 				loglow = to;
942 			ents[to] = ents[from];
943 		}
944 		to++;
945 	}
946 	/*
947 	 * Update and log the header, log the leaf entries.
948 	 */
949 	ASSERT(leafhdr->stale == from - to);
950 	leafhdr->count -= leafhdr->stale;
951 	leafhdr->stale = 0;
952 
953 	dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
954 	xfs_dir3_leaf_log_header(args, bp);
955 	if (loglow != -1)
956 		xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
957 }
958 
959 /*
960  * Compact the leaf entries, removing stale ones.
961  * Leave one stale entry behind - the one closest to our
962  * insertion index - and the caller will shift that one to our insertion
963  * point later.
964  * Return new insertion index, where the remaining stale entry is,
965  * and leaf logging indices.
966  */
967 void
968 xfs_dir3_leaf_compact_x1(
969 	struct xfs_dir3_icleaf_hdr *leafhdr,
970 	struct xfs_dir2_leaf_entry *ents,
971 	int		*indexp,	/* insertion index */
972 	int		*lowstalep,	/* out: stale entry before us */
973 	int		*highstalep,	/* out: stale entry after us */
974 	int		*lowlogp,	/* out: low log index */
975 	int		*highlogp)	/* out: high log index */
976 {
977 	int		from;		/* source copy index */
978 	int		highstale;	/* stale entry at/after index */
979 	int		index;		/* insertion index */
980 	int		keepstale;	/* source index of kept stale */
981 	int		lowstale;	/* stale entry before index */
982 	int		newindex=0;	/* new insertion index */
983 	int		to;		/* destination copy index */
984 
985 	ASSERT(leafhdr->stale > 1);
986 	index = *indexp;
987 
988 	xfs_dir3_leaf_find_stale(leafhdr, ents, index, &lowstale, &highstale);
989 
990 	/*
991 	 * Pick the better of lowstale and highstale.
992 	 */
993 	if (lowstale >= 0 &&
994 	    (highstale == leafhdr->count ||
995 	     index - lowstale <= highstale - index))
996 		keepstale = lowstale;
997 	else
998 		keepstale = highstale;
999 	/*
1000 	 * Copy the entries in place, removing all the stale entries
1001 	 * except keepstale.
1002 	 */
1003 	for (from = to = 0; from < leafhdr->count; from++) {
1004 		/*
1005 		 * Notice the new value of index.
1006 		 */
1007 		if (index == from)
1008 			newindex = to;
1009 		if (from != keepstale &&
1010 		    ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
1011 			if (from == to)
1012 				*lowlogp = to;
1013 			continue;
1014 		}
1015 		/*
1016 		 * Record the new keepstale value for the insertion.
1017 		 */
1018 		if (from == keepstale)
1019 			lowstale = highstale = to;
1020 		/*
1021 		 * Copy only the entries that have moved.
1022 		 */
1023 		if (from > to)
1024 			ents[to] = ents[from];
1025 		to++;
1026 	}
1027 	ASSERT(from > to);
1028 	/*
1029 	 * If the insertion point was past the last entry,
1030 	 * set the new insertion point accordingly.
1031 	 */
1032 	if (index == from)
1033 		newindex = to;
1034 	*indexp = newindex;
1035 	/*
1036 	 * Adjust the leaf header values.
1037 	 */
1038 	leafhdr->count -= from - to;
1039 	leafhdr->stale = 1;
1040 	/*
1041 	 * Remember the low/high stale value only in the "right"
1042 	 * direction.
1043 	 */
1044 	if (lowstale >= newindex)
1045 		lowstale = -1;
1046 	else
1047 		highstale = leafhdr->count;
1048 	*highlogp = leafhdr->count - 1;
1049 	*lowstalep = lowstale;
1050 	*highstalep = highstale;
1051 }
1052 
1053 /*
1054  * Log the bests entries indicated from a leaf1 block.
1055  */
1056 static void
1057 xfs_dir3_leaf_log_bests(
1058 	struct xfs_da_args	*args,
1059 	struct xfs_buf		*bp,		/* leaf buffer */
1060 	int			first,		/* first entry to log */
1061 	int			last)		/* last entry to log */
1062 {
1063 	__be16			*firstb;	/* pointer to first entry */
1064 	__be16			*lastb;		/* pointer to last entry */
1065 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
1066 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1067 
1068 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1069 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC));
1070 
1071 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
1072 	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1073 	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1074 	xfs_trans_log_buf(args->trans, bp,
1075 		(uint)((char *)firstb - (char *)leaf),
1076 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1077 }
1078 
1079 /*
1080  * Log the leaf entries indicated from a leaf1 or leafn block.
1081  */
1082 void
1083 xfs_dir3_leaf_log_ents(
1084 	struct xfs_da_args	*args,
1085 	struct xfs_buf		*bp,
1086 	int			first,
1087 	int			last)
1088 {
1089 	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
1090 	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
1091 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
1092 	struct xfs_dir2_leaf_entry *ents;
1093 
1094 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1095 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1096 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1097 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1098 
1099 	ents = args->dp->d_ops->leaf_ents_p(leaf);
1100 	firstlep = &ents[first];
1101 	lastlep = &ents[last];
1102 	xfs_trans_log_buf(args->trans, bp,
1103 		(uint)((char *)firstlep - (char *)leaf),
1104 		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1105 }
1106 
1107 /*
1108  * Log the header of the leaf1 or leafn block.
1109  */
1110 void
1111 xfs_dir3_leaf_log_header(
1112 	struct xfs_da_args	*args,
1113 	struct xfs_buf		*bp)
1114 {
1115 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
1116 
1117 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1118 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1119 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1120 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1121 
1122 	xfs_trans_log_buf(args->trans, bp,
1123 			  (uint)((char *)&leaf->hdr - (char *)leaf),
1124 			  args->dp->d_ops->leaf_hdr_size - 1);
1125 }
1126 
1127 /*
1128  * Log the tail of the leaf1 block.
1129  */
1130 STATIC void
1131 xfs_dir3_leaf_log_tail(
1132 	struct xfs_da_args	*args,
1133 	struct xfs_buf		*bp)
1134 {
1135 	struct xfs_dir2_leaf	*leaf = bp->b_addr;
1136 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1137 
1138 	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1139 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
1140 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1141 	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
1142 
1143 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
1144 	xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf),
1145 		(uint)(args->geo->blksize - 1));
1146 }
1147 
1148 /*
1149  * Look up the entry referred to by args in the leaf format directory.
1150  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1151  * is also used by the node-format code.
1152  */
1153 int
1154 xfs_dir2_leaf_lookup(
1155 	xfs_da_args_t		*args)		/* operation arguments */
1156 {
1157 	struct xfs_buf		*dbp;		/* data block buffer */
1158 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1159 	xfs_inode_t		*dp;		/* incore directory inode */
1160 	int			error;		/* error return code */
1161 	int			index;		/* found entry index */
1162 	struct xfs_buf		*lbp;		/* leaf buffer */
1163 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1164 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1165 	xfs_trans_t		*tp;		/* transaction pointer */
1166 	struct xfs_dir2_leaf_entry *ents;
1167 
1168 	trace_xfs_dir2_leaf_lookup(args);
1169 
1170 	/*
1171 	 * Look up name in the leaf block, returning both buffers and index.
1172 	 */
1173 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1174 		return error;
1175 	}
1176 	tp = args->trans;
1177 	dp = args->dp;
1178 	xfs_dir3_leaf_check(dp, lbp);
1179 	leaf = lbp->b_addr;
1180 	ents = dp->d_ops->leaf_ents_p(leaf);
1181 	/*
1182 	 * Get to the leaf entry and contained data entry address.
1183 	 */
1184 	lep = &ents[index];
1185 
1186 	/*
1187 	 * Point to the data entry.
1188 	 */
1189 	dep = (xfs_dir2_data_entry_t *)
1190 	      ((char *)dbp->b_addr +
1191 	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
1192 	/*
1193 	 * Return the found inode number & CI name if appropriate
1194 	 */
1195 	args->inumber = be64_to_cpu(dep->inumber);
1196 	args->filetype = dp->d_ops->data_get_ftype(dep);
1197 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1198 	xfs_trans_brelse(tp, dbp);
1199 	xfs_trans_brelse(tp, lbp);
1200 	return error;
1201 }
1202 
1203 /*
1204  * Look up name/hash in the leaf block.
1205  * Fill in indexp with the found index, and dbpp with the data buffer.
1206  * If not found dbpp will be NULL, and ENOENT comes back.
1207  * lbpp will always be filled in with the leaf buffer unless there's an error.
1208  */
1209 static int					/* error */
1210 xfs_dir2_leaf_lookup_int(
1211 	xfs_da_args_t		*args,		/* operation arguments */
1212 	struct xfs_buf		**lbpp,		/* out: leaf buffer */
1213 	int			*indexp,	/* out: index in leaf block */
1214 	struct xfs_buf		**dbpp)		/* out: data buffer */
1215 {
1216 	xfs_dir2_db_t		curdb = -1;	/* current data block number */
1217 	struct xfs_buf		*dbp = NULL;	/* data buffer */
1218 	xfs_dir2_data_entry_t	*dep;		/* data entry */
1219 	xfs_inode_t		*dp;		/* incore directory inode */
1220 	int			error;		/* error return code */
1221 	int			index;		/* index in leaf block */
1222 	struct xfs_buf		*lbp;		/* leaf buffer */
1223 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1224 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1225 	xfs_mount_t		*mp;		/* filesystem mount point */
1226 	xfs_dir2_db_t		newdb;		/* new data block number */
1227 	xfs_trans_t		*tp;		/* transaction pointer */
1228 	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
1229 	enum xfs_dacmp		cmp;		/* name compare result */
1230 	struct xfs_dir2_leaf_entry *ents;
1231 	struct xfs_dir3_icleaf_hdr leafhdr;
1232 
1233 	dp = args->dp;
1234 	tp = args->trans;
1235 	mp = dp->i_mount;
1236 
1237 	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
1238 	if (error)
1239 		return error;
1240 
1241 	*lbpp = lbp;
1242 	leaf = lbp->b_addr;
1243 	xfs_dir3_leaf_check(dp, lbp);
1244 	ents = dp->d_ops->leaf_ents_p(leaf);
1245 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
1246 
1247 	/*
1248 	 * Look for the first leaf entry with our hash value.
1249 	 */
1250 	index = xfs_dir2_leaf_search_hash(args, lbp);
1251 	/*
1252 	 * Loop over all the entries with the right hash value
1253 	 * looking to match the name.
1254 	 */
1255 	for (lep = &ents[index];
1256 	     index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
1257 	     lep++, index++) {
1258 		/*
1259 		 * Skip over stale leaf entries.
1260 		 */
1261 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
1262 			continue;
1263 		/*
1264 		 * Get the new data block number.
1265 		 */
1266 		newdb = xfs_dir2_dataptr_to_db(args->geo,
1267 					       be32_to_cpu(lep->address));
1268 		/*
1269 		 * If it's not the same as the old data block number,
1270 		 * need to pitch the old one and read the new one.
1271 		 */
1272 		if (newdb != curdb) {
1273 			if (dbp)
1274 				xfs_trans_brelse(tp, dbp);
1275 			error = xfs_dir3_data_read(tp, dp,
1276 					   xfs_dir2_db_to_da(args->geo, newdb),
1277 					   -1, &dbp);
1278 			if (error) {
1279 				xfs_trans_brelse(tp, lbp);
1280 				return error;
1281 			}
1282 			curdb = newdb;
1283 		}
1284 		/*
1285 		 * Point to the data entry.
1286 		 */
1287 		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
1288 			xfs_dir2_dataptr_to_off(args->geo,
1289 						be32_to_cpu(lep->address)));
1290 		/*
1291 		 * Compare name and if it's an exact match, return the index
1292 		 * and buffer. If it's the first case-insensitive match, store
1293 		 * the index and buffer and continue looking for an exact match.
1294 		 */
1295 		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1296 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1297 			args->cmpresult = cmp;
1298 			*indexp = index;
1299 			/* case exact match: return the current buffer. */
1300 			if (cmp == XFS_CMP_EXACT) {
1301 				*dbpp = dbp;
1302 				return 0;
1303 			}
1304 			cidb = curdb;
1305 		}
1306 	}
1307 	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1308 	/*
1309 	 * Here, we can only be doing a lookup (not a rename or remove).
1310 	 * If a case-insensitive match was found earlier, re-read the
1311 	 * appropriate data block if required and return it.
1312 	 */
1313 	if (args->cmpresult == XFS_CMP_CASE) {
1314 		ASSERT(cidb != -1);
1315 		if (cidb != curdb) {
1316 			xfs_trans_brelse(tp, dbp);
1317 			error = xfs_dir3_data_read(tp, dp,
1318 					   xfs_dir2_db_to_da(args->geo, cidb),
1319 					   -1, &dbp);
1320 			if (error) {
1321 				xfs_trans_brelse(tp, lbp);
1322 				return error;
1323 			}
1324 		}
1325 		*dbpp = dbp;
1326 		return 0;
1327 	}
1328 	/*
1329 	 * No match found, return -ENOENT.
1330 	 */
1331 	ASSERT(cidb == -1);
1332 	if (dbp)
1333 		xfs_trans_brelse(tp, dbp);
1334 	xfs_trans_brelse(tp, lbp);
1335 	return -ENOENT;
1336 }
1337 
1338 /*
1339  * Remove an entry from a leaf format directory.
1340  */
1341 int						/* error */
1342 xfs_dir2_leaf_removename(
1343 	xfs_da_args_t		*args)		/* operation arguments */
1344 {
1345 	__be16			*bestsp;	/* leaf block best freespace */
1346 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
1347 	xfs_dir2_db_t		db;		/* data block number */
1348 	struct xfs_buf		*dbp;		/* data block buffer */
1349 	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
1350 	xfs_inode_t		*dp;		/* incore directory inode */
1351 	int			error;		/* error return code */
1352 	xfs_dir2_db_t		i;		/* temporary data block # */
1353 	int			index;		/* index into leaf entries */
1354 	struct xfs_buf		*lbp;		/* leaf buffer */
1355 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1356 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1357 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1358 	int			needlog;	/* need to log data header */
1359 	int			needscan;	/* need to rescan data frees */
1360 	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
1361 	struct xfs_dir2_data_free *bf;		/* bestfree table */
1362 	struct xfs_dir2_leaf_entry *ents;
1363 	struct xfs_dir3_icleaf_hdr leafhdr;
1364 
1365 	trace_xfs_dir2_leaf_removename(args);
1366 
1367 	/*
1368 	 * Lookup the leaf entry, get the leaf and data blocks read in.
1369 	 */
1370 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1371 		return error;
1372 	}
1373 	dp = args->dp;
1374 	leaf = lbp->b_addr;
1375 	hdr = dbp->b_addr;
1376 	xfs_dir3_data_check(dp, dbp);
1377 	bf = dp->d_ops->data_bestfree_p(hdr);
1378 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
1379 	ents = dp->d_ops->leaf_ents_p(leaf);
1380 	/*
1381 	 * Point to the leaf entry, use that to point to the data entry.
1382 	 */
1383 	lep = &ents[index];
1384 	db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
1385 	dep = (xfs_dir2_data_entry_t *)((char *)hdr +
1386 		xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
1387 	needscan = needlog = 0;
1388 	oldbest = be16_to_cpu(bf[0].length);
1389 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
1390 	bestsp = xfs_dir2_leaf_bests_p(ltp);
1391 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1392 	/*
1393 	 * Mark the former data entry unused.
1394 	 */
1395 	xfs_dir2_data_make_free(args, dbp,
1396 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1397 		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
1398 	/*
1399 	 * We just mark the leaf entry stale by putting a null in it.
1400 	 */
1401 	leafhdr.stale++;
1402 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
1403 	xfs_dir3_leaf_log_header(args, lbp);
1404 
1405 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1406 	xfs_dir3_leaf_log_ents(args, lbp, index, index);
1407 
1408 	/*
1409 	 * Scan the freespace in the data block again if necessary,
1410 	 * log the data block header if necessary.
1411 	 */
1412 	if (needscan)
1413 		xfs_dir2_data_freescan(dp, hdr, &needlog);
1414 	if (needlog)
1415 		xfs_dir2_data_log_header(args, dbp);
1416 	/*
1417 	 * If the longest freespace in the data block has changed,
1418 	 * put the new value in the bests table and log that.
1419 	 */
1420 	if (be16_to_cpu(bf[0].length) != oldbest) {
1421 		bestsp[db] = bf[0].length;
1422 		xfs_dir3_leaf_log_bests(args, lbp, db, db);
1423 	}
1424 	xfs_dir3_data_check(dp, dbp);
1425 	/*
1426 	 * If the data block is now empty then get rid of the data block.
1427 	 */
1428 	if (be16_to_cpu(bf[0].length) ==
1429 			args->geo->blksize - dp->d_ops->data_entry_offset) {
1430 		ASSERT(db != args->geo->datablk);
1431 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1432 			/*
1433 			 * Nope, can't get rid of it because it caused
1434 			 * allocation of a bmap btree block to do so.
1435 			 * Just go on, returning success, leaving the
1436 			 * empty block in place.
1437 			 */
1438 			if (error == -ENOSPC && args->total == 0)
1439 				error = 0;
1440 			xfs_dir3_leaf_check(dp, lbp);
1441 			return error;
1442 		}
1443 		dbp = NULL;
1444 		/*
1445 		 * If this is the last data block then compact the
1446 		 * bests table by getting rid of entries.
1447 		 */
1448 		if (db == be32_to_cpu(ltp->bestcount) - 1) {
1449 			/*
1450 			 * Look for the last active entry (i).
1451 			 */
1452 			for (i = db - 1; i > 0; i--) {
1453 				if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1454 					break;
1455 			}
1456 			/*
1457 			 * Copy the table down so inactive entries at the
1458 			 * end are removed.
1459 			 */
1460 			memmove(&bestsp[db - i], bestsp,
1461 				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1462 			be32_add_cpu(&ltp->bestcount, -(db - i));
1463 			xfs_dir3_leaf_log_tail(args, lbp);
1464 			xfs_dir3_leaf_log_bests(args, lbp, 0,
1465 						be32_to_cpu(ltp->bestcount) - 1);
1466 		} else
1467 			bestsp[db] = cpu_to_be16(NULLDATAOFF);
1468 	}
1469 	/*
1470 	 * If the data block was not the first one, drop it.
1471 	 */
1472 	else if (db != args->geo->datablk)
1473 		dbp = NULL;
1474 
1475 	xfs_dir3_leaf_check(dp, lbp);
1476 	/*
1477 	 * See if we can convert to block form.
1478 	 */
1479 	return xfs_dir2_leaf_to_block(args, lbp, dbp);
1480 }
1481 
1482 /*
1483  * Replace the inode number in a leaf format directory entry.
1484  */
1485 int						/* error */
1486 xfs_dir2_leaf_replace(
1487 	xfs_da_args_t		*args)		/* operation arguments */
1488 {
1489 	struct xfs_buf		*dbp;		/* data block buffer */
1490 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1491 	xfs_inode_t		*dp;		/* incore directory inode */
1492 	int			error;		/* error return code */
1493 	int			index;		/* index of leaf entry */
1494 	struct xfs_buf		*lbp;		/* leaf buffer */
1495 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1496 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1497 	xfs_trans_t		*tp;		/* transaction pointer */
1498 	struct xfs_dir2_leaf_entry *ents;
1499 
1500 	trace_xfs_dir2_leaf_replace(args);
1501 
1502 	/*
1503 	 * Look up the entry.
1504 	 */
1505 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1506 		return error;
1507 	}
1508 	dp = args->dp;
1509 	leaf = lbp->b_addr;
1510 	ents = dp->d_ops->leaf_ents_p(leaf);
1511 	/*
1512 	 * Point to the leaf entry, get data address from it.
1513 	 */
1514 	lep = &ents[index];
1515 	/*
1516 	 * Point to the data entry.
1517 	 */
1518 	dep = (xfs_dir2_data_entry_t *)
1519 	      ((char *)dbp->b_addr +
1520 	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
1521 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1522 	/*
1523 	 * Put the new inode number in, log it.
1524 	 */
1525 	dep->inumber = cpu_to_be64(args->inumber);
1526 	dp->d_ops->data_put_ftype(dep, args->filetype);
1527 	tp = args->trans;
1528 	xfs_dir2_data_log_entry(args, dbp, dep);
1529 	xfs_dir3_leaf_check(dp, lbp);
1530 	xfs_trans_brelse(tp, lbp);
1531 	return 0;
1532 }
1533 
1534 /*
1535  * Return index in the leaf block (lbp) which is either the first
1536  * one with this hash value, or if there are none, the insert point
1537  * for that hash value.
1538  */
1539 int						/* index value */
1540 xfs_dir2_leaf_search_hash(
1541 	xfs_da_args_t		*args,		/* operation arguments */
1542 	struct xfs_buf		*lbp)		/* leaf buffer */
1543 {
1544 	xfs_dahash_t		hash=0;		/* hash from this entry */
1545 	xfs_dahash_t		hashwant;	/* hash value looking for */
1546 	int			high;		/* high leaf index */
1547 	int			low;		/* low leaf index */
1548 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1549 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1550 	int			mid=0;		/* current leaf index */
1551 	struct xfs_dir2_leaf_entry *ents;
1552 	struct xfs_dir3_icleaf_hdr leafhdr;
1553 
1554 	leaf = lbp->b_addr;
1555 	ents = args->dp->d_ops->leaf_ents_p(leaf);
1556 	args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
1557 
1558 	/*
1559 	 * Note, the table cannot be empty, so we have to go through the loop.
1560 	 * Binary search the leaf entries looking for our hash value.
1561 	 */
1562 	for (lep = ents, low = 0, high = leafhdr.count - 1,
1563 		hashwant = args->hashval;
1564 	     low <= high; ) {
1565 		mid = (low + high) >> 1;
1566 		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1567 			break;
1568 		if (hash < hashwant)
1569 			low = mid + 1;
1570 		else
1571 			high = mid - 1;
1572 	}
1573 	/*
1574 	 * Found one, back up through all the equal hash values.
1575 	 */
1576 	if (hash == hashwant) {
1577 		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1578 			mid--;
1579 		}
1580 	}
1581 	/*
1582 	 * Need to point to an entry higher than ours.
1583 	 */
1584 	else if (hash < hashwant)
1585 		mid++;
1586 	return mid;
1587 }
1588 
1589 /*
1590  * Trim off a trailing data block.  We know it's empty since the leaf
1591  * freespace table says so.
1592  */
1593 int						/* error */
1594 xfs_dir2_leaf_trim_data(
1595 	xfs_da_args_t		*args,		/* operation arguments */
1596 	struct xfs_buf		*lbp,		/* leaf buffer */
1597 	xfs_dir2_db_t		db)		/* data block number */
1598 {
1599 	__be16			*bestsp;	/* leaf bests table */
1600 	struct xfs_buf		*dbp;		/* data block buffer */
1601 	xfs_inode_t		*dp;		/* incore directory inode */
1602 	int			error;		/* error return value */
1603 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1604 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1605 	xfs_trans_t		*tp;		/* transaction pointer */
1606 
1607 	dp = args->dp;
1608 	tp = args->trans;
1609 	/*
1610 	 * Read the offending data block.  We need its buffer.
1611 	 */
1612 	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
1613 				   -1, &dbp);
1614 	if (error)
1615 		return error;
1616 
1617 	leaf = lbp->b_addr;
1618 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
1619 
1620 #ifdef DEBUG
1621 {
1622 	struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
1623 	struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);
1624 
1625 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
1626 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
1627 	ASSERT(be16_to_cpu(bf[0].length) ==
1628 	       args->geo->blksize - dp->d_ops->data_entry_offset);
1629 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1630 }
1631 #endif
1632 
1633 	/*
1634 	 * Get rid of the data block.
1635 	 */
1636 	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1637 		ASSERT(error != -ENOSPC);
1638 		xfs_trans_brelse(tp, dbp);
1639 		return error;
1640 	}
1641 	/*
1642 	 * Eliminate the last bests entry from the table.
1643 	 */
1644 	bestsp = xfs_dir2_leaf_bests_p(ltp);
1645 	be32_add_cpu(&ltp->bestcount, -1);
1646 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1647 	xfs_dir3_leaf_log_tail(args, lbp);
1648 	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1649 	return 0;
1650 }
1651 
1652 static inline size_t
1653 xfs_dir3_leaf_size(
1654 	struct xfs_dir3_icleaf_hdr	*hdr,
1655 	int				counts)
1656 {
1657 	int	entries;
1658 	int	hdrsize;
1659 
1660 	entries = hdr->count - hdr->stale;
1661 	if (hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
1662 	    hdr->magic == XFS_DIR2_LEAFN_MAGIC)
1663 		hdrsize = sizeof(struct xfs_dir2_leaf_hdr);
1664 	else
1665 		hdrsize = sizeof(struct xfs_dir3_leaf_hdr);
1666 
1667 	return hdrsize + entries * sizeof(xfs_dir2_leaf_entry_t)
1668 	               + counts * sizeof(xfs_dir2_data_off_t)
1669 		       + sizeof(xfs_dir2_leaf_tail_t);
1670 }
1671 
1672 /*
1673  * Convert node form directory to leaf form directory.
1674  * The root of the node form dir needs to already be a LEAFN block.
1675  * Just return if we can't do anything.
1676  */
1677 int						/* error */
1678 xfs_dir2_node_to_leaf(
1679 	xfs_da_state_t		*state)		/* directory operation state */
1680 {
1681 	xfs_da_args_t		*args;		/* operation arguments */
1682 	xfs_inode_t		*dp;		/* incore directory inode */
1683 	int			error;		/* error return code */
1684 	struct xfs_buf		*fbp;		/* buffer for freespace block */
1685 	xfs_fileoff_t		fo;		/* freespace file offset */
1686 	xfs_dir2_free_t		*free;		/* freespace structure */
1687 	struct xfs_buf		*lbp;		/* buffer for leaf block */
1688 	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
1689 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1690 	xfs_mount_t		*mp;		/* filesystem mount point */
1691 	int			rval;		/* successful free trim? */
1692 	xfs_trans_t		*tp;		/* transaction pointer */
1693 	struct xfs_dir3_icleaf_hdr leafhdr;
1694 	struct xfs_dir3_icfree_hdr freehdr;
1695 
1696 	/*
1697 	 * There's more than a leaf level in the btree, so there must
1698 	 * be multiple leafn blocks.  Give up.
1699 	 */
1700 	if (state->path.active > 1)
1701 		return 0;
1702 	args = state->args;
1703 
1704 	trace_xfs_dir2_node_to_leaf(args);
1705 
1706 	mp = state->mp;
1707 	dp = args->dp;
1708 	tp = args->trans;
1709 	/*
1710 	 * Get the last offset in the file.
1711 	 */
1712 	if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {
1713 		return error;
1714 	}
1715 	fo -= args->geo->fsbcount;
1716 	/*
1717 	 * If there are freespace blocks other than the first one,
1718 	 * take this opportunity to remove trailing empty freespace blocks
1719 	 * that may have been left behind during no-space-reservation
1720 	 * operations.
1721 	 */
1722 	while (fo > args->geo->freeblk) {
1723 		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1724 			return error;
1725 		}
1726 		if (rval)
1727 			fo -= args->geo->fsbcount;
1728 		else
1729 			return 0;
1730 	}
1731 	/*
1732 	 * Now find the block just before the freespace block.
1733 	 */
1734 	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1735 		return error;
1736 	}
1737 	/*
1738 	 * If it's not the single leaf block, give up.
1739 	 */
1740 	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)
1741 		return 0;
1742 	lbp = state->path.blk[0].bp;
1743 	leaf = lbp->b_addr;
1744 	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
1745 
1746 	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
1747 	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
1748 
1749 	/*
1750 	 * Read the freespace block.
1751 	 */
1752 	error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);
1753 	if (error)
1754 		return error;
1755 	free = fbp->b_addr;
1756 	dp->d_ops->free_hdr_from_disk(&freehdr, free);
1757 
1758 	ASSERT(!freehdr.firstdb);
1759 
1760 	/*
1761 	 * Now see if the leafn and free data will fit in a leaf1.
1762 	 * If not, release the buffer and give up.
1763 	 */
1764 	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {
1765 		xfs_trans_brelse(tp, fbp);
1766 		return 0;
1767 	}
1768 
1769 	/*
1770 	 * If the leaf has any stale entries in it, compress them out.
1771 	 */
1772 	if (leafhdr.stale)
1773 		xfs_dir3_leaf_compact(args, &leafhdr, lbp);
1774 
1775 	lbp->b_ops = &xfs_dir3_leaf1_buf_ops;
1776 	xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAF1_BUF);
1777 	leafhdr.magic = (leafhdr.magic == XFS_DIR2_LEAFN_MAGIC)
1778 					? XFS_DIR2_LEAF1_MAGIC
1779 					: XFS_DIR3_LEAF1_MAGIC;
1780 
1781 	/*
1782 	 * Set up the leaf tail from the freespace block.
1783 	 */
1784 	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
1785 	ltp->bestcount = cpu_to_be32(freehdr.nvalid);
1786 
1787 	/*
1788 	 * Set up the leaf bests table.
1789 	 */
1790 	memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),
1791 		freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
1792 
1793 	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
1794 	xfs_dir3_leaf_log_header(args, lbp);
1795 	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1796 	xfs_dir3_leaf_log_tail(args, lbp);
1797 	xfs_dir3_leaf_check(dp, lbp);
1798 
1799 	/*
1800 	 * Get rid of the freespace block.
1801 	 */
1802 	error = xfs_dir2_shrink_inode(args,
1803 			xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET),
1804 			fbp);
1805 	if (error) {
1806 		/*
1807 		 * This can't fail here because it can only happen when
1808 		 * punching out the middle of an extent, and this is an
1809 		 * isolated block.
1810 		 */
1811 		ASSERT(error != -ENOSPC);
1812 		return error;
1813 	}
1814 	fbp = NULL;
1815 	/*
1816 	 * Now see if we can convert the single-leaf directory
1817 	 * down to a block form directory.
1818 	 * This routine always kills the dabuf for the leaf, so
1819 	 * eliminate it from the path.
1820 	 */
1821 	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1822 	state->path.blk[0].bp = NULL;
1823 	return error;
1824 }
1825