xref: /openbmc/linux/fs/xfs/libxfs/xfs_dir2_data.c (revision e0bf6c5c)
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * Copyright (c) 2013 Red Hat, Inc.
4  * All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write the Free Software Foundation,
17  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #include "xfs.h"
20 #include "xfs_fs.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_mount.h"
25 #include "xfs_da_format.h"
26 #include "xfs_da_btree.h"
27 #include "xfs_inode.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dir2_priv.h"
30 #include "xfs_error.h"
31 #include "xfs_trans.h"
32 #include "xfs_buf_item.h"
33 #include "xfs_cksum.h"
34 
35 /*
36  * Check the consistency of the data block.
37  * The input can also be a block-format directory.
38  * Return 0 is the buffer is good, otherwise an error.
39  */
40 int
41 __xfs_dir3_data_check(
42 	struct xfs_inode	*dp,		/* incore inode pointer */
43 	struct xfs_buf		*bp)		/* data block's buffer */
44 {
45 	xfs_dir2_dataptr_t	addr;		/* addr for leaf lookup */
46 	xfs_dir2_data_free_t	*bf;		/* bestfree table */
47 	xfs_dir2_block_tail_t	*btp=NULL;	/* block tail */
48 	int			count;		/* count of entries found */
49 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
50 	xfs_dir2_data_entry_t	*dep;		/* data entry */
51 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
52 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
53 	char			*endp;		/* end of useful data */
54 	int			freeseen;	/* mask of bestfrees seen */
55 	xfs_dahash_t		hash;		/* hash of current name */
56 	int			i;		/* leaf index */
57 	int			lastfree;	/* last entry was unused */
58 	xfs_dir2_leaf_entry_t	*lep=NULL;	/* block leaf entries */
59 	xfs_mount_t		*mp;		/* filesystem mount point */
60 	char			*p;		/* current data position */
61 	int			stale;		/* count of stale leaves */
62 	struct xfs_name		name;
63 	const struct xfs_dir_ops *ops;
64 	struct xfs_da_geometry	*geo;
65 
66 	mp = bp->b_target->bt_mount;
67 	geo = mp->m_dir_geo;
68 
69 	/*
70 	 * We can be passed a null dp here from a verifier, so we need to go the
71 	 * hard way to get them.
72 	 */
73 	ops = xfs_dir_get_ops(mp, dp);
74 
75 	hdr = bp->b_addr;
76 	p = (char *)ops->data_entry_p(hdr);
77 
78 	switch (hdr->magic) {
79 	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
80 	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
81 		btp = xfs_dir2_block_tail_p(geo, hdr);
82 		lep = xfs_dir2_block_leaf_p(btp);
83 		endp = (char *)lep;
84 
85 		/*
86 		 * The number of leaf entries is limited by the size of the
87 		 * block and the amount of space used by the data entries.
88 		 * We don't know how much space is used by the data entries yet,
89 		 * so just ensure that the count falls somewhere inside the
90 		 * block right now.
91 		 */
92 		XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
93 			((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
94 		break;
95 	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
96 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
97 		endp = (char *)hdr + geo->blksize;
98 		break;
99 	default:
100 		XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);
101 		return -EFSCORRUPTED;
102 	}
103 
104 	/*
105 	 * Account for zero bestfree entries.
106 	 */
107 	bf = ops->data_bestfree_p(hdr);
108 	count = lastfree = freeseen = 0;
109 	if (!bf[0].length) {
110 		XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
111 		freeseen |= 1 << 0;
112 	}
113 	if (!bf[1].length) {
114 		XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
115 		freeseen |= 1 << 1;
116 	}
117 	if (!bf[2].length) {
118 		XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
119 		freeseen |= 1 << 2;
120 	}
121 
122 	XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
123 						be16_to_cpu(bf[1].length));
124 	XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
125 						be16_to_cpu(bf[2].length));
126 	/*
127 	 * Loop over the data/unused entries.
128 	 */
129 	while (p < endp) {
130 		dup = (xfs_dir2_data_unused_t *)p;
131 		/*
132 		 * If it's unused, look for the space in the bestfree table.
133 		 * If we find it, account for that, else make sure it
134 		 * doesn't need to be there.
135 		 */
136 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
137 			XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
138 			XFS_WANT_CORRUPTED_RETURN(
139 				be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
140 					       (char *)dup - (char *)hdr);
141 			dfp = xfs_dir2_data_freefind(hdr, bf, dup);
142 			if (dfp) {
143 				i = (int)(dfp - bf);
144 				XFS_WANT_CORRUPTED_RETURN(
145 					(freeseen & (1 << i)) == 0);
146 				freeseen |= 1 << i;
147 			} else {
148 				XFS_WANT_CORRUPTED_RETURN(
149 					be16_to_cpu(dup->length) <=
150 						be16_to_cpu(bf[2].length));
151 			}
152 			p += be16_to_cpu(dup->length);
153 			lastfree = 1;
154 			continue;
155 		}
156 		/*
157 		 * It's a real entry.  Validate the fields.
158 		 * If this is a block directory then make sure it's
159 		 * in the leaf section of the block.
160 		 * The linear search is crude but this is DEBUG code.
161 		 */
162 		dep = (xfs_dir2_data_entry_t *)p;
163 		XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
164 		XFS_WANT_CORRUPTED_RETURN(
165 			!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
166 		XFS_WANT_CORRUPTED_RETURN(
167 			be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
168 					       (char *)dep - (char *)hdr);
169 		XFS_WANT_CORRUPTED_RETURN(
170 				ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
171 		count++;
172 		lastfree = 0;
173 		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
174 		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
175 			addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
176 						(xfs_dir2_data_aoff_t)
177 						((char *)dep - (char *)hdr));
178 			name.name = dep->name;
179 			name.len = dep->namelen;
180 			hash = mp->m_dirnameops->hashname(&name);
181 			for (i = 0; i < be32_to_cpu(btp->count); i++) {
182 				if (be32_to_cpu(lep[i].address) == addr &&
183 				    be32_to_cpu(lep[i].hashval) == hash)
184 					break;
185 			}
186 			XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
187 		}
188 		p += ops->data_entsize(dep->namelen);
189 	}
190 	/*
191 	 * Need to have seen all the entries and all the bestfree slots.
192 	 */
193 	XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
194 	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
195 	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
196 		for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
197 			if (lep[i].address ==
198 			    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
199 				stale++;
200 			if (i > 0)
201 				XFS_WANT_CORRUPTED_RETURN(
202 					be32_to_cpu(lep[i].hashval) >=
203 						be32_to_cpu(lep[i - 1].hashval));
204 		}
205 		XFS_WANT_CORRUPTED_RETURN(count ==
206 			be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
207 		XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
208 	}
209 	return 0;
210 }
211 
212 static bool
213 xfs_dir3_data_verify(
214 	struct xfs_buf		*bp)
215 {
216 	struct xfs_mount	*mp = bp->b_target->bt_mount;
217 	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
218 
219 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
220 		if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
221 			return false;
222 		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
223 			return false;
224 		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
225 			return false;
226 	} else {
227 		if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
228 			return false;
229 	}
230 	if (__xfs_dir3_data_check(NULL, bp))
231 		return false;
232 	return true;
233 }
234 
235 /*
236  * Readahead of the first block of the directory when it is opened is completely
237  * oblivious to the format of the directory. Hence we can either get a block
238  * format buffer or a data format buffer on readahead.
239  */
240 static void
241 xfs_dir3_data_reada_verify(
242 	struct xfs_buf		*bp)
243 {
244 	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
245 
246 	switch (hdr->magic) {
247 	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
248 	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
249 		bp->b_ops = &xfs_dir3_block_buf_ops;
250 		bp->b_ops->verify_read(bp);
251 		return;
252 	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
253 	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
254 		xfs_dir3_data_verify(bp);
255 		return;
256 	default:
257 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
258 		xfs_verifier_error(bp);
259 		break;
260 	}
261 }
262 
263 static void
264 xfs_dir3_data_read_verify(
265 	struct xfs_buf	*bp)
266 {
267 	struct xfs_mount	*mp = bp->b_target->bt_mount;
268 
269 	if (xfs_sb_version_hascrc(&mp->m_sb) &&
270 	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
271 		 xfs_buf_ioerror(bp, -EFSBADCRC);
272 	else if (!xfs_dir3_data_verify(bp))
273 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
274 
275 	if (bp->b_error)
276 		xfs_verifier_error(bp);
277 }
278 
279 static void
280 xfs_dir3_data_write_verify(
281 	struct xfs_buf	*bp)
282 {
283 	struct xfs_mount	*mp = bp->b_target->bt_mount;
284 	struct xfs_buf_log_item	*bip = bp->b_fspriv;
285 	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;
286 
287 	if (!xfs_dir3_data_verify(bp)) {
288 		xfs_buf_ioerror(bp, -EFSCORRUPTED);
289 		xfs_verifier_error(bp);
290 		return;
291 	}
292 
293 	if (!xfs_sb_version_hascrc(&mp->m_sb))
294 		return;
295 
296 	if (bip)
297 		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
298 
299 	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
300 }
301 
/*
 * Buffer ops for directory data blocks: the read and write verifiers defined
 * above.  Used by xfs_dir3_data_read() and xfs_dir3_data_init() in this file.
 */
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
	.verify_read = xfs_dir3_data_read_verify,
	.verify_write = xfs_dir3_data_write_verify,
};
306 
/*
 * Buffer ops used only for readahead (see xfs_dir3_data_readahead()): the
 * read side goes through the format-detecting readahead verifier, the write
 * side is the normal data block write verifier.
 */
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
	.verify_read = xfs_dir3_data_reada_verify,
	.verify_write = xfs_dir3_data_write_verify,
};
311 
312 
313 int
314 xfs_dir3_data_read(
315 	struct xfs_trans	*tp,
316 	struct xfs_inode	*dp,
317 	xfs_dablk_t		bno,
318 	xfs_daddr_t		mapped_bno,
319 	struct xfs_buf		**bpp)
320 {
321 	int			err;
322 
323 	err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
324 				XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
325 	if (!err && tp)
326 		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
327 	return err;
328 }
329 
330 int
331 xfs_dir3_data_readahead(
332 	struct xfs_inode	*dp,
333 	xfs_dablk_t		bno,
334 	xfs_daddr_t		mapped_bno)
335 {
336 	return xfs_da_reada_buf(dp, bno, mapped_bno,
337 				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
338 }
339 
340 /*
341  * Given a data block and an unused entry from that block,
342  * return the bestfree entry if any that corresponds to it.
343  */
344 xfs_dir2_data_free_t *
345 xfs_dir2_data_freefind(
346 	struct xfs_dir2_data_hdr *hdr,		/* data block header */
347 	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
348 	struct xfs_dir2_data_unused *dup)	/* unused space */
349 {
350 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
351 	xfs_dir2_data_aoff_t	off;		/* offset value needed */
352 #ifdef DEBUG
353 	int			matched;	/* matched the value */
354 	int			seenzero;	/* saw a 0 bestfree entry */
355 #endif
356 
357 	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
358 
359 #ifdef DEBUG
360 	/*
361 	 * Validate some consistency in the bestfree table.
362 	 * Check order, non-overlapping entries, and if we find the
363 	 * one we're looking for it has to be exact.
364 	 */
365 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
366 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
367 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
368 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
369 	for (dfp = &bf[0], seenzero = matched = 0;
370 	     dfp < &bf[XFS_DIR2_DATA_FD_COUNT];
371 	     dfp++) {
372 		if (!dfp->offset) {
373 			ASSERT(!dfp->length);
374 			seenzero = 1;
375 			continue;
376 		}
377 		ASSERT(seenzero == 0);
378 		if (be16_to_cpu(dfp->offset) == off) {
379 			matched = 1;
380 			ASSERT(dfp->length == dup->length);
381 		} else if (off < be16_to_cpu(dfp->offset))
382 			ASSERT(off + be16_to_cpu(dup->length) <= be16_to_cpu(dfp->offset));
383 		else
384 			ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
385 		ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
386 		if (dfp > &bf[0])
387 			ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
388 	}
389 #endif
390 	/*
391 	 * If this is smaller than the smallest bestfree entry,
392 	 * it can't be there since they're sorted.
393 	 */
394 	if (be16_to_cpu(dup->length) <
395 	    be16_to_cpu(bf[XFS_DIR2_DATA_FD_COUNT - 1].length))
396 		return NULL;
397 	/*
398 	 * Look at the three bestfree entries for our guy.
399 	 */
400 	for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
401 		if (!dfp->offset)
402 			return NULL;
403 		if (be16_to_cpu(dfp->offset) == off)
404 			return dfp;
405 	}
406 	/*
407 	 * Didn't find it.  This only happens if there are duplicate lengths.
408 	 */
409 	return NULL;
410 }
411 
412 /*
413  * Insert an unused-space entry into the bestfree table.
414  */
415 xfs_dir2_data_free_t *				/* entry inserted */
416 xfs_dir2_data_freeinsert(
417 	struct xfs_dir2_data_hdr *hdr,		/* data block pointer */
418 	struct xfs_dir2_data_free *dfp,		/* bestfree table pointer */
419 	struct xfs_dir2_data_unused *dup,	/* unused space */
420 	int			*loghead)	/* log the data header (out) */
421 {
422 	xfs_dir2_data_free_t	new;		/* new bestfree entry */
423 
424 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
425 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
426 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
427 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
428 
429 	new.length = dup->length;
430 	new.offset = cpu_to_be16((char *)dup - (char *)hdr);
431 
432 	/*
433 	 * Insert at position 0, 1, or 2; or not at all.
434 	 */
435 	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[0].length)) {
436 		dfp[2] = dfp[1];
437 		dfp[1] = dfp[0];
438 		dfp[0] = new;
439 		*loghead = 1;
440 		return &dfp[0];
441 	}
442 	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[1].length)) {
443 		dfp[2] = dfp[1];
444 		dfp[1] = new;
445 		*loghead = 1;
446 		return &dfp[1];
447 	}
448 	if (be16_to_cpu(new.length) > be16_to_cpu(dfp[2].length)) {
449 		dfp[2] = new;
450 		*loghead = 1;
451 		return &dfp[2];
452 	}
453 	return NULL;
454 }
455 
456 /*
457  * Remove a bestfree entry from the table.
458  */
459 STATIC void
460 xfs_dir2_data_freeremove(
461 	struct xfs_dir2_data_hdr *hdr,		/* data block header */
462 	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */
463 	struct xfs_dir2_data_free *dfp,		/* bestfree entry pointer */
464 	int			*loghead)	/* out: log data header */
465 {
466 
467 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
468 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
469 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
470 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
471 
472 	/*
473 	 * It's the first entry, slide the next 2 up.
474 	 */
475 	if (dfp == &bf[0]) {
476 		bf[0] = bf[1];
477 		bf[1] = bf[2];
478 	}
479 	/*
480 	 * It's the second entry, slide the 3rd entry up.
481 	 */
482 	else if (dfp == &bf[1])
483 		bf[1] = bf[2];
484 	/*
485 	 * Must be the last entry.
486 	 */
487 	else
488 		ASSERT(dfp == &bf[2]);
489 	/*
490 	 * Clear the 3rd entry, must be zero now.
491 	 */
492 	bf[2].length = 0;
493 	bf[2].offset = 0;
494 	*loghead = 1;
495 }
496 
497 /*
498  * Given a data block, reconstruct its bestfree map.
499  */
500 void
501 xfs_dir2_data_freescan(
502 	struct xfs_inode	*dp,
503 	struct xfs_dir2_data_hdr *hdr,
504 	int			*loghead)
505 {
506 	xfs_dir2_block_tail_t	*btp;		/* block tail */
507 	xfs_dir2_data_entry_t	*dep;		/* active data entry */
508 	xfs_dir2_data_unused_t	*dup;		/* unused data entry */
509 	struct xfs_dir2_data_free *bf;
510 	char			*endp;		/* end of block's data */
511 	char			*p;		/* current entry pointer */
512 	struct xfs_da_geometry	*geo = dp->i_mount->m_dir_geo;
513 
514 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
515 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
516 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
517 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
518 
519 	/*
520 	 * Start by clearing the table.
521 	 */
522 	bf = dp->d_ops->data_bestfree_p(hdr);
523 	memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
524 	*loghead = 1;
525 	/*
526 	 * Set up pointers.
527 	 */
528 	p = (char *)dp->d_ops->data_entry_p(hdr);
529 	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
530 	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
531 		btp = xfs_dir2_block_tail_p(geo, hdr);
532 		endp = (char *)xfs_dir2_block_leaf_p(btp);
533 	} else
534 		endp = (char *)hdr + geo->blksize;
535 	/*
536 	 * Loop over the block's entries.
537 	 */
538 	while (p < endp) {
539 		dup = (xfs_dir2_data_unused_t *)p;
540 		/*
541 		 * If it's a free entry, insert it.
542 		 */
543 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
544 			ASSERT((char *)dup - (char *)hdr ==
545 			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
546 			xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
547 			p += be16_to_cpu(dup->length);
548 		}
549 		/*
550 		 * For active entries, check their tags and skip them.
551 		 */
552 		else {
553 			dep = (xfs_dir2_data_entry_t *)p;
554 			ASSERT((char *)dep - (char *)hdr ==
555 			       be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep)));
556 			p += dp->d_ops->data_entsize(dep->namelen);
557 		}
558 	}
559 }
560 
561 /*
562  * Initialize a data block at the given block number in the directory.
563  * Give back the buffer for the created block.
564  */
565 int						/* error */
566 xfs_dir3_data_init(
567 	xfs_da_args_t		*args,		/* directory operation args */
568 	xfs_dir2_db_t		blkno,		/* logical dir block number */
569 	struct xfs_buf		**bpp)		/* output block buffer */
570 {
571 	struct xfs_buf		*bp;		/* block buffer */
572 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
573 	xfs_inode_t		*dp;		/* incore directory inode */
574 	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
575 	struct xfs_dir2_data_free *bf;
576 	int			error;		/* error return value */
577 	int			i;		/* bestfree index */
578 	xfs_mount_t		*mp;		/* filesystem mount point */
579 	xfs_trans_t		*tp;		/* transaction pointer */
580 	int                     t;              /* temp */
581 
582 	dp = args->dp;
583 	mp = dp->i_mount;
584 	tp = args->trans;
585 	/*
586 	 * Get the buffer set up for the block.
587 	 */
588 	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
589 			       -1, &bp, XFS_DATA_FORK);
590 	if (error)
591 		return error;
592 	bp->b_ops = &xfs_dir3_data_buf_ops;
593 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_DATA_BUF);
594 
595 	/*
596 	 * Initialize the header.
597 	 */
598 	hdr = bp->b_addr;
599 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
600 		struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
601 
602 		memset(hdr3, 0, sizeof(*hdr3));
603 		hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
604 		hdr3->blkno = cpu_to_be64(bp->b_bn);
605 		hdr3->owner = cpu_to_be64(dp->i_ino);
606 		uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
607 
608 	} else
609 		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
610 
611 	bf = dp->d_ops->data_bestfree_p(hdr);
612 	bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
613 	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
614 		bf[i].length = 0;
615 		bf[i].offset = 0;
616 	}
617 
618 	/*
619 	 * Set up an unused entry for the block's body.
620 	 */
621 	dup = dp->d_ops->data_unused_p(hdr);
622 	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
623 
624 	t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
625 	bf[0].length = cpu_to_be16(t);
626 	dup->length = cpu_to_be16(t);
627 	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
628 	/*
629 	 * Log it and return it.
630 	 */
631 	xfs_dir2_data_log_header(args, bp);
632 	xfs_dir2_data_log_unused(args, bp, dup);
633 	*bpp = bp;
634 	return 0;
635 }
636 
637 /*
638  * Log an active data entry from the block.
639  */
640 void
641 xfs_dir2_data_log_entry(
642 	struct xfs_da_args	*args,
643 	struct xfs_buf		*bp,
644 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
645 {
646 	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
647 
648 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
649 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
650 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
651 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
652 
653 	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
654 		(uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
655 		       (char *)hdr - 1));
656 }
657 
658 /*
659  * Log a data block header.
660  */
661 void
662 xfs_dir2_data_log_header(
663 	struct xfs_da_args	*args,
664 	struct xfs_buf		*bp)
665 {
666 #ifdef DEBUG
667 	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
668 
669 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
670 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
671 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
672 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
673 #endif
674 
675 	xfs_trans_log_buf(args->trans, bp, 0,
676 			  args->dp->d_ops->data_entry_offset - 1);
677 }
678 
679 /*
680  * Log a data unused entry.
681  */
682 void
683 xfs_dir2_data_log_unused(
684 	struct xfs_da_args	*args,
685 	struct xfs_buf		*bp,
686 	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */
687 {
688 	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
689 
690 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
691 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
692 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
693 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
694 
695 	/*
696 	 * Log the first part of the unused entry.
697 	 */
698 	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),
699 		(uint)((char *)&dup->length + sizeof(dup->length) -
700 		       1 - (char *)hdr));
701 	/*
702 	 * Log the end (tag) of the unused entry.
703 	 */
704 	xfs_trans_log_buf(args->trans, bp,
705 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
706 		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
707 		       sizeof(xfs_dir2_data_off_t) - 1));
708 }
709 
710 /*
711  * Make a byte range in the data block unused.
712  * Its current contents are unimportant.
713  */
714 void
715 xfs_dir2_data_make_free(
716 	struct xfs_da_args	*args,
717 	struct xfs_buf		*bp,
718 	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */
719 	xfs_dir2_data_aoff_t	len,		/* length in bytes */
720 	int			*needlogp,	/* out: log header */
721 	int			*needscanp)	/* out: regen bestfree */
722 {
723 	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */
724 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
725 	char			*endptr;	/* end of data area */
726 	int			needscan;	/* need to regen bestfree */
727 	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
728 	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */
729 	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */
730 	struct xfs_dir2_data_free *bf;
731 
732 	hdr = bp->b_addr;
733 
734 	/*
735 	 * Figure out where the end of the data area is.
736 	 */
737 	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
738 	    hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC))
739 		endptr = (char *)hdr + args->geo->blksize;
740 	else {
741 		xfs_dir2_block_tail_t	*btp;	/* block tail */
742 
743 		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
744 			hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
745 		btp = xfs_dir2_block_tail_p(args->geo, hdr);
746 		endptr = (char *)xfs_dir2_block_leaf_p(btp);
747 	}
748 	/*
749 	 * If this isn't the start of the block, then back up to
750 	 * the previous entry and see if it's free.
751 	 */
752 	if (offset > args->dp->d_ops->data_entry_offset) {
753 		__be16			*tagp;	/* tag just before us */
754 
755 		tagp = (__be16 *)((char *)hdr + offset) - 1;
756 		prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
757 		if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
758 			prevdup = NULL;
759 	} else
760 		prevdup = NULL;
761 	/*
762 	 * If this isn't the end of the block, see if the entry after
763 	 * us is free.
764 	 */
765 	if ((char *)hdr + offset + len < endptr) {
766 		postdup =
767 			(xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
768 		if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
769 			postdup = NULL;
770 	} else
771 		postdup = NULL;
772 	ASSERT(*needscanp == 0);
773 	needscan = 0;
774 	/*
775 	 * Previous and following entries are both free,
776 	 * merge everything into a single free entry.
777 	 */
778 	bf = args->dp->d_ops->data_bestfree_p(hdr);
779 	if (prevdup && postdup) {
780 		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */
781 
782 		/*
783 		 * See if prevdup and/or postdup are in bestfree table.
784 		 */
785 		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
786 		dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);
787 		/*
788 		 * We need a rescan unless there are exactly 2 free entries
789 		 * namely our two.  Then we know what's happening, otherwise
790 		 * since the third bestfree is there, there might be more
791 		 * entries.
792 		 */
793 		needscan = (bf[2].length != 0);
794 		/*
795 		 * Fix up the new big freespace.
796 		 */
797 		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
798 		*xfs_dir2_data_unused_tag_p(prevdup) =
799 			cpu_to_be16((char *)prevdup - (char *)hdr);
800 		xfs_dir2_data_log_unused(args, bp, prevdup);
801 		if (!needscan) {
802 			/*
803 			 * Has to be the case that entries 0 and 1 are
804 			 * dfp and dfp2 (don't know which is which), and
805 			 * entry 2 is empty.
806 			 * Remove entry 1 first then entry 0.
807 			 */
808 			ASSERT(dfp && dfp2);
809 			if (dfp == &bf[1]) {
810 				dfp = &bf[0];
811 				ASSERT(dfp2 == dfp);
812 				dfp2 = &bf[1];
813 			}
814 			xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp);
815 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
816 			/*
817 			 * Now insert the new entry.
818 			 */
819 			dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup,
820 						       needlogp);
821 			ASSERT(dfp == &bf[0]);
822 			ASSERT(dfp->length == prevdup->length);
823 			ASSERT(!dfp[1].length);
824 			ASSERT(!dfp[2].length);
825 		}
826 	}
827 	/*
828 	 * The entry before us is free, merge with it.
829 	 */
830 	else if (prevdup) {
831 		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);
832 		be16_add_cpu(&prevdup->length, len);
833 		*xfs_dir2_data_unused_tag_p(prevdup) =
834 			cpu_to_be16((char *)prevdup - (char *)hdr);
835 		xfs_dir2_data_log_unused(args, bp, prevdup);
836 		/*
837 		 * If the previous entry was in the table, the new entry
838 		 * is longer, so it will be in the table too.  Remove
839 		 * the old one and add the new one.
840 		 */
841 		if (dfp) {
842 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
843 			xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);
844 		}
845 		/*
846 		 * Otherwise we need a scan if the new entry is big enough.
847 		 */
848 		else {
849 			needscan = be16_to_cpu(prevdup->length) >
850 				   be16_to_cpu(bf[2].length);
851 		}
852 	}
853 	/*
854 	 * The following entry is free, merge with it.
855 	 */
856 	else if (postdup) {
857 		dfp = xfs_dir2_data_freefind(hdr, bf, postdup);
858 		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
859 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
860 		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
861 		*xfs_dir2_data_unused_tag_p(newdup) =
862 			cpu_to_be16((char *)newdup - (char *)hdr);
863 		xfs_dir2_data_log_unused(args, bp, newdup);
864 		/*
865 		 * If the following entry was in the table, the new entry
866 		 * is longer, so it will be in the table too.  Remove
867 		 * the old one and add the new one.
868 		 */
869 		if (dfp) {
870 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
871 			xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
872 		}
873 		/*
874 		 * Otherwise we need a scan if the new entry is big enough.
875 		 */
876 		else {
877 			needscan = be16_to_cpu(newdup->length) >
878 				   be16_to_cpu(bf[2].length);
879 		}
880 	}
881 	/*
882 	 * Neither neighbor is free.  Make a new entry.
883 	 */
884 	else {
885 		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
886 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
887 		newdup->length = cpu_to_be16(len);
888 		*xfs_dir2_data_unused_tag_p(newdup) =
889 			cpu_to_be16((char *)newdup - (char *)hdr);
890 		xfs_dir2_data_log_unused(args, bp, newdup);
891 		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
892 	}
893 	*needscanp = needscan;
894 }
895 
896 /*
897  * Take a byte range out of an existing unused space and make it un-free.
898  */
899 void
900 xfs_dir2_data_use_free(
901 	struct xfs_da_args	*args,
902 	struct xfs_buf		*bp,
903 	xfs_dir2_data_unused_t	*dup,		/* unused entry */
904 	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */
905 	xfs_dir2_data_aoff_t	len,		/* length to use */
906 	int			*needlogp,	/* out: need to log header */
907 	int			*needscanp)	/* out: need regen bestfree */
908 {
909 	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
910 	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */
911 	int			matchback;	/* matches end of freespace */
912 	int			matchfront;	/* matches start of freespace */
913 	int			needscan;	/* need to regen bestfree */
914 	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */
915 	xfs_dir2_data_unused_t	*newdup2;	/* another new unused entry */
916 	int			oldlen;		/* old unused entry's length */
917 	struct xfs_dir2_data_free *bf;
918 
919 	hdr = bp->b_addr;
920 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
921 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
922 	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
923 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
924 	ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
925 	ASSERT(offset >= (char *)dup - (char *)hdr);
926 	ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
927 	ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
928 	/*
929 	 * Look up the entry in the bestfree table.
930 	 */
931 	oldlen = be16_to_cpu(dup->length);
932 	bf = args->dp->d_ops->data_bestfree_p(hdr);
933 	dfp = xfs_dir2_data_freefind(hdr, bf, dup);
934 	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
935 	/*
936 	 * Check for alignment with front and back of the entry.
937 	 */
938 	matchfront = (char *)dup - (char *)hdr == offset;
939 	matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
940 	ASSERT(*needscanp == 0);
941 	needscan = 0;
942 	/*
943 	 * If we matched it exactly we just need to get rid of it from
944 	 * the bestfree table.
945 	 */
946 	if (matchfront && matchback) {
947 		if (dfp) {
948 			needscan = (bf[2].offset != 0);
949 			if (!needscan)
950 				xfs_dir2_data_freeremove(hdr, bf, dfp,
951 							 needlogp);
952 		}
953 	}
954 	/*
955 	 * We match the first part of the entry.
956 	 * Make a new entry with the remaining freespace.
957 	 */
958 	else if (matchfront) {
959 		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
960 		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
961 		newdup->length = cpu_to_be16(oldlen - len);
962 		*xfs_dir2_data_unused_tag_p(newdup) =
963 			cpu_to_be16((char *)newdup - (char *)hdr);
964 		xfs_dir2_data_log_unused(args, bp, newdup);
965 		/*
966 		 * If it was in the table, remove it and add the new one.
967 		 */
968 		if (dfp) {
969 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
970 			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
971 						       needlogp);
972 			ASSERT(dfp != NULL);
973 			ASSERT(dfp->length == newdup->length);
974 			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
975 			/*
976 			 * If we got inserted at the last slot,
977 			 * that means we don't know if there was a better
978 			 * choice for the last slot, or not.  Rescan.
979 			 */
980 			needscan = dfp == &bf[2];
981 		}
982 	}
983 	/*
984 	 * We match the last part of the entry.
985 	 * Trim the allocated space off the tail of the entry.
986 	 */
987 	else if (matchback) {
988 		newdup = dup;
989 		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
990 		*xfs_dir2_data_unused_tag_p(newdup) =
991 			cpu_to_be16((char *)newdup - (char *)hdr);
992 		xfs_dir2_data_log_unused(args, bp, newdup);
993 		/*
994 		 * If it was in the table, remove it and add the new one.
995 		 */
996 		if (dfp) {
997 			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
998 			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
999 						       needlogp);
1000 			ASSERT(dfp != NULL);
1001 			ASSERT(dfp->length == newdup->length);
1002 			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
1003 			/*
1004 			 * If we got inserted at the last slot,
1005 			 * that means we don't know if there was a better
1006 			 * choice for the last slot, or not.  Rescan.
1007 			 */
1008 			needscan = dfp == &bf[2];
1009 		}
1010 	}
1011 	/*
1012 	 * Poking out the middle of an entry.
1013 	 * Make two new entries.
1014 	 */
1015 	else {
1016 		newdup = dup;
1017 		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
1018 		*xfs_dir2_data_unused_tag_p(newdup) =
1019 			cpu_to_be16((char *)newdup - (char *)hdr);
1020 		xfs_dir2_data_log_unused(args, bp, newdup);
1021 		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
1022 		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1023 		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
1024 		*xfs_dir2_data_unused_tag_p(newdup2) =
1025 			cpu_to_be16((char *)newdup2 - (char *)hdr);
1026 		xfs_dir2_data_log_unused(args, bp, newdup2);
1027 		/*
1028 		 * If the old entry was in the table, we need to scan
1029 		 * if the 3rd entry was valid, since these entries
1030 		 * are smaller than the old one.
1031 		 * If we don't need to scan that means there were 1 or 2
1032 		 * entries in the table, and removing the old and adding
1033 		 * the 2 new will work.
1034 		 */
1035 		if (dfp) {
1036 			needscan = (bf[2].length != 0);
1037 			if (!needscan) {
1038 				xfs_dir2_data_freeremove(hdr, bf, dfp,
1039 							 needlogp);
1040 				xfs_dir2_data_freeinsert(hdr, bf, newdup,
1041 							 needlogp);
1042 				xfs_dir2_data_freeinsert(hdr, bf, newdup2,
1043 							 needlogp);
1044 			}
1045 		}
1046 	}
1047 	*needscanp = needscan;
1048 }
1049