xref: /openbmc/linux/fs/xfs/libxfs/xfs_inode_fork.c (revision 527d1470744d338c912f94bc1f4dba08ffdff349)
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include <linux/log2.h>
19 
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_format.h"
23 #include "xfs_log_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_inode.h"
27 #include "xfs_trans.h"
28 #include "xfs_inode_item.h"
29 #include "xfs_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_bmap.h"
32 #include "xfs_error.h"
33 #include "xfs_trace.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_da_format.h"
36 #include "xfs_da_btree.h"
37 #include "xfs_dir2_priv.h"
38 
39 kmem_zone_t *xfs_ifork_zone;
40 
41 STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42 STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43 STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44 
45 static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
46 {
47 	return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
48 }
49 
50 /*
51  * Copy inode type and data and attr format specific information from the
52  * on-disk inode to the in-core inode and fork structures.  For fifos, devices,
53  * and sockets this means set i_rdev to the proper value.  For files,
54  * directories, and symlinks this means to bring in the in-line data or extent
55  * pointers as well as the attribute fork.  For a fork in B-tree format, only
56  * the root is immediately brought in-core.  The rest will be read in later when
57  * first referenced (see xfs_iread_extents()).
58  */
59 int
60 xfs_iformat_fork(
61 	struct xfs_inode	*ip,
62 	struct xfs_dinode	*dip)
63 {
64 	struct inode		*inode = VFS_I(ip);
65 	struct xfs_attr_shortform *atp;
66 	int			size;
67 	int			error = 0;
68 	xfs_fsize_t             di_size;
69 
70 	if (unlikely(be32_to_cpu(dip->di_nextents) +
71 		     be16_to_cpu(dip->di_anextents) >
72 		     be64_to_cpu(dip->di_nblocks))) {
73 		xfs_warn(ip->i_mount,
74 			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
75 			(unsigned long long)ip->i_ino,
76 			(int)(be32_to_cpu(dip->di_nextents) +
77 			      be16_to_cpu(dip->di_anextents)),
78 			(unsigned long long)
79 				be64_to_cpu(dip->di_nblocks));
80 		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
81 				     ip->i_mount, dip);
82 		return -EFSCORRUPTED;
83 	}
84 
85 	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
86 		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
87 			(unsigned long long)ip->i_ino,
88 			dip->di_forkoff);
89 		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
90 				     ip->i_mount, dip);
91 		return -EFSCORRUPTED;
92 	}
93 
94 	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
95 		     !ip->i_mount->m_rtdev_targp)) {
96 		xfs_warn(ip->i_mount,
97 			"corrupt dinode %Lu, has realtime flag set.",
98 			ip->i_ino);
99 		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
100 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
101 		return -EFSCORRUPTED;
102 	}
103 
104 	if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
105 		xfs_warn(ip->i_mount,
106 			"corrupt dinode %llu, wrong file type for reflink.",
107 			ip->i_ino);
108 		XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
109 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
110 		return -EFSCORRUPTED;
111 	}
112 
113 	if (unlikely(xfs_is_reflink_inode(ip) &&
114 	    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
115 		xfs_warn(ip->i_mount,
116 			"corrupt dinode %llu, has reflink+realtime flag set.",
117 			ip->i_ino);
118 		XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
119 				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
120 		return -EFSCORRUPTED;
121 	}
122 
123 	switch (inode->i_mode & S_IFMT) {
124 	case S_IFIFO:
125 	case S_IFCHR:
126 	case S_IFBLK:
127 	case S_IFSOCK:
128 		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
129 			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
130 					      ip->i_mount, dip);
131 			return -EFSCORRUPTED;
132 		}
133 		ip->i_d.di_size = 0;
134 		inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
135 		break;
136 
137 	case S_IFREG:
138 	case S_IFLNK:
139 	case S_IFDIR:
140 		switch (dip->di_format) {
141 		case XFS_DINODE_FMT_LOCAL:
142 			/*
143 			 * no local regular files yet
144 			 */
145 			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
146 				xfs_warn(ip->i_mount,
147 			"corrupt inode %Lu (local format for regular file).",
148 					(unsigned long long) ip->i_ino);
149 				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
150 						     XFS_ERRLEVEL_LOW,
151 						     ip->i_mount, dip);
152 				return -EFSCORRUPTED;
153 			}
154 
155 			di_size = be64_to_cpu(dip->di_size);
156 			if (unlikely(di_size < 0 ||
157 				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
158 				xfs_warn(ip->i_mount,
159 			"corrupt inode %Lu (bad size %Ld for local inode).",
160 					(unsigned long long) ip->i_ino,
161 					(long long) di_size);
162 				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
163 						     XFS_ERRLEVEL_LOW,
164 						     ip->i_mount, dip);
165 				return -EFSCORRUPTED;
166 			}
167 
168 			size = (int)di_size;
169 			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
170 			break;
171 		case XFS_DINODE_FMT_EXTENTS:
172 			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
173 			break;
174 		case XFS_DINODE_FMT_BTREE:
175 			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
176 			break;
177 		default:
178 			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
179 					 ip->i_mount);
180 			return -EFSCORRUPTED;
181 		}
182 		break;
183 
184 	default:
185 		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
186 		return -EFSCORRUPTED;
187 	}
188 	if (error)
189 		return error;
190 
191 	/* Check inline dir contents. */
192 	if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
193 		error = xfs_dir2_sf_verify(ip);
194 		if (error) {
195 			xfs_idestroy_fork(ip, XFS_DATA_FORK);
196 			return error;
197 		}
198 	}
199 
200 	if (xfs_is_reflink_inode(ip)) {
201 		ASSERT(ip->i_cowfp == NULL);
202 		xfs_ifork_init_cow(ip);
203 	}
204 
205 	if (!XFS_DFORK_Q(dip))
206 		return 0;
207 
208 	ASSERT(ip->i_afp == NULL);
209 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
210 
211 	switch (dip->di_aformat) {
212 	case XFS_DINODE_FMT_LOCAL:
213 		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
214 		size = be16_to_cpu(atp->hdr.totsize);
215 
216 		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
217 			xfs_warn(ip->i_mount,
218 				"corrupt inode %Lu (bad attr fork size %Ld).",
219 				(unsigned long long) ip->i_ino,
220 				(long long) size);
221 			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
222 					     XFS_ERRLEVEL_LOW,
223 					     ip->i_mount, dip);
224 			error = -EFSCORRUPTED;
225 			break;
226 		}
227 
228 		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
229 		break;
230 	case XFS_DINODE_FMT_EXTENTS:
231 		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
232 		break;
233 	case XFS_DINODE_FMT_BTREE:
234 		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
235 		break;
236 	default:
237 		error = -EFSCORRUPTED;
238 		break;
239 	}
240 	if (error) {
241 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
242 		ip->i_afp = NULL;
243 		if (ip->i_cowfp)
244 			kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
245 		ip->i_cowfp = NULL;
246 		xfs_idestroy_fork(ip, XFS_DATA_FORK);
247 	}
248 	return error;
249 }
250 
251 void
252 xfs_init_local_fork(
253 	struct xfs_inode	*ip,
254 	int			whichfork,
255 	const void		*data,
256 	int			size)
257 {
258 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
259 	int			mem_size = size, real_size = 0;
260 	bool			zero_terminate;
261 
262 	/*
263 	 * If we are using the local fork to store a symlink body we need to
264 	 * zero-terminate it so that we can pass it back to the VFS directly.
265 	 * Overallocate the in-memory fork by one for that and add a zero
266 	 * to terminate it below.
267 	 */
268 	zero_terminate = S_ISLNK(VFS_I(ip)->i_mode);
269 	if (zero_terminate)
270 		mem_size++;
271 
272 	if (size) {
273 		real_size = roundup(mem_size, 4);
274 		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
275 		memcpy(ifp->if_u1.if_data, data, size);
276 		if (zero_terminate)
277 			ifp->if_u1.if_data[size] = '\0';
278 	} else {
279 		ifp->if_u1.if_data = NULL;
280 	}
281 
282 	ifp->if_bytes = size;
283 	ifp->if_real_bytes = real_size;
284 	ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
285 	ifp->if_flags |= XFS_IFINLINE;
286 }
287 
288 /*
289  * The file is in-lined in the on-disk inode.
290  */
291 STATIC int
292 xfs_iformat_local(
293 	xfs_inode_t	*ip,
294 	xfs_dinode_t	*dip,
295 	int		whichfork,
296 	int		size)
297 {
298 	/*
299 	 * If the size is unreasonable, then something
300 	 * is wrong and we just bail out rather than crash in
301 	 * kmem_alloc() or memcpy() below.
302 	 */
303 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
304 		xfs_warn(ip->i_mount,
305 	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
306 			(unsigned long long) ip->i_ino, size,
307 			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
308 		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
309 				     ip->i_mount, dip);
310 		return -EFSCORRUPTED;
311 	}
312 
313 	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
314 	return 0;
315 }
316 
317 /*
318  * The file consists of a set of extents all of which fit into the on-disk
319  * inode.
320  */
321 STATIC int
322 xfs_iformat_extents(
323 	struct xfs_inode	*ip,
324 	struct xfs_dinode	*dip,
325 	int			whichfork)
326 {
327 	struct xfs_mount	*mp = ip->i_mount;
328 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
329 	int			state = xfs_bmap_fork_to_state(whichfork);
330 	int			nex = XFS_DFORK_NEXTENTS(dip, whichfork);
331 	int			size = nex * sizeof(xfs_bmbt_rec_t);
332 	struct xfs_iext_cursor	icur;
333 	struct xfs_bmbt_rec	*dp;
334 	struct xfs_bmbt_irec	new;
335 	int			i;
336 
337 	/*
338 	 * If the number of extents is unreasonable, then something is wrong and
339 	 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
340 	 */
341 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
342 		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
343 			(unsigned long long) ip->i_ino, nex);
344 		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
345 				     mp, dip);
346 		return -EFSCORRUPTED;
347 	}
348 
349 	ifp->if_real_bytes = 0;
350 	ifp->if_bytes = 0;
351 	ifp->if_u1.if_root = NULL;
352 	ifp->if_height = 0;
353 	if (size) {
354 		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
355 
356 		xfs_iext_first(ifp, &icur);
357 		for (i = 0; i < nex; i++, dp++) {
358 			xfs_bmbt_disk_get_all(dp, &new);
359 			if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
360 				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
361 						 XFS_ERRLEVEL_LOW, mp);
362 				return -EFSCORRUPTED;
363 			}
364 
365 			xfs_iext_insert(ip, &icur, &new, state);
366 			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
367 			xfs_iext_next(ifp, &icur);
368 		}
369 	}
370 	ifp->if_flags |= XFS_IFEXTENTS;
371 	return 0;
372 }
373 
374 /*
375  * The file has too many extents to fit into
376  * the inode, so they are in B-tree format.
377  * Allocate a buffer for the root of the B-tree
378  * and copy the root into it.  The i_extents
379  * field will remain NULL until all of the
380  * extents are read in (when they are needed).
381  */
382 STATIC int
383 xfs_iformat_btree(
384 	xfs_inode_t		*ip,
385 	xfs_dinode_t		*dip,
386 	int			whichfork)
387 {
388 	struct xfs_mount	*mp = ip->i_mount;
389 	xfs_bmdr_block_t	*dfp;
390 	xfs_ifork_t		*ifp;
391 	/* REFERENCED */
392 	int			nrecs;
393 	int			size;
394 	int			level;
395 
396 	ifp = XFS_IFORK_PTR(ip, whichfork);
397 	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
398 	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
399 	nrecs = be16_to_cpu(dfp->bb_numrecs);
400 	level = be16_to_cpu(dfp->bb_level);
401 
402 	/*
403 	 * blow out if -- fork has less extents than can fit in
404 	 * fork (fork shouldn't be a btree format), root btree
405 	 * block has more records than can fit into the fork,
406 	 * or the number of extents is greater than the number of
407 	 * blocks.
408 	 */
409 	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
410 					XFS_IFORK_MAXEXT(ip, whichfork) ||
411 		     XFS_BMDR_SPACE_CALC(nrecs) >
412 					XFS_DFORK_SIZE(dip, mp, whichfork) ||
413 		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
414 		     level == 0 || level > XFS_BTREE_MAXLEVELS) {
415 		xfs_warn(mp, "corrupt inode %Lu (btree).",
416 					(unsigned long long) ip->i_ino);
417 		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
418 					 mp, dip);
419 		return -EFSCORRUPTED;
420 	}
421 
422 	ifp->if_broot_bytes = size;
423 	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
424 	ASSERT(ifp->if_broot != NULL);
425 	/*
426 	 * Copy and convert from the on-disk structure
427 	 * to the in-memory structure.
428 	 */
429 	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
430 			 ifp->if_broot, size);
431 	ifp->if_flags &= ~XFS_IFEXTENTS;
432 	ifp->if_flags |= XFS_IFBROOT;
433 
434 	ifp->if_real_bytes = 0;
435 	ifp->if_bytes = 0;
436 	ifp->if_u1.if_root = NULL;
437 	ifp->if_height = 0;
438 	return 0;
439 }
440 
441 /*
442  * Reallocate the space for if_broot based on the number of records
443  * being added or deleted as indicated in rec_diff.  Move the records
444  * and pointers in if_broot to fit the new size.  When shrinking this
445  * will eliminate holes between the records and pointers created by
446  * the caller.  When growing this will create holes to be filled in
447  * by the caller.
448  *
449  * The caller must not request to add more records than would fit in
450  * the on-disk inode root.  If the if_broot is currently NULL, then
451  * if we are adding records, one will be allocated.  The caller must also
452  * not request that the number of records go below zero, although
453  * it can go to zero.
454  *
455  * ip -- the inode whose if_broot area is changing
456  * ext_diff -- the change in the number of records, positive or negative,
457  *	 requested for the if_broot array.
458  */
459 void
460 xfs_iroot_realloc(
461 	xfs_inode_t		*ip,
462 	int			rec_diff,
463 	int			whichfork)
464 {
465 	struct xfs_mount	*mp = ip->i_mount;
466 	int			cur_max;
467 	xfs_ifork_t		*ifp;
468 	struct xfs_btree_block	*new_broot;
469 	int			new_max;
470 	size_t			new_size;
471 	char			*np;
472 	char			*op;
473 
474 	/*
475 	 * Handle the degenerate case quietly.
476 	 */
477 	if (rec_diff == 0) {
478 		return;
479 	}
480 
481 	ifp = XFS_IFORK_PTR(ip, whichfork);
482 	if (rec_diff > 0) {
483 		/*
484 		 * If there wasn't any memory allocated before, just
485 		 * allocate it now and get out.
486 		 */
487 		if (ifp->if_broot_bytes == 0) {
488 			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
489 			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
490 			ifp->if_broot_bytes = (int)new_size;
491 			return;
492 		}
493 
494 		/*
495 		 * If there is already an existing if_broot, then we need
496 		 * to realloc() it and shift the pointers to their new
497 		 * location.  The records don't change location because
498 		 * they are kept butted up against the btree block header.
499 		 */
500 		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
501 		new_max = cur_max + rec_diff;
502 		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
503 		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
504 				KM_SLEEP | KM_NOFS);
505 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
506 						     ifp->if_broot_bytes);
507 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
508 						     (int)new_size);
509 		ifp->if_broot_bytes = (int)new_size;
510 		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
511 			XFS_IFORK_SIZE(ip, whichfork));
512 		memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
513 		return;
514 	}
515 
516 	/*
517 	 * rec_diff is less than 0.  In this case, we are shrinking the
518 	 * if_broot buffer.  It must already exist.  If we go to zero
519 	 * records, just get rid of the root and clear the status bit.
520 	 */
521 	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
522 	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
523 	new_max = cur_max + rec_diff;
524 	ASSERT(new_max >= 0);
525 	if (new_max > 0)
526 		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
527 	else
528 		new_size = 0;
529 	if (new_size > 0) {
530 		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
531 		/*
532 		 * First copy over the btree block header.
533 		 */
534 		memcpy(new_broot, ifp->if_broot,
535 			XFS_BMBT_BLOCK_LEN(ip->i_mount));
536 	} else {
537 		new_broot = NULL;
538 		ifp->if_flags &= ~XFS_IFBROOT;
539 	}
540 
541 	/*
542 	 * Only copy the records and pointers if there are any.
543 	 */
544 	if (new_max > 0) {
545 		/*
546 		 * First copy the records.
547 		 */
548 		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
549 		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
550 		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
551 
552 		/*
553 		 * Then copy the pointers.
554 		 */
555 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
556 						     ifp->if_broot_bytes);
557 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
558 						     (int)new_size);
559 		memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
560 	}
561 	kmem_free(ifp->if_broot);
562 	ifp->if_broot = new_broot;
563 	ifp->if_broot_bytes = (int)new_size;
564 	if (ifp->if_broot)
565 		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
566 			XFS_IFORK_SIZE(ip, whichfork));
567 	return;
568 }
569 
570 
571 /*
572  * This is called when the amount of space needed for if_data
573  * is increased or decreased.  The change in size is indicated by
574  * the number of bytes that need to be added or deleted in the
575  * byte_diff parameter.
576  *
577  * If the amount of space needed has decreased below the size of the
578  * inline buffer, then switch to using the inline buffer.  Otherwise,
579  * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
580  * to what is needed.
581  *
582  * ip -- the inode whose if_data area is changing
583  * byte_diff -- the change in the number of bytes, positive or negative,
584  *	 requested for the if_data array.
585  */
586 void
587 xfs_idata_realloc(
588 	xfs_inode_t	*ip,
589 	int		byte_diff,
590 	int		whichfork)
591 {
592 	xfs_ifork_t	*ifp;
593 	int		new_size;
594 	int		real_size;
595 
596 	if (byte_diff == 0) {
597 		return;
598 	}
599 
600 	ifp = XFS_IFORK_PTR(ip, whichfork);
601 	new_size = (int)ifp->if_bytes + byte_diff;
602 	ASSERT(new_size >= 0);
603 
604 	if (new_size == 0) {
605 		kmem_free(ifp->if_u1.if_data);
606 		ifp->if_u1.if_data = NULL;
607 		real_size = 0;
608 	} else {
609 		/*
610 		 * Stuck with malloc/realloc.
611 		 * For inline data, the underlying buffer must be
612 		 * a multiple of 4 bytes in size so that it can be
613 		 * logged and stay on word boundaries.  We enforce
614 		 * that here.
615 		 */
616 		real_size = roundup(new_size, 4);
617 		if (ifp->if_u1.if_data == NULL) {
618 			ASSERT(ifp->if_real_bytes == 0);
619 			ifp->if_u1.if_data = kmem_alloc(real_size,
620 							KM_SLEEP | KM_NOFS);
621 		} else {
622 			/*
623 			 * Only do the realloc if the underlying size
624 			 * is really changing.
625 			 */
626 			if (ifp->if_real_bytes != real_size) {
627 				ifp->if_u1.if_data =
628 					kmem_realloc(ifp->if_u1.if_data,
629 							real_size,
630 							KM_SLEEP | KM_NOFS);
631 			}
632 		}
633 	}
634 	ifp->if_real_bytes = real_size;
635 	ifp->if_bytes = new_size;
636 	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
637 }
638 
639 void
640 xfs_idestroy_fork(
641 	xfs_inode_t	*ip,
642 	int		whichfork)
643 {
644 	xfs_ifork_t	*ifp;
645 
646 	ifp = XFS_IFORK_PTR(ip, whichfork);
647 	if (ifp->if_broot != NULL) {
648 		kmem_free(ifp->if_broot);
649 		ifp->if_broot = NULL;
650 	}
651 
652 	/*
653 	 * If the format is local, then we can't have an extents
654 	 * array so just look for an inline data array.  If we're
655 	 * not local then we may or may not have an extents list,
656 	 * so check and free it up if we do.
657 	 */
658 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
659 		if (ifp->if_u1.if_data != NULL) {
660 			ASSERT(ifp->if_real_bytes != 0);
661 			kmem_free(ifp->if_u1.if_data);
662 			ifp->if_u1.if_data = NULL;
663 			ifp->if_real_bytes = 0;
664 		}
665 	} else if ((ifp->if_flags & XFS_IFEXTENTS) && ifp->if_height) {
666 		xfs_iext_destroy(ifp);
667 	}
668 
669 	ASSERT(ifp->if_real_bytes == 0);
670 
671 	if (whichfork == XFS_ATTR_FORK) {
672 		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
673 		ip->i_afp = NULL;
674 	} else if (whichfork == XFS_COW_FORK) {
675 		kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
676 		ip->i_cowfp = NULL;
677 	}
678 }
679 
680 /*
681  * Convert in-core extents to on-disk form
682  *
683  * In the case of the data fork, the in-core and on-disk fork sizes can be
684  * different due to delayed allocation extents. We only copy on-disk extents
685  * here, so callers must always use the physical fork size to determine the
686  * size of the buffer passed to this routine.  We will return the size actually
687  * used.
688  */
689 int
690 xfs_iextents_copy(
691 	struct xfs_inode	*ip,
692 	struct xfs_bmbt_rec	*dp,
693 	int			whichfork)
694 {
695 	int			state = xfs_bmap_fork_to_state(whichfork);
696 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
697 	struct xfs_iext_cursor	icur;
698 	struct xfs_bmbt_irec	rec;
699 	int			copied = 0;
700 
701 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
702 	ASSERT(ifp->if_bytes > 0);
703 
704 	for_each_xfs_iext(ifp, &icur, &rec) {
705 		if (isnullstartblock(rec.br_startblock))
706 			continue;
707 		ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
708 		xfs_bmbt_disk_set_all(dp, &rec);
709 		trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
710 		copied += sizeof(struct xfs_bmbt_rec);
711 		dp++;
712 	}
713 
714 	ASSERT(copied > 0);
715 	ASSERT(copied <= ifp->if_bytes);
716 	return copied;
717 }
718 
719 /*
720  * Each of the following cases stores data into the same region
721  * of the on-disk inode, so only one of them can be valid at
722  * any given time. While it is possible to have conflicting formats
723  * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
724  * in EXTENTS format, this can only happen when the fork has
725  * changed formats after being modified but before being flushed.
726  * In these cases, the format always takes precedence, because the
727  * format indicates the current state of the fork.
728  */
729 void
730 xfs_iflush_fork(
731 	xfs_inode_t		*ip,
732 	xfs_dinode_t		*dip,
733 	xfs_inode_log_item_t	*iip,
734 	int			whichfork)
735 {
736 	char			*cp;
737 	xfs_ifork_t		*ifp;
738 	xfs_mount_t		*mp;
739 	static const short	brootflag[2] =
740 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
741 	static const short	dataflag[2] =
742 		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
743 	static const short	extflag[2] =
744 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
745 
746 	if (!iip)
747 		return;
748 	ifp = XFS_IFORK_PTR(ip, whichfork);
749 	/*
750 	 * This can happen if we gave up in iformat in an error path,
751 	 * for the attribute fork.
752 	 */
753 	if (!ifp) {
754 		ASSERT(whichfork == XFS_ATTR_FORK);
755 		return;
756 	}
757 	cp = XFS_DFORK_PTR(dip, whichfork);
758 	mp = ip->i_mount;
759 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
760 	case XFS_DINODE_FMT_LOCAL:
761 		if ((iip->ili_fields & dataflag[whichfork]) &&
762 		    (ifp->if_bytes > 0)) {
763 			ASSERT(ifp->if_u1.if_data != NULL);
764 			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
765 			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
766 		}
767 		break;
768 
769 	case XFS_DINODE_FMT_EXTENTS:
770 		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
771 		       !(iip->ili_fields & extflag[whichfork]));
772 		if ((iip->ili_fields & extflag[whichfork]) &&
773 		    (ifp->if_bytes > 0)) {
774 			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
775 			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
776 				whichfork);
777 		}
778 		break;
779 
780 	case XFS_DINODE_FMT_BTREE:
781 		if ((iip->ili_fields & brootflag[whichfork]) &&
782 		    (ifp->if_broot_bytes > 0)) {
783 			ASSERT(ifp->if_broot != NULL);
784 			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
785 			        XFS_IFORK_SIZE(ip, whichfork));
786 			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
787 				(xfs_bmdr_block_t *)cp,
788 				XFS_DFORK_SIZE(dip, mp, whichfork));
789 		}
790 		break;
791 
792 	case XFS_DINODE_FMT_DEV:
793 		if (iip->ili_fields & XFS_ILOG_DEV) {
794 			ASSERT(whichfork == XFS_DATA_FORK);
795 			xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev));
796 		}
797 		break;
798 
799 	default:
800 		ASSERT(0);
801 		break;
802 	}
803 }
804 
805 /* Convert bmap state flags to an inode fork. */
806 struct xfs_ifork *
807 xfs_iext_state_to_fork(
808 	struct xfs_inode	*ip,
809 	int			state)
810 {
811 	if (state & BMAP_COWFORK)
812 		return ip->i_cowfp;
813 	else if (state & BMAP_ATTRFORK)
814 		return ip->i_afp;
815 	return &ip->i_df;
816 }
817 
818 /*
819  * Initialize an inode's copy-on-write fork.
820  */
821 void
822 xfs_ifork_init_cow(
823 	struct xfs_inode	*ip)
824 {
825 	if (ip->i_cowfp)
826 		return;
827 
828 	ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
829 				       KM_SLEEP | KM_NOFS);
830 	ip->i_cowfp->if_flags = XFS_IFEXTENTS;
831 	ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
832 	ip->i_cnextents = 0;
833 }
834