xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr.c (revision fb960bd2)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_attr_sf.h"
30 #include "xfs_inode.h"
31 #include "xfs_alloc.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_bmap.h"
35 #include "xfs_bmap_util.h"
36 #include "xfs_bmap_btree.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_attr_remote.h"
40 #include "xfs_error.h"
41 #include "xfs_quota.h"
42 #include "xfs_trans_space.h"
43 #include "xfs_trace.h"
44 
45 /*
46  * xfs_attr.c
47  *
48  * Provide the external interfaces to manage attribute lists.
49  */
50 
51 /*========================================================================
52  * Function prototypes for the kernel.
53  *========================================================================*/
54 
/*
 * Internal routines when attribute list fits inside the inode
 * (shortform).
 */
STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);

/*
 * Internal routines when attribute list is one block (leaf form).
 */
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);

/*
 * Internal routines when attribute list is more than one block
 * (node/Btree form).  The fillstate/refillstate pair is used by the
 * node remove path to remember block numbers and re-acquire buffers
 * across intermediate transaction commits.
 */
STATIC int xfs_attr_node_get(xfs_da_args_t *args);
STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
75 
76 
77 STATIC int
78 xfs_attr_args_init(
79 	struct xfs_da_args	*args,
80 	struct xfs_inode	*dp,
81 	const unsigned char	*name,
82 	int			flags)
83 {
84 
85 	if (!name)
86 		return -EINVAL;
87 
88 	memset(args, 0, sizeof(*args));
89 	args->geo = dp->i_mount->m_attr_geo;
90 	args->whichfork = XFS_ATTR_FORK;
91 	args->dp = dp;
92 	args->flags = flags;
93 	args->name = name;
94 	args->namelen = strlen((const char *)name);
95 	if (args->namelen >= MAXNAMELEN)
96 		return -EFAULT;		/* match IRIX behaviour */
97 
98 	args->hashval = xfs_da_hashname(args->name, args->namelen);
99 	return 0;
100 }
101 
102 int
103 xfs_inode_hasattr(
104 	struct xfs_inode	*ip)
105 {
106 	if (!XFS_IFORK_Q(ip) ||
107 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
108 	     ip->i_d.di_anextents == 0))
109 		return 0;
110 	return 1;
111 }
112 
113 /*========================================================================
114  * Overall external interface routines.
115  *========================================================================*/
116 
117 /* Retrieve an extended attribute and its value.  Must have ilock. */
118 int
119 xfs_attr_get_ilocked(
120 	struct xfs_inode	*ip,
121 	struct xfs_da_args	*args)
122 {
123 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
124 
125 	if (!xfs_inode_hasattr(ip))
126 		return -ENOATTR;
127 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
128 		return xfs_attr_shortform_getvalue(args);
129 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
130 		return xfs_attr_leaf_get(args);
131 	else
132 		return xfs_attr_node_get(args);
133 }
134 
135 /* Retrieve an extended attribute by name, and its value. */
136 int
137 xfs_attr_get(
138 	struct xfs_inode	*ip,
139 	const unsigned char	*name,
140 	unsigned char		*value,
141 	int			*valuelenp,
142 	int			flags)
143 {
144 	struct xfs_da_args	args;
145 	uint			lock_mode;
146 	int			error;
147 
148 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
149 
150 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
151 		return -EIO;
152 
153 	error = xfs_attr_args_init(&args, ip, name, flags);
154 	if (error)
155 		return error;
156 
157 	args.value = value;
158 	args.valuelen = *valuelenp;
159 	/* Entirely possible to look up a name which doesn't exist */
160 	args.op_flags = XFS_DA_OP_OKNOENT;
161 
162 	lock_mode = xfs_ilock_attr_map_shared(ip);
163 	error = xfs_attr_get_ilocked(ip, &args);
164 	xfs_iunlock(ip, lock_mode);
165 
166 	*valuelenp = args.valuelen;
167 	return error == -EEXIST ? 0 : error;
168 }
169 
170 /*
171  * Calculate how many blocks we need for the new attribute,
172  */
173 STATIC int
174 xfs_attr_calc_size(
175 	struct xfs_da_args	*args,
176 	int			*local)
177 {
178 	struct xfs_mount	*mp = args->dp->i_mount;
179 	int			size;
180 	int			nblks;
181 
182 	/*
183 	 * Determine space new attribute will use, and if it would be
184 	 * "local" or "remote" (note: local != inline).
185 	 */
186 	size = xfs_attr_leaf_newentsize(args, local);
187 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
188 	if (*local) {
189 		if (size > (args->geo->blksize / 2)) {
190 			/* Double split possible */
191 			nblks *= 2;
192 		}
193 	} else {
194 		/*
195 		 * Out of line attribute, cannot double split, but
196 		 * make room for the attribute value itself.
197 		 */
198 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
199 		nblks += dblocks;
200 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
201 	}
202 
203 	return nblks;
204 }
205 
/*
 * Set (create or replace) the extended attribute @name on inode @dp to
 * the @valuelen bytes at @value.
 *
 * The inode must not be ILOCKed on entry; this routine allocates the
 * transaction, takes XFS_ILOCK_EXCL itself and drops it before returning
 * on every path after the lock is taken.
 *
 * @flags handling visible here:
 *   ATTR_ROOT      - allows the transaction and quota reservation to dip
 *                    into reserved blocks.
 *   ATTR_KERNOTIME - suppresses the inode change-time update.
 * ATTR_CREATE / ATTR_REPLACE are interpreted by the format specific
 * add routines that this function calls.
 *
 * The attribute fork is created if missing, a shortform add is tried
 * first, and the list is converted to leaf form when the shortform add
 * reports -ENOSPC.
 *
 * Returns 0 on success or a negative errno.
 */
int
xfs_attr_set(
	struct xfs_inode	*dp,
	const unsigned char	*name,
	unsigned char		*value,
	int			valuelen,
	int			flags)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_args	args;
	struct xfs_defer_ops	dfops;
	struct xfs_trans_res	tres;
	xfs_fsblock_t		firstblock;
	int			rsvd = (flags & ATTR_ROOT) != 0;
	int			error, err2, local;

	XFS_STATS_INC(mp, xs_attr_set);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return -EIO;

	error = xfs_attr_args_init(&args, dp, name, flags);
	if (error)
		return error;

	args.value = value;
	args.valuelen = valuelen;
	args.firstblock = &firstblock;
	args.dfops = &dfops;
	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
	/* Block reservation for the whole operation; also sets 'local'. */
	args.total = xfs_attr_calc_size(&args, &local);

	error = xfs_qm_dqattach(dp, 0);
	if (error)
		return error;

	/*
	 * If the inode doesn't have an attribute fork, add one.
	 * (inode must not be locked when we call this routine)
	 */
	if (XFS_IFORK_Q(dp) == 0) {
		/* Size hint: shortform header plus this single entry. */
		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);

		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
		if (error)
			return error;
	}

	/*
	 * Build a custom log reservation: the fixed attr-set component plus
	 * a per-block component scaled by the number of blocks reserved.
	 */
	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;

	/*
	 * Root fork attributes can use reserved data blocks for this
	 * operation if necessary
	 */
	error = xfs_trans_alloc(mp, &tres, args.total, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
	if (error)
		return error;

	xfs_ilock(dp, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
				       XFS_QMOPT_RES_REGBLKS);
	if (error) {
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
		xfs_trans_cancel(args.trans);
		return error;
	}

	/*
	 * Join with lock_flags == 0: the inode stays locked across commits
	 * and is unlocked explicitly below.
	 */
	xfs_trans_ijoin(args.trans, dp, 0);

	/*
	 * If the attribute list is non-existent or a shortform list,
	 * upgrade it to a single-leaf-block attribute list.
	 */
	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
	     dp->i_d.di_anextents == 0)) {

		/*
		 * Build initial attribute list (if required).
		 */
		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
			xfs_attr_shortform_create(&args);

		/*
		 * Try to add the attr to the attribute list in
		 * the inode.
		 */
		error = xfs_attr_shortform_addname(&args);
		if (error != -ENOSPC) {
			/*
			 * Commit the shortform mods, and we're done.
			 * NOTE: this is also the error path (EEXIST, etc).
			 */
			ASSERT(args.trans != NULL);

			/*
			 * If this is a synchronous mount, make sure that
			 * the transaction goes to disk before returning
			 * to the user.
			 */
			if (mp->m_flags & XFS_MOUNT_WSYNC)
				xfs_trans_set_sync(args.trans);

			if (!error && (flags & ATTR_KERNOTIME) == 0) {
				xfs_trans_ichgtime(args.trans, dp,
							XFS_ICHGTIME_CHG);
			}
			err2 = xfs_trans_commit(args.trans);
			xfs_iunlock(dp, XFS_ILOCK_EXCL);

			/* The addname error takes priority over a commit error. */
			return error ? error : err2;
		}

		/*
		 * It won't fit in the shortform, transform to a leaf block.
		 * GROT: another possible req'mt for a double-split btree op.
		 */
		xfs_defer_init(args.dfops, args.firstblock);
		error = xfs_attr_shortform_to_leaf(&args);
		if (error)
			goto out_defer_cancel;
		xfs_defer_ijoin(args.dfops, dp);
		error = xfs_defer_finish(&args.trans, args.dfops);
		if (error)
			goto out_defer_cancel;

		/*
		 * Commit the leaf transformation.  We'll need another (linked)
		 * transaction to add the new attribute to the leaf.
		 */

		error = xfs_trans_roll_inode(&args.trans, dp);
		if (error)
			goto out;

	}

	/* The list is now in leaf or node form; add via the matching path. */
	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
		error = xfs_attr_leaf_addname(&args);
	else
		error = xfs_attr_node_addname(&args);
	if (error)
		goto out;

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(args.trans);

	if ((flags & ATTR_KERNOTIME) == 0)
		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

	/*
	 * Commit the last in the sequence of transactions.
	 */
	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
	error = xfs_trans_commit(args.trans);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);

	return error;

out_defer_cancel:
	/*
	 * NOTE(review): args.trans is cleared here without being cancelled,
	 * so the check under "out" skips xfs_trans_cancel().  Confirm that
	 * the failed xfs_attr_shortform_to_leaf()/xfs_defer_finish() path
	 * really disposes of the transaction; otherwise it is leaked.
	 */
	xfs_defer_cancel(&dfops);
	args.trans = NULL;
out:
	/* The helpers called above may have set args.trans to NULL. */
	if (args.trans)
		xfs_trans_cancel(args.trans);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}
384 
385 /*
386  * Generic handler routine to remove a name from an attribute list.
387  * Transitions attribute list from Btree to shortform as necessary.
388  */
389 int
390 xfs_attr_remove(
391 	struct xfs_inode	*dp,
392 	const unsigned char	*name,
393 	int			flags)
394 {
395 	struct xfs_mount	*mp = dp->i_mount;
396 	struct xfs_da_args	args;
397 	struct xfs_defer_ops	dfops;
398 	xfs_fsblock_t		firstblock;
399 	int			error;
400 
401 	XFS_STATS_INC(mp, xs_attr_remove);
402 
403 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
404 		return -EIO;
405 
406 	error = xfs_attr_args_init(&args, dp, name, flags);
407 	if (error)
408 		return error;
409 
410 	args.firstblock = &firstblock;
411 	args.dfops = &dfops;
412 
413 	/*
414 	 * we have no control over the attribute names that userspace passes us
415 	 * to remove, so we have to allow the name lookup prior to attribute
416 	 * removal to fail.
417 	 */
418 	args.op_flags = XFS_DA_OP_OKNOENT;
419 
420 	error = xfs_qm_dqattach(dp, 0);
421 	if (error)
422 		return error;
423 
424 	/*
425 	 * Root fork attributes can use reserved data blocks for this
426 	 * operation if necessary
427 	 */
428 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
429 			XFS_ATTRRM_SPACE_RES(mp), 0,
430 			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
431 			&args.trans);
432 	if (error)
433 		return error;
434 
435 	xfs_ilock(dp, XFS_ILOCK_EXCL);
436 	/*
437 	 * No need to make quota reservations here. We expect to release some
438 	 * blocks not allocate in the common case.
439 	 */
440 	xfs_trans_ijoin(args.trans, dp, 0);
441 
442 	if (!xfs_inode_hasattr(dp)) {
443 		error = -ENOATTR;
444 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
445 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
446 		error = xfs_attr_shortform_remove(&args);
447 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
448 		error = xfs_attr_leaf_removename(&args);
449 	} else {
450 		error = xfs_attr_node_removename(&args);
451 	}
452 
453 	if (error)
454 		goto out;
455 
456 	/*
457 	 * If this is a synchronous mount, make sure that the
458 	 * transaction goes to disk before returning to the user.
459 	 */
460 	if (mp->m_flags & XFS_MOUNT_WSYNC)
461 		xfs_trans_set_sync(args.trans);
462 
463 	if ((flags & ATTR_KERNOTIME) == 0)
464 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
465 
466 	/*
467 	 * Commit the last in the sequence of transactions.
468 	 */
469 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
470 	error = xfs_trans_commit(args.trans);
471 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
472 
473 	return error;
474 
475 out:
476 	if (args.trans)
477 		xfs_trans_cancel(args.trans);
478 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
479 	return error;
480 }
481 
482 /*========================================================================
483  * External routines when attribute list is inside the inode
484  *========================================================================*/
485 
486 /*
487  * Add a name to the shortform attribute list structure
488  * This is the external routine.
489  */
490 STATIC int
491 xfs_attr_shortform_addname(xfs_da_args_t *args)
492 {
493 	int newsize, forkoff, retval;
494 
495 	trace_xfs_attr_sf_addname(args);
496 
497 	retval = xfs_attr_shortform_lookup(args);
498 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
499 		return retval;
500 	} else if (retval == -EEXIST) {
501 		if (args->flags & ATTR_CREATE)
502 			return retval;
503 		retval = xfs_attr_shortform_remove(args);
504 		ASSERT(retval == 0);
505 	}
506 
507 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
508 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
509 		return -ENOSPC;
510 
511 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
512 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
513 
514 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
515 	if (!forkoff)
516 		return -ENOSPC;
517 
518 	xfs_attr_shortform_add(args, forkoff);
519 	return 0;
520 }
521 
522 
523 /*========================================================================
524  * External routines when attribute list is one block
525  *========================================================================*/
526 
/*
 * Add a name to the leaf attribute list structure
 *
 * This leaf block cannot have a "remote" value, we only call this routine
 * if bmap_one_block() says there is only one block (ie: no remote blks).
 *
 * Handles ATTR_CREATE / ATTR_REPLACE semantics; replacing an existing
 * name is done as an "atomic rename": the new entry is added, the
 * INCOMPLETE flags of old and new entries are flipped, and then the old
 * entry (and any remote value it had) is removed.  If the leaf is full,
 * the list is converted to node format and the work is handed to
 * xfs_attr_node_addname().
 *
 * The transaction in args->trans is rolled several times; on return it
 * may be a different transaction than on entry, or NULL after a deferred
 * operation failure.  Returns 0 or a negative errno.
 */
STATIC int
xfs_attr_leaf_addname(xfs_da_args_t *args)
{
	xfs_inode_t *dp;
	struct xfs_buf *bp;
	int retval, error, forkoff;

	trace_xfs_attr_leaf_addname(args);

	/*
	 * Read the (only) block in the attribute list in.
	 */
	dp = args->dp;
	args->blkno = 0;
	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
	if (error)
		return error;

	/*
	 * Look up the given attribute in the leaf block.  Figure out if
	 * the given flags produce an error or call for an atomic rename.
	 */
	retval = xfs_attr3_leaf_lookup_int(bp, args);
	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
		xfs_trans_brelse(args->trans, bp);
		return retval;
	} else if (retval == -EEXIST) {
		if (args->flags & ATTR_CREATE) {	/* pure create op */
			xfs_trans_brelse(args->trans, bp);
			return retval;
		}

		trace_xfs_attr_leaf_replace(args);

		/* save the attribute state for later removal */
		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
		args->blkno2 = args->blkno;		/* set 2nd entry info */
		args->index2 = args->index;
		args->rmtblkno2 = args->rmtblkno;
		args->rmtblkcnt2 = args->rmtblkcnt;
		args->rmtvaluelen2 = args->rmtvaluelen;

		/*
		 * clear the remote attr state now that it is saved so that the
		 * values reflect the state of the attribute we are about to
		 * add, not the attribute we just found and will remove later.
		 */
		args->rmtblkno = 0;
		args->rmtblkcnt = 0;
		args->rmtvaluelen = 0;
	}

	/*
	 * Add the attribute to the leaf block, transitioning to a Btree
	 * if required.
	 *
	 * NOTE(review): only -ENOSPC is examined; any other nonzero return
	 * from xfs_attr3_leaf_add() would be ignored here -- confirm it can
	 * only return 0 or -ENOSPC.
	 */
	retval = xfs_attr3_leaf_add(bp, args);
	if (retval == -ENOSPC) {
		/*
		 * Promote the attribute list to the Btree format, then
		 * Commit that transaction so that the node_addname() call
		 * can manage its own transactions.
		 */
		xfs_defer_init(args->dfops, args->firstblock);
		error = xfs_attr3_leaf_to_node(args);
		if (error)
			goto out_defer_cancel;
		xfs_defer_ijoin(args->dfops, dp);
		error = xfs_defer_finish(&args->trans, args->dfops);
		if (error)
			goto out_defer_cancel;

		/*
		 * Commit the current trans (including the inode) and start
		 * a new one.
		 */
		error = xfs_trans_roll_inode(&args->trans, dp);
		if (error)
			return error;

		/*
		 * Fob the whole rest of the problem off on the Btree code.
		 */
		error = xfs_attr_node_addname(args);
		return error;
	}

	/*
	 * Commit the transaction that added the attr name so that
	 * later routines can manage their own transactions.
	 */
	error = xfs_trans_roll_inode(&args->trans, dp);
	if (error)
		return error;

	/*
	 * If there was an out-of-line value, allocate the blocks we
	 * identified for its storage and copy the value.  This is done
	 * after we create the attribute so that we don't overflow the
	 * maximum size of a transaction and/or hit a deadlock.
	 */
	if (args->rmtblkno > 0) {
		error = xfs_attr_rmtval_set(args);
		if (error)
			return error;
	}

	/*
	 * If this is an atomic rename operation, we must "flip" the
	 * incomplete flags on the "new" and "old" attribute/value pairs
	 * so that one disappears and one appears atomically.  Then we
	 * must remove the "old" attribute/value pair.
	 */
	if (args->op_flags & XFS_DA_OP_RENAME) {
		/*
		 * In a separate transaction, set the incomplete flag on the
		 * "old" attr and clear the incomplete flag on the "new" attr.
		 */
		error = xfs_attr3_leaf_flipflags(args);
		if (error)
			return error;

		/*
		 * Dismantle the "old" attribute/value pair by removing
		 * a "remote" value (if it exists).
		 */
		args->index = args->index2;
		args->blkno = args->blkno2;
		args->rmtblkno = args->rmtblkno2;
		args->rmtblkcnt = args->rmtblkcnt2;
		args->rmtvaluelen = args->rmtvaluelen2;
		if (args->rmtblkno) {
			error = xfs_attr_rmtval_remove(args);
			if (error)
				return error;
		}

		/*
		 * Read in the block containing the "old" attr, then
		 * remove the "old" attr from that block (neat, huh!)
		 */
		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
					   -1, &bp);
		if (error)
			return error;

		xfs_attr3_leaf_remove(bp, args);

		/*
		 * If the result is small enough, shrink it all into the inode.
		 */
		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
			xfs_defer_init(args->dfops, args->firstblock);
			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
			/* bp is gone due to xfs_da_shrink_inode */
			if (error)
				goto out_defer_cancel;
			xfs_defer_ijoin(args->dfops, dp);
			error = xfs_defer_finish(&args->trans, args->dfops);
			if (error)
				goto out_defer_cancel;
		}

		/*
		 * Commit the remove and start the next trans in series.
		 */
		error = xfs_trans_roll_inode(&args->trans, dp);

	} else if (args->rmtblkno > 0) {
		/*
		 * Added a "remote" value, just clear the incomplete flag.
		 */
		error = xfs_attr3_leaf_clearflag(args);
	}
	return error;
out_defer_cancel:
	/*
	 * NOTE(review): args->trans is set to NULL without an explicit
	 * xfs_trans_cancel(); callers then skip their own cancel.  Confirm
	 * the transaction is released on this path.
	 */
	xfs_defer_cancel(args->dfops);
	args->trans = NULL;
	return error;
}
713 
714 /*
715  * Remove a name from the leaf attribute list structure
716  *
717  * This leaf block cannot have a "remote" value, we only call this routine
718  * if bmap_one_block() says there is only one block (ie: no remote blks).
719  */
720 STATIC int
721 xfs_attr_leaf_removename(xfs_da_args_t *args)
722 {
723 	xfs_inode_t *dp;
724 	struct xfs_buf *bp;
725 	int error, forkoff;
726 
727 	trace_xfs_attr_leaf_removename(args);
728 
729 	/*
730 	 * Remove the attribute.
731 	 */
732 	dp = args->dp;
733 	args->blkno = 0;
734 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
735 	if (error)
736 		return error;
737 
738 	error = xfs_attr3_leaf_lookup_int(bp, args);
739 	if (error == -ENOATTR) {
740 		xfs_trans_brelse(args->trans, bp);
741 		return error;
742 	}
743 
744 	xfs_attr3_leaf_remove(bp, args);
745 
746 	/*
747 	 * If the result is small enough, shrink it all into the inode.
748 	 */
749 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
750 		xfs_defer_init(args->dfops, args->firstblock);
751 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
752 		/* bp is gone due to xfs_da_shrink_inode */
753 		if (error)
754 			goto out_defer_cancel;
755 		xfs_defer_ijoin(args->dfops, dp);
756 		error = xfs_defer_finish(&args->trans, args->dfops);
757 		if (error)
758 			goto out_defer_cancel;
759 	}
760 	return 0;
761 out_defer_cancel:
762 	xfs_defer_cancel(args->dfops);
763 	args->trans = NULL;
764 	return error;
765 }
766 
767 /*
768  * Look up a name in a leaf attribute list structure.
769  *
770  * This leaf block cannot have a "remote" value, we only call this routine
771  * if bmap_one_block() says there is only one block (ie: no remote blks).
772  */
773 STATIC int
774 xfs_attr_leaf_get(xfs_da_args_t *args)
775 {
776 	struct xfs_buf *bp;
777 	int error;
778 
779 	trace_xfs_attr_leaf_get(args);
780 
781 	args->blkno = 0;
782 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
783 	if (error)
784 		return error;
785 
786 	error = xfs_attr3_leaf_lookup_int(bp, args);
787 	if (error != -EEXIST)  {
788 		xfs_trans_brelse(args->trans, bp);
789 		return error;
790 	}
791 	error = xfs_attr3_leaf_getvalue(bp, args);
792 	xfs_trans_brelse(args->trans, bp);
793 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
794 		error = xfs_attr_rmtval_get(args);
795 	}
796 	return error;
797 }
798 
799 /*========================================================================
800  * External routines when attribute list size > geo->blksize
801  *========================================================================*/
802 
/*
 * Add a name to a Btree-format attribute list.
 *
 * This will involve walking down the Btree, and may involve splitting
 * leaf nodes and even splitting intermediate nodes up to and including
 * the root node (a special case of an intermediate node).
 *
 * "Remote" attribute values confuse the issue and atomic rename operations
 * add a whole extra layer of confusion on top of that.
 *
 * args->trans is rolled several times along the way and is set to NULL
 * after a deferred-operation failure.  Returns 0 on success, the lookup
 * result (-ENOATTR for a failed ATTR_REPLACE, -EEXIST for a failed
 * ATTR_CREATE), or another negative errno.
 */
STATIC int
xfs_attr_node_addname(xfs_da_args_t *args)
{
	xfs_da_state_t *state;
	xfs_da_state_blk_t *blk;
	xfs_inode_t *dp;
	xfs_mount_t *mp;
	int retval, error;

	trace_xfs_attr_node_addname(args);

	/*
	 * Fill in bucket of arguments/results/context to carry around.
	 */
	dp = args->dp;
	mp = dp->i_mount;
restart:
	/* Re-entered after a single leaf has been converted to node form. */
	state = xfs_da_state_alloc();
	state->args = args;
	state->mp = mp;

	/*
	 * Search to see if name already exists, and get back a pointer
	 * to where it should go.
	 */
	error = xfs_da3_node_lookup_int(state, &retval);
	if (error)
		goto out;
	blk = &state->path.blk[ state->path.active-1 ];
	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
		/* error is 0 here, so "out" returns retval (-ENOATTR). */
		goto out;
	} else if (retval == -EEXIST) {
		/* error is 0 here, so "out" returns retval (-EEXIST). */
		if (args->flags & ATTR_CREATE)
			goto out;

		trace_xfs_attr_node_replace(args);

		/* save the attribute state for later removal */
		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
		args->blkno2 = args->blkno;		/* set 2nd entry info */
		args->index2 = args->index;
		args->rmtblkno2 = args->rmtblkno;
		args->rmtblkcnt2 = args->rmtblkcnt;
		args->rmtvaluelen2 = args->rmtvaluelen;

		/*
		 * clear the remote attr state now that it is saved so that the
		 * values reflect the state of the attribute we are about to
		 * add, not the attribute we just found and will remove later.
		 */
		args->rmtblkno = 0;
		args->rmtblkcnt = 0;
		args->rmtvaluelen = 0;
	}

	retval = xfs_attr3_leaf_add(blk->bp, state->args);
	if (retval == -ENOSPC) {
		if (state->path.active == 1) {
			/*
			 * It's really a single leaf node, but it had
			 * out-of-line values so it looked like it *might*
			 * have been a b-tree.
			 */
			xfs_da_state_free(state);
			state = NULL;
			xfs_defer_init(args->dfops, args->firstblock);
			error = xfs_attr3_leaf_to_node(args);
			if (error)
				goto out_defer_cancel;
			xfs_defer_ijoin(args->dfops, dp);
			error = xfs_defer_finish(&args->trans, args->dfops);
			if (error)
				goto out_defer_cancel;

			/*
			 * Commit the node conversion and start the next
			 * trans in the chain.
			 */
			error = xfs_trans_roll_inode(&args->trans, dp);
			if (error)
				goto out;

			goto restart;
		}

		/*
		 * Split as many Btree elements as required.
		 * This code tracks the new and old attr's location
		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
		 */
		xfs_defer_init(args->dfops, args->firstblock);
		error = xfs_da3_split(state);
		if (error)
			goto out_defer_cancel;
		xfs_defer_ijoin(args->dfops, dp);
		error = xfs_defer_finish(&args->trans, args->dfops);
		if (error)
			goto out_defer_cancel;
	} else {
		/*
		 * Addition succeeded, update Btree hashvals.
		 */
		xfs_da3_fixhashpath(state, &state->path);
	}

	/*
	 * Kill the state structure, we're done with it and need to
	 * allow the buffers to come back later.
	 */
	xfs_da_state_free(state);
	state = NULL;

	/*
	 * Commit the leaf addition or btree split and start the next
	 * trans in the chain.
	 */
	error = xfs_trans_roll_inode(&args->trans, dp);
	if (error)
		goto out;

	/*
	 * If there was an out-of-line value, allocate the blocks we
	 * identified for its storage and copy the value.  This is done
	 * after we create the attribute so that we don't overflow the
	 * maximum size of a transaction and/or hit a deadlock.
	 */
	if (args->rmtblkno > 0) {
		error = xfs_attr_rmtval_set(args);
		/* state is NULL here, so a direct return leaks nothing. */
		if (error)
			return error;
	}

	/*
	 * If this is an atomic rename operation, we must "flip" the
	 * incomplete flags on the "new" and "old" attribute/value pairs
	 * so that one disappears and one appears atomically.  Then we
	 * must remove the "old" attribute/value pair.
	 */
	if (args->op_flags & XFS_DA_OP_RENAME) {
		/*
		 * In a separate transaction, set the incomplete flag on the
		 * "old" attr and clear the incomplete flag on the "new" attr.
		 */
		error = xfs_attr3_leaf_flipflags(args);
		if (error)
			goto out;

		/*
		 * Dismantle the "old" attribute/value pair by removing
		 * a "remote" value (if it exists).
		 */
		args->index = args->index2;
		args->blkno = args->blkno2;
		args->rmtblkno = args->rmtblkno2;
		args->rmtblkcnt = args->rmtblkcnt2;
		args->rmtvaluelen = args->rmtvaluelen2;
		if (args->rmtblkno) {
			error = xfs_attr_rmtval_remove(args);
			/* state is still NULL at this point. */
			if (error)
				return error;
		}

		/*
		 * Re-find the "old" attribute entry after any split ops.
		 * The INCOMPLETE flag means that we will find the "old"
		 * attr, not the "new" one.
		 */
		args->flags |= XFS_ATTR_INCOMPLETE;
		state = xfs_da_state_alloc();
		state->args = args;
		state->mp = mp;
		state->inleaf = 0;
		error = xfs_da3_node_lookup_int(state, &retval);
		if (error)
			goto out;

		/*
		 * Remove the name and update the hashvals in the tree.
		 *
		 * NOTE(review): the result of xfs_attr3_leaf_remove() lands
		 * in 'error' and is overwritten before it is ever read; the
		 * join test below uses 'retval' from the lookup above instead.
		 * The node remove path tests the leaf_remove result -- confirm
		 * which is intended.
		 */
		blk = &state->path.blk[ state->path.active-1 ];
		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
		error = xfs_attr3_leaf_remove(blk->bp, args);
		xfs_da3_fixhashpath(state, &state->path);

		/*
		 * Check to see if the tree needs to be collapsed.
		 */
		if (retval && (state->path.active > 1)) {
			xfs_defer_init(args->dfops, args->firstblock);
			error = xfs_da3_join(state);
			if (error)
				goto out_defer_cancel;
			xfs_defer_ijoin(args->dfops, dp);
			error = xfs_defer_finish(&args->trans, args->dfops);
			if (error)
				goto out_defer_cancel;
		}

		/*
		 * Commit and start the next trans in the chain.
		 */
		error = xfs_trans_roll_inode(&args->trans, dp);
		if (error)
			goto out;

	} else if (args->rmtblkno > 0) {
		/*
		 * Added a "remote" value, just clear the incomplete flag.
		 */
		error = xfs_attr3_leaf_clearflag(args);
		if (error)
			goto out;
	}
	/* Success: make sure neither result variable reports a failure. */
	retval = error = 0;

out:
	if (state)
		xfs_da_state_free(state);
	/* A hard error wins; otherwise hand back the lookup/add status. */
	if (error)
		return error;
	return retval;
out_defer_cancel:
	/*
	 * NOTE(review): args->trans is cleared without xfs_trans_cancel();
	 * confirm the transaction is released on this path.
	 */
	xfs_defer_cancel(args->dfops);
	args->trans = NULL;
	goto out;
}
1041 
1042 /*
1043  * Remove a name from a B-tree attribute list.
1044  *
1045  * This will involve walking down the Btree, and may involve joining
1046  * leaf nodes and even joining intermediate nodes up to and including
1047  * the root node (a special case of an intermediate node).
1048  */
1049 STATIC int
1050 xfs_attr_node_removename(xfs_da_args_t *args)
1051 {
1052 	xfs_da_state_t *state;
1053 	xfs_da_state_blk_t *blk;
1054 	xfs_inode_t *dp;
1055 	struct xfs_buf *bp;
1056 	int retval, error, forkoff;
1057 
1058 	trace_xfs_attr_node_removename(args);
1059 
1060 	/*
1061 	 * Tie a string around our finger to remind us where we are.
1062 	 */
1063 	dp = args->dp;
1064 	state = xfs_da_state_alloc();
1065 	state->args = args;
1066 	state->mp = dp->i_mount;
1067 
1068 	/*
1069 	 * Search to see if name exists, and get back a pointer to it.
1070 	 */
1071 	error = xfs_da3_node_lookup_int(state, &retval);
1072 	if (error || (retval != -EEXIST)) {
1073 		if (error == 0)
1074 			error = retval;
1075 		goto out;
1076 	}
1077 
1078 	/*
1079 	 * If there is an out-of-line value, de-allocate the blocks.
1080 	 * This is done before we remove the attribute so that we don't
1081 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1082 	 */
1083 	blk = &state->path.blk[ state->path.active-1 ];
1084 	ASSERT(blk->bp != NULL);
1085 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1086 	if (args->rmtblkno > 0) {
1087 		/*
1088 		 * Fill in disk block numbers in the state structure
1089 		 * so that we can get the buffers back after we commit
1090 		 * several transactions in the following calls.
1091 		 */
1092 		error = xfs_attr_fillstate(state);
1093 		if (error)
1094 			goto out;
1095 
1096 		/*
1097 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1098 		 * remote value.
1099 		 */
1100 		error = xfs_attr3_leaf_setflag(args);
1101 		if (error)
1102 			goto out;
1103 		error = xfs_attr_rmtval_remove(args);
1104 		if (error)
1105 			goto out;
1106 
1107 		/*
1108 		 * Refill the state structure with buffers, the prior calls
1109 		 * released our buffers.
1110 		 */
1111 		error = xfs_attr_refillstate(state);
1112 		if (error)
1113 			goto out;
1114 	}
1115 
1116 	/*
1117 	 * Remove the name and update the hashvals in the tree.
1118 	 */
1119 	blk = &state->path.blk[ state->path.active-1 ];
1120 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1121 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1122 	xfs_da3_fixhashpath(state, &state->path);
1123 
1124 	/*
1125 	 * Check to see if the tree needs to be collapsed.
1126 	 */
1127 	if (retval && (state->path.active > 1)) {
1128 		xfs_defer_init(args->dfops, args->firstblock);
1129 		error = xfs_da3_join(state);
1130 		if (error)
1131 			goto out_defer_cancel;
1132 		xfs_defer_ijoin(args->dfops, dp);
1133 		error = xfs_defer_finish(&args->trans, args->dfops);
1134 		if (error)
1135 			goto out_defer_cancel;
1136 		/*
1137 		 * Commit the Btree join operation and start a new trans.
1138 		 */
1139 		error = xfs_trans_roll_inode(&args->trans, dp);
1140 		if (error)
1141 			goto out;
1142 	}
1143 
1144 	/*
1145 	 * If the result is small enough, push it all into the inode.
1146 	 */
1147 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1148 		/*
1149 		 * Have to get rid of the copy of this dabuf in the state.
1150 		 */
1151 		ASSERT(state->path.active == 1);
1152 		ASSERT(state->path.blk[0].bp);
1153 		state->path.blk[0].bp = NULL;
1154 
1155 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1156 		if (error)
1157 			goto out;
1158 
1159 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1160 			xfs_defer_init(args->dfops, args->firstblock);
1161 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1162 			/* bp is gone due to xfs_da_shrink_inode */
1163 			if (error)
1164 				goto out_defer_cancel;
1165 			xfs_defer_ijoin(args->dfops, dp);
1166 			error = xfs_defer_finish(&args->trans, args->dfops);
1167 			if (error)
1168 				goto out_defer_cancel;
1169 		} else
1170 			xfs_trans_brelse(args->trans, bp);
1171 	}
1172 	error = 0;
1173 
1174 out:
1175 	xfs_da_state_free(state);
1176 	return error;
1177 out_defer_cancel:
1178 	xfs_defer_cancel(args->dfops);
1179 	args->trans = NULL;
1180 	goto out;
1181 }
1182 
1183 /*
1184  * Fill in the disk block numbers in the state structure for the buffers
1185  * that are attached to the state structure.
1186  * This is done so that we can quickly reattach ourselves to those buffers
1187  * after some set of transaction commits have released these buffers.
1188  */
1189 STATIC int
1190 xfs_attr_fillstate(xfs_da_state_t *state)
1191 {
1192 	xfs_da_state_path_t *path;
1193 	xfs_da_state_blk_t *blk;
1194 	int level;
1195 
1196 	trace_xfs_attr_fillstate(state->args);
1197 
1198 	/*
1199 	 * Roll down the "path" in the state structure, storing the on-disk
1200 	 * block number for those buffers in the "path".
1201 	 */
1202 	path = &state->path;
1203 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1204 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1205 		if (blk->bp) {
1206 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1207 			blk->bp = NULL;
1208 		} else {
1209 			blk->disk_blkno = 0;
1210 		}
1211 	}
1212 
1213 	/*
1214 	 * Roll down the "altpath" in the state structure, storing the on-disk
1215 	 * block number for those buffers in the "altpath".
1216 	 */
1217 	path = &state->altpath;
1218 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1219 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1220 		if (blk->bp) {
1221 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1222 			blk->bp = NULL;
1223 		} else {
1224 			blk->disk_blkno = 0;
1225 		}
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 /*
1232  * Reattach the buffers to the state structure based on the disk block
1233  * numbers stored in the state structure.
1234  * This is done after some set of transaction commits have released those
1235  * buffers from our grip.
1236  */
1237 STATIC int
1238 xfs_attr_refillstate(xfs_da_state_t *state)
1239 {
1240 	xfs_da_state_path_t *path;
1241 	xfs_da_state_blk_t *blk;
1242 	int level, error;
1243 
1244 	trace_xfs_attr_refillstate(state->args);
1245 
1246 	/*
1247 	 * Roll down the "path" in the state structure, storing the on-disk
1248 	 * block number for those buffers in the "path".
1249 	 */
1250 	path = &state->path;
1251 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1252 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1253 		if (blk->disk_blkno) {
1254 			error = xfs_da3_node_read(state->args->trans,
1255 						state->args->dp,
1256 						blk->blkno, blk->disk_blkno,
1257 						&blk->bp, XFS_ATTR_FORK);
1258 			if (error)
1259 				return error;
1260 		} else {
1261 			blk->bp = NULL;
1262 		}
1263 	}
1264 
1265 	/*
1266 	 * Roll down the "altpath" in the state structure, storing the on-disk
1267 	 * block number for those buffers in the "altpath".
1268 	 */
1269 	path = &state->altpath;
1270 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1271 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1272 		if (blk->disk_blkno) {
1273 			error = xfs_da3_node_read(state->args->trans,
1274 						state->args->dp,
1275 						blk->blkno, blk->disk_blkno,
1276 						&blk->bp, XFS_ATTR_FORK);
1277 			if (error)
1278 				return error;
1279 		} else {
1280 			blk->bp = NULL;
1281 		}
1282 	}
1283 
1284 	return 0;
1285 }
1286 
1287 /*
1288  * Look up a filename in a node attribute list.
1289  *
1290  * This routine gets called for any attribute fork that has more than one
1291  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1292  * "remote" values taking up more blocks.
1293  */
1294 STATIC int
1295 xfs_attr_node_get(xfs_da_args_t *args)
1296 {
1297 	xfs_da_state_t *state;
1298 	xfs_da_state_blk_t *blk;
1299 	int error, retval;
1300 	int i;
1301 
1302 	trace_xfs_attr_node_get(args);
1303 
1304 	state = xfs_da_state_alloc();
1305 	state->args = args;
1306 	state->mp = args->dp->i_mount;
1307 
1308 	/*
1309 	 * Search to see if name exists, and get back a pointer to it.
1310 	 */
1311 	error = xfs_da3_node_lookup_int(state, &retval);
1312 	if (error) {
1313 		retval = error;
1314 	} else if (retval == -EEXIST) {
1315 		blk = &state->path.blk[ state->path.active-1 ];
1316 		ASSERT(blk->bp != NULL);
1317 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1318 
1319 		/*
1320 		 * Get the value, local or "remote"
1321 		 */
1322 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1323 		if (!retval && (args->rmtblkno > 0)
1324 		    && !(args->flags & ATTR_KERNOVAL)) {
1325 			retval = xfs_attr_rmtval_get(args);
1326 		}
1327 	}
1328 
1329 	/*
1330 	 * If not in a transaction, we have to release all the buffers.
1331 	 */
1332 	for (i = 0; i < state->path.active; i++) {
1333 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1334 		state->path.blk[i].bp = NULL;
1335 	}
1336 
1337 	xfs_da_state_free(state);
1338 	return retval;
1339 }
1340