xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr.c (revision 9dae47aba0a055f761176d9297371d5bb24289ec)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_attr_sf.h"
30 #include "xfs_inode.h"
31 #include "xfs_alloc.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_bmap.h"
35 #include "xfs_bmap_util.h"
36 #include "xfs_bmap_btree.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_attr_remote.h"
40 #include "xfs_error.h"
41 #include "xfs_quota.h"
42 #include "xfs_trans_space.h"
43 #include "xfs_trace.h"
44 
45 /*
46  * xfs_attr.c
47  *
48  * Provide the external interfaces to manage attribute lists.
49  */
50 
51 /*========================================================================
52  * Function prototypes for the kernel.
53  *========================================================================*/
54 
55 /*
56  * Internal routines when attribute list fits inside the inode.
57  */
58 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
59 
60 /*
61  * Internal routines when attribute list is one block.
62  */
63 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
64 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
65 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
66 
67 /*
68  * Internal routines when attribute list is more than one block.
69  */
70 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
71 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
72 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
73 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
74 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
75 
76 
77 STATIC int
78 xfs_attr_args_init(
79 	struct xfs_da_args	*args,
80 	struct xfs_inode	*dp,
81 	const unsigned char	*name,
82 	int			flags)
83 {
84 
85 	if (!name)
86 		return -EINVAL;
87 
88 	memset(args, 0, sizeof(*args));
89 	args->geo = dp->i_mount->m_attr_geo;
90 	args->whichfork = XFS_ATTR_FORK;
91 	args->dp = dp;
92 	args->flags = flags;
93 	args->name = name;
94 	args->namelen = strlen((const char *)name);
95 	if (args->namelen >= MAXNAMELEN)
96 		return -EFAULT;		/* match IRIX behaviour */
97 
98 	args->hashval = xfs_da_hashname(args->name, args->namelen);
99 	return 0;
100 }
101 
102 int
103 xfs_inode_hasattr(
104 	struct xfs_inode	*ip)
105 {
106 	if (!XFS_IFORK_Q(ip) ||
107 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
108 	     ip->i_d.di_anextents == 0))
109 		return 0;
110 	return 1;
111 }
112 
113 /*========================================================================
114  * Overall external interface routines.
115  *========================================================================*/
116 
117 /* Retrieve an extended attribute and its value.  Must have ilock. */
118 int
119 xfs_attr_get_ilocked(
120 	struct xfs_inode	*ip,
121 	struct xfs_da_args	*args)
122 {
123 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
124 
125 	if (!xfs_inode_hasattr(ip))
126 		return -ENOATTR;
127 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
128 		return xfs_attr_shortform_getvalue(args);
129 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
130 		return xfs_attr_leaf_get(args);
131 	else
132 		return xfs_attr_node_get(args);
133 }
134 
135 /* Retrieve an extended attribute by name, and its value. */
136 int
137 xfs_attr_get(
138 	struct xfs_inode	*ip,
139 	const unsigned char	*name,
140 	unsigned char		*value,
141 	int			*valuelenp,
142 	int			flags)
143 {
144 	struct xfs_da_args	args;
145 	uint			lock_mode;
146 	int			error;
147 
148 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
149 
150 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
151 		return -EIO;
152 
153 	error = xfs_attr_args_init(&args, ip, name, flags);
154 	if (error)
155 		return error;
156 
157 	args.value = value;
158 	args.valuelen = *valuelenp;
159 	/* Entirely possible to look up a name which doesn't exist */
160 	args.op_flags = XFS_DA_OP_OKNOENT;
161 
162 	lock_mode = xfs_ilock_attr_map_shared(ip);
163 	error = xfs_attr_get_ilocked(ip, &args);
164 	xfs_iunlock(ip, lock_mode);
165 
166 	*valuelenp = args.valuelen;
167 	return error == -EEXIST ? 0 : error;
168 }
169 
170 /*
171  * Calculate how many blocks we need for the new attribute,
172  */
173 STATIC int
174 xfs_attr_calc_size(
175 	struct xfs_da_args	*args,
176 	int			*local)
177 {
178 	struct xfs_mount	*mp = args->dp->i_mount;
179 	int			size;
180 	int			nblks;
181 
182 	/*
183 	 * Determine space new attribute will use, and if it would be
184 	 * "local" or "remote" (note: local != inline).
185 	 */
186 	size = xfs_attr_leaf_newentsize(args, local);
187 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
188 	if (*local) {
189 		if (size > (args->geo->blksize / 2)) {
190 			/* Double split possible */
191 			nblks *= 2;
192 		}
193 	} else {
194 		/*
195 		 * Out of line attribute, cannot double split, but
196 		 * make room for the attribute value itself.
197 		 */
198 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
199 		nblks += dblocks;
200 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
201 	}
202 
203 	return nblks;
204 }
205 
206 int
207 xfs_attr_set(
208 	struct xfs_inode	*dp,
209 	const unsigned char	*name,
210 	unsigned char		*value,
211 	int			valuelen,
212 	int			flags)
213 {
214 	struct xfs_mount	*mp = dp->i_mount;
215 	struct xfs_buf		*leaf_bp = NULL;
216 	struct xfs_da_args	args;
217 	struct xfs_defer_ops	dfops;
218 	struct xfs_trans_res	tres;
219 	xfs_fsblock_t		firstblock;
220 	int			rsvd = (flags & ATTR_ROOT) != 0;
221 	int			error, err2, local;
222 
223 	XFS_STATS_INC(mp, xs_attr_set);
224 
225 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
226 		return -EIO;
227 
228 	error = xfs_attr_args_init(&args, dp, name, flags);
229 	if (error)
230 		return error;
231 
232 	args.value = value;
233 	args.valuelen = valuelen;
234 	args.firstblock = &firstblock;
235 	args.dfops = &dfops;
236 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
237 	args.total = xfs_attr_calc_size(&args, &local);
238 
239 	error = xfs_qm_dqattach(dp, 0);
240 	if (error)
241 		return error;
242 
243 	/*
244 	 * If the inode doesn't have an attribute fork, add one.
245 	 * (inode must not be locked when we call this routine)
246 	 */
247 	if (XFS_IFORK_Q(dp) == 0) {
248 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
249 			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
250 
251 		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
252 		if (error)
253 			return error;
254 	}
255 
256 	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
257 			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
258 	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
259 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
260 
261 	/*
262 	 * Root fork attributes can use reserved data blocks for this
263 	 * operation if necessary
264 	 */
265 	error = xfs_trans_alloc(mp, &tres, args.total, 0,
266 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
267 	if (error)
268 		return error;
269 
270 	xfs_ilock(dp, XFS_ILOCK_EXCL);
271 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
272 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
273 				       XFS_QMOPT_RES_REGBLKS);
274 	if (error) {
275 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
276 		xfs_trans_cancel(args.trans);
277 		return error;
278 	}
279 
280 	xfs_trans_ijoin(args.trans, dp, 0);
281 
282 	/*
283 	 * If the attribute list is non-existent or a shortform list,
284 	 * upgrade it to a single-leaf-block attribute list.
285 	 */
286 	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
287 	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
288 	     dp->i_d.di_anextents == 0)) {
289 
290 		/*
291 		 * Build initial attribute list (if required).
292 		 */
293 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
294 			xfs_attr_shortform_create(&args);
295 
296 		/*
297 		 * Try to add the attr to the attribute list in
298 		 * the inode.
299 		 */
300 		error = xfs_attr_shortform_addname(&args);
301 		if (error != -ENOSPC) {
302 			/*
303 			 * Commit the shortform mods, and we're done.
304 			 * NOTE: this is also the error path (EEXIST, etc).
305 			 */
306 			ASSERT(args.trans != NULL);
307 
308 			/*
309 			 * If this is a synchronous mount, make sure that
310 			 * the transaction goes to disk before returning
311 			 * to the user.
312 			 */
313 			if (mp->m_flags & XFS_MOUNT_WSYNC)
314 				xfs_trans_set_sync(args.trans);
315 
316 			if (!error && (flags & ATTR_KERNOTIME) == 0) {
317 				xfs_trans_ichgtime(args.trans, dp,
318 							XFS_ICHGTIME_CHG);
319 			}
320 			err2 = xfs_trans_commit(args.trans);
321 			xfs_iunlock(dp, XFS_ILOCK_EXCL);
322 
323 			return error ? error : err2;
324 		}
325 
326 		/*
327 		 * It won't fit in the shortform, transform to a leaf block.
328 		 * GROT: another possible req'mt for a double-split btree op.
329 		 */
330 		xfs_defer_init(args.dfops, args.firstblock);
331 		error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
332 		if (error)
333 			goto out_defer_cancel;
334 		/*
335 		 * Prevent the leaf buffer from being unlocked so that a
336 		 * concurrent AIL push cannot grab the half-baked leaf
337 		 * buffer and run into problems with the write verifier.
338 		 */
339 		xfs_trans_bhold(args.trans, leaf_bp);
340 		xfs_defer_bjoin(args.dfops, leaf_bp);
341 		xfs_defer_ijoin(args.dfops, dp);
342 		error = xfs_defer_finish(&args.trans, args.dfops);
343 		if (error)
344 			goto out_defer_cancel;
345 
346 		/*
347 		 * Commit the leaf transformation.  We'll need another (linked)
348 		 * transaction to add the new attribute to the leaf, which
349 		 * means that we have to hold & join the leaf buffer here too.
350 		 */
351 		error = xfs_trans_roll_inode(&args.trans, dp);
352 		if (error)
353 			goto out;
354 		xfs_trans_bjoin(args.trans, leaf_bp);
355 		leaf_bp = NULL;
356 	}
357 
358 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
359 		error = xfs_attr_leaf_addname(&args);
360 	else
361 		error = xfs_attr_node_addname(&args);
362 	if (error)
363 		goto out;
364 
365 	/*
366 	 * If this is a synchronous mount, make sure that the
367 	 * transaction goes to disk before returning to the user.
368 	 */
369 	if (mp->m_flags & XFS_MOUNT_WSYNC)
370 		xfs_trans_set_sync(args.trans);
371 
372 	if ((flags & ATTR_KERNOTIME) == 0)
373 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
374 
375 	/*
376 	 * Commit the last in the sequence of transactions.
377 	 */
378 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
379 	error = xfs_trans_commit(args.trans);
380 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
381 
382 	return error;
383 
384 out_defer_cancel:
385 	xfs_defer_cancel(&dfops);
386 out:
387 	if (leaf_bp)
388 		xfs_trans_brelse(args.trans, leaf_bp);
389 	if (args.trans)
390 		xfs_trans_cancel(args.trans);
391 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
392 	return error;
393 }
394 
395 /*
396  * Generic handler routine to remove a name from an attribute list.
397  * Transitions attribute list from Btree to shortform as necessary.
398  */
399 int
400 xfs_attr_remove(
401 	struct xfs_inode	*dp,
402 	const unsigned char	*name,
403 	int			flags)
404 {
405 	struct xfs_mount	*mp = dp->i_mount;
406 	struct xfs_da_args	args;
407 	struct xfs_defer_ops	dfops;
408 	xfs_fsblock_t		firstblock;
409 	int			error;
410 
411 	XFS_STATS_INC(mp, xs_attr_remove);
412 
413 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
414 		return -EIO;
415 
416 	error = xfs_attr_args_init(&args, dp, name, flags);
417 	if (error)
418 		return error;
419 
420 	args.firstblock = &firstblock;
421 	args.dfops = &dfops;
422 
423 	/*
424 	 * we have no control over the attribute names that userspace passes us
425 	 * to remove, so we have to allow the name lookup prior to attribute
426 	 * removal to fail.
427 	 */
428 	args.op_flags = XFS_DA_OP_OKNOENT;
429 
430 	error = xfs_qm_dqattach(dp, 0);
431 	if (error)
432 		return error;
433 
434 	/*
435 	 * Root fork attributes can use reserved data blocks for this
436 	 * operation if necessary
437 	 */
438 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
439 			XFS_ATTRRM_SPACE_RES(mp), 0,
440 			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
441 			&args.trans);
442 	if (error)
443 		return error;
444 
445 	xfs_ilock(dp, XFS_ILOCK_EXCL);
446 	/*
447 	 * No need to make quota reservations here. We expect to release some
448 	 * blocks not allocate in the common case.
449 	 */
450 	xfs_trans_ijoin(args.trans, dp, 0);
451 
452 	if (!xfs_inode_hasattr(dp)) {
453 		error = -ENOATTR;
454 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
455 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
456 		error = xfs_attr_shortform_remove(&args);
457 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
458 		error = xfs_attr_leaf_removename(&args);
459 	} else {
460 		error = xfs_attr_node_removename(&args);
461 	}
462 
463 	if (error)
464 		goto out;
465 
466 	/*
467 	 * If this is a synchronous mount, make sure that the
468 	 * transaction goes to disk before returning to the user.
469 	 */
470 	if (mp->m_flags & XFS_MOUNT_WSYNC)
471 		xfs_trans_set_sync(args.trans);
472 
473 	if ((flags & ATTR_KERNOTIME) == 0)
474 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
475 
476 	/*
477 	 * Commit the last in the sequence of transactions.
478 	 */
479 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
480 	error = xfs_trans_commit(args.trans);
481 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
482 
483 	return error;
484 
485 out:
486 	if (args.trans)
487 		xfs_trans_cancel(args.trans);
488 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
489 	return error;
490 }
491 
492 /*========================================================================
493  * External routines when attribute list is inside the inode
494  *========================================================================*/
495 
496 /*
497  * Add a name to the shortform attribute list structure
498  * This is the external routine.
499  */
500 STATIC int
501 xfs_attr_shortform_addname(xfs_da_args_t *args)
502 {
503 	int newsize, forkoff, retval;
504 
505 	trace_xfs_attr_sf_addname(args);
506 
507 	retval = xfs_attr_shortform_lookup(args);
508 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
509 		return retval;
510 	} else if (retval == -EEXIST) {
511 		if (args->flags & ATTR_CREATE)
512 			return retval;
513 		retval = xfs_attr_shortform_remove(args);
514 		ASSERT(retval == 0);
515 	}
516 
517 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
518 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
519 		return -ENOSPC;
520 
521 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
522 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
523 
524 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
525 	if (!forkoff)
526 		return -ENOSPC;
527 
528 	xfs_attr_shortform_add(args, forkoff);
529 	return 0;
530 }
531 
532 
533 /*========================================================================
534  * External routines when attribute list is one block
535  *========================================================================*/
536 
537 /*
538  * Add a name to the leaf attribute list structure
539  *
540  * This leaf block cannot have a "remote" value, we only call this routine
541  * if bmap_one_block() says there is only one block (ie: no remote blks).
542  */
543 STATIC int
544 xfs_attr_leaf_addname(xfs_da_args_t *args)
545 {
546 	xfs_inode_t *dp;
547 	struct xfs_buf *bp;
548 	int retval, error, forkoff;
549 
550 	trace_xfs_attr_leaf_addname(args);
551 
552 	/*
553 	 * Read the (only) block in the attribute list in.
554 	 */
555 	dp = args->dp;
556 	args->blkno = 0;
557 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
558 	if (error)
559 		return error;
560 
561 	/*
562 	 * Look up the given attribute in the leaf block.  Figure out if
563 	 * the given flags produce an error or call for an atomic rename.
564 	 */
565 	retval = xfs_attr3_leaf_lookup_int(bp, args);
566 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
567 		xfs_trans_brelse(args->trans, bp);
568 		return retval;
569 	} else if (retval == -EEXIST) {
570 		if (args->flags & ATTR_CREATE) {	/* pure create op */
571 			xfs_trans_brelse(args->trans, bp);
572 			return retval;
573 		}
574 
575 		trace_xfs_attr_leaf_replace(args);
576 
577 		/* save the attribute state for later removal*/
578 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
579 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
580 		args->index2 = args->index;
581 		args->rmtblkno2 = args->rmtblkno;
582 		args->rmtblkcnt2 = args->rmtblkcnt;
583 		args->rmtvaluelen2 = args->rmtvaluelen;
584 
585 		/*
586 		 * clear the remote attr state now that it is saved so that the
587 		 * values reflect the state of the attribute we are about to
588 		 * add, not the attribute we just found and will remove later.
589 		 */
590 		args->rmtblkno = 0;
591 		args->rmtblkcnt = 0;
592 		args->rmtvaluelen = 0;
593 	}
594 
595 	/*
596 	 * Add the attribute to the leaf block, transitioning to a Btree
597 	 * if required.
598 	 */
599 	retval = xfs_attr3_leaf_add(bp, args);
600 	if (retval == -ENOSPC) {
601 		/*
602 		 * Promote the attribute list to the Btree format, then
603 		 * Commit that transaction so that the node_addname() call
604 		 * can manage its own transactions.
605 		 */
606 		xfs_defer_init(args->dfops, args->firstblock);
607 		error = xfs_attr3_leaf_to_node(args);
608 		if (error)
609 			goto out_defer_cancel;
610 		xfs_defer_ijoin(args->dfops, dp);
611 		error = xfs_defer_finish(&args->trans, args->dfops);
612 		if (error)
613 			goto out_defer_cancel;
614 
615 		/*
616 		 * Commit the current trans (including the inode) and start
617 		 * a new one.
618 		 */
619 		error = xfs_trans_roll_inode(&args->trans, dp);
620 		if (error)
621 			return error;
622 
623 		/*
624 		 * Fob the whole rest of the problem off on the Btree code.
625 		 */
626 		error = xfs_attr_node_addname(args);
627 		return error;
628 	}
629 
630 	/*
631 	 * Commit the transaction that added the attr name so that
632 	 * later routines can manage their own transactions.
633 	 */
634 	error = xfs_trans_roll_inode(&args->trans, dp);
635 	if (error)
636 		return error;
637 
638 	/*
639 	 * If there was an out-of-line value, allocate the blocks we
640 	 * identified for its storage and copy the value.  This is done
641 	 * after we create the attribute so that we don't overflow the
642 	 * maximum size of a transaction and/or hit a deadlock.
643 	 */
644 	if (args->rmtblkno > 0) {
645 		error = xfs_attr_rmtval_set(args);
646 		if (error)
647 			return error;
648 	}
649 
650 	/*
651 	 * If this is an atomic rename operation, we must "flip" the
652 	 * incomplete flags on the "new" and "old" attribute/value pairs
653 	 * so that one disappears and one appears atomically.  Then we
654 	 * must remove the "old" attribute/value pair.
655 	 */
656 	if (args->op_flags & XFS_DA_OP_RENAME) {
657 		/*
658 		 * In a separate transaction, set the incomplete flag on the
659 		 * "old" attr and clear the incomplete flag on the "new" attr.
660 		 */
661 		error = xfs_attr3_leaf_flipflags(args);
662 		if (error)
663 			return error;
664 
665 		/*
666 		 * Dismantle the "old" attribute/value pair by removing
667 		 * a "remote" value (if it exists).
668 		 */
669 		args->index = args->index2;
670 		args->blkno = args->blkno2;
671 		args->rmtblkno = args->rmtblkno2;
672 		args->rmtblkcnt = args->rmtblkcnt2;
673 		args->rmtvaluelen = args->rmtvaluelen2;
674 		if (args->rmtblkno) {
675 			error = xfs_attr_rmtval_remove(args);
676 			if (error)
677 				return error;
678 		}
679 
680 		/*
681 		 * Read in the block containing the "old" attr, then
682 		 * remove the "old" attr from that block (neat, huh!)
683 		 */
684 		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
685 					   -1, &bp);
686 		if (error)
687 			return error;
688 
689 		xfs_attr3_leaf_remove(bp, args);
690 
691 		/*
692 		 * If the result is small enough, shrink it all into the inode.
693 		 */
694 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
695 			xfs_defer_init(args->dfops, args->firstblock);
696 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
697 			/* bp is gone due to xfs_da_shrink_inode */
698 			if (error)
699 				goto out_defer_cancel;
700 			xfs_defer_ijoin(args->dfops, dp);
701 			error = xfs_defer_finish(&args->trans, args->dfops);
702 			if (error)
703 				goto out_defer_cancel;
704 		}
705 
706 		/*
707 		 * Commit the remove and start the next trans in series.
708 		 */
709 		error = xfs_trans_roll_inode(&args->trans, dp);
710 
711 	} else if (args->rmtblkno > 0) {
712 		/*
713 		 * Added a "remote" value, just clear the incomplete flag.
714 		 */
715 		error = xfs_attr3_leaf_clearflag(args);
716 	}
717 	return error;
718 out_defer_cancel:
719 	xfs_defer_cancel(args->dfops);
720 	args->trans = NULL;
721 	return error;
722 }
723 
724 /*
725  * Remove a name from the leaf attribute list structure
726  *
727  * This leaf block cannot have a "remote" value, we only call this routine
728  * if bmap_one_block() says there is only one block (ie: no remote blks).
729  */
730 STATIC int
731 xfs_attr_leaf_removename(xfs_da_args_t *args)
732 {
733 	xfs_inode_t *dp;
734 	struct xfs_buf *bp;
735 	int error, forkoff;
736 
737 	trace_xfs_attr_leaf_removename(args);
738 
739 	/*
740 	 * Remove the attribute.
741 	 */
742 	dp = args->dp;
743 	args->blkno = 0;
744 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
745 	if (error)
746 		return error;
747 
748 	error = xfs_attr3_leaf_lookup_int(bp, args);
749 	if (error == -ENOATTR) {
750 		xfs_trans_brelse(args->trans, bp);
751 		return error;
752 	}
753 
754 	xfs_attr3_leaf_remove(bp, args);
755 
756 	/*
757 	 * If the result is small enough, shrink it all into the inode.
758 	 */
759 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
760 		xfs_defer_init(args->dfops, args->firstblock);
761 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
762 		/* bp is gone due to xfs_da_shrink_inode */
763 		if (error)
764 			goto out_defer_cancel;
765 		xfs_defer_ijoin(args->dfops, dp);
766 		error = xfs_defer_finish(&args->trans, args->dfops);
767 		if (error)
768 			goto out_defer_cancel;
769 	}
770 	return 0;
771 out_defer_cancel:
772 	xfs_defer_cancel(args->dfops);
773 	args->trans = NULL;
774 	return error;
775 }
776 
777 /*
778  * Look up a name in a leaf attribute list structure.
779  *
780  * This leaf block cannot have a "remote" value, we only call this routine
781  * if bmap_one_block() says there is only one block (ie: no remote blks).
782  */
783 STATIC int
784 xfs_attr_leaf_get(xfs_da_args_t *args)
785 {
786 	struct xfs_buf *bp;
787 	int error;
788 
789 	trace_xfs_attr_leaf_get(args);
790 
791 	args->blkno = 0;
792 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
793 	if (error)
794 		return error;
795 
796 	error = xfs_attr3_leaf_lookup_int(bp, args);
797 	if (error != -EEXIST)  {
798 		xfs_trans_brelse(args->trans, bp);
799 		return error;
800 	}
801 	error = xfs_attr3_leaf_getvalue(bp, args);
802 	xfs_trans_brelse(args->trans, bp);
803 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
804 		error = xfs_attr_rmtval_get(args);
805 	}
806 	return error;
807 }
808 
809 /*========================================================================
810  * External routines when attribute list size > geo->blksize
811  *========================================================================*/
812 
813 /*
814  * Add a name to a Btree-format attribute list.
815  *
816  * This will involve walking down the Btree, and may involve splitting
817  * leaf nodes and even splitting intermediate nodes up to and including
818  * the root node (a special case of an intermediate node).
819  *
820  * "Remote" attribute values confuse the issue and atomic rename operations
821  * add a whole extra layer of confusion on top of that.
822  */
823 STATIC int
824 xfs_attr_node_addname(xfs_da_args_t *args)
825 {
826 	xfs_da_state_t *state;
827 	xfs_da_state_blk_t *blk;
828 	xfs_inode_t *dp;
829 	xfs_mount_t *mp;
830 	int retval, error;
831 
832 	trace_xfs_attr_node_addname(args);
833 
834 	/*
835 	 * Fill in bucket of arguments/results/context to carry around.
836 	 */
837 	dp = args->dp;
838 	mp = dp->i_mount;
839 restart:
840 	state = xfs_da_state_alloc();
841 	state->args = args;
842 	state->mp = mp;
843 
844 	/*
845 	 * Search to see if name already exists, and get back a pointer
846 	 * to where it should go.
847 	 */
848 	error = xfs_da3_node_lookup_int(state, &retval);
849 	if (error)
850 		goto out;
851 	blk = &state->path.blk[ state->path.active-1 ];
852 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
853 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
854 		goto out;
855 	} else if (retval == -EEXIST) {
856 		if (args->flags & ATTR_CREATE)
857 			goto out;
858 
859 		trace_xfs_attr_node_replace(args);
860 
861 		/* save the attribute state for later removal*/
862 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
863 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
864 		args->index2 = args->index;
865 		args->rmtblkno2 = args->rmtblkno;
866 		args->rmtblkcnt2 = args->rmtblkcnt;
867 		args->rmtvaluelen2 = args->rmtvaluelen;
868 
869 		/*
870 		 * clear the remote attr state now that it is saved so that the
871 		 * values reflect the state of the attribute we are about to
872 		 * add, not the attribute we just found and will remove later.
873 		 */
874 		args->rmtblkno = 0;
875 		args->rmtblkcnt = 0;
876 		args->rmtvaluelen = 0;
877 	}
878 
879 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
880 	if (retval == -ENOSPC) {
881 		if (state->path.active == 1) {
882 			/*
883 			 * Its really a single leaf node, but it had
884 			 * out-of-line values so it looked like it *might*
885 			 * have been a b-tree.
886 			 */
887 			xfs_da_state_free(state);
888 			state = NULL;
889 			xfs_defer_init(args->dfops, args->firstblock);
890 			error = xfs_attr3_leaf_to_node(args);
891 			if (error)
892 				goto out_defer_cancel;
893 			xfs_defer_ijoin(args->dfops, dp);
894 			error = xfs_defer_finish(&args->trans, args->dfops);
895 			if (error)
896 				goto out_defer_cancel;
897 
898 			/*
899 			 * Commit the node conversion and start the next
900 			 * trans in the chain.
901 			 */
902 			error = xfs_trans_roll_inode(&args->trans, dp);
903 			if (error)
904 				goto out;
905 
906 			goto restart;
907 		}
908 
909 		/*
910 		 * Split as many Btree elements as required.
911 		 * This code tracks the new and old attr's location
912 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
913 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
914 		 */
915 		xfs_defer_init(args->dfops, args->firstblock);
916 		error = xfs_da3_split(state);
917 		if (error)
918 			goto out_defer_cancel;
919 		xfs_defer_ijoin(args->dfops, dp);
920 		error = xfs_defer_finish(&args->trans, args->dfops);
921 		if (error)
922 			goto out_defer_cancel;
923 	} else {
924 		/*
925 		 * Addition succeeded, update Btree hashvals.
926 		 */
927 		xfs_da3_fixhashpath(state, &state->path);
928 	}
929 
930 	/*
931 	 * Kill the state structure, we're done with it and need to
932 	 * allow the buffers to come back later.
933 	 */
934 	xfs_da_state_free(state);
935 	state = NULL;
936 
937 	/*
938 	 * Commit the leaf addition or btree split and start the next
939 	 * trans in the chain.
940 	 */
941 	error = xfs_trans_roll_inode(&args->trans, dp);
942 	if (error)
943 		goto out;
944 
945 	/*
946 	 * If there was an out-of-line value, allocate the blocks we
947 	 * identified for its storage and copy the value.  This is done
948 	 * after we create the attribute so that we don't overflow the
949 	 * maximum size of a transaction and/or hit a deadlock.
950 	 */
951 	if (args->rmtblkno > 0) {
952 		error = xfs_attr_rmtval_set(args);
953 		if (error)
954 			return error;
955 	}
956 
957 	/*
958 	 * If this is an atomic rename operation, we must "flip" the
959 	 * incomplete flags on the "new" and "old" attribute/value pairs
960 	 * so that one disappears and one appears atomically.  Then we
961 	 * must remove the "old" attribute/value pair.
962 	 */
963 	if (args->op_flags & XFS_DA_OP_RENAME) {
964 		/*
965 		 * In a separate transaction, set the incomplete flag on the
966 		 * "old" attr and clear the incomplete flag on the "new" attr.
967 		 */
968 		error = xfs_attr3_leaf_flipflags(args);
969 		if (error)
970 			goto out;
971 
972 		/*
973 		 * Dismantle the "old" attribute/value pair by removing
974 		 * a "remote" value (if it exists).
975 		 */
976 		args->index = args->index2;
977 		args->blkno = args->blkno2;
978 		args->rmtblkno = args->rmtblkno2;
979 		args->rmtblkcnt = args->rmtblkcnt2;
980 		args->rmtvaluelen = args->rmtvaluelen2;
981 		if (args->rmtblkno) {
982 			error = xfs_attr_rmtval_remove(args);
983 			if (error)
984 				return error;
985 		}
986 
987 		/*
988 		 * Re-find the "old" attribute entry after any split ops.
989 		 * The INCOMPLETE flag means that we will find the "old"
990 		 * attr, not the "new" one.
991 		 */
992 		args->flags |= XFS_ATTR_INCOMPLETE;
993 		state = xfs_da_state_alloc();
994 		state->args = args;
995 		state->mp = mp;
996 		state->inleaf = 0;
997 		error = xfs_da3_node_lookup_int(state, &retval);
998 		if (error)
999 			goto out;
1000 
1001 		/*
1002 		 * Remove the name and update the hashvals in the tree.
1003 		 */
1004 		blk = &state->path.blk[ state->path.active-1 ];
1005 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1006 		error = xfs_attr3_leaf_remove(blk->bp, args);
1007 		xfs_da3_fixhashpath(state, &state->path);
1008 
1009 		/*
1010 		 * Check to see if the tree needs to be collapsed.
1011 		 */
1012 		if (retval && (state->path.active > 1)) {
1013 			xfs_defer_init(args->dfops, args->firstblock);
1014 			error = xfs_da3_join(state);
1015 			if (error)
1016 				goto out_defer_cancel;
1017 			xfs_defer_ijoin(args->dfops, dp);
1018 			error = xfs_defer_finish(&args->trans, args->dfops);
1019 			if (error)
1020 				goto out_defer_cancel;
1021 		}
1022 
1023 		/*
1024 		 * Commit and start the next trans in the chain.
1025 		 */
1026 		error = xfs_trans_roll_inode(&args->trans, dp);
1027 		if (error)
1028 			goto out;
1029 
1030 	} else if (args->rmtblkno > 0) {
1031 		/*
1032 		 * Added a "remote" value, just clear the incomplete flag.
1033 		 */
1034 		error = xfs_attr3_leaf_clearflag(args);
1035 		if (error)
1036 			goto out;
1037 	}
1038 	retval = error = 0;
1039 
1040 out:
1041 	if (state)
1042 		xfs_da_state_free(state);
1043 	if (error)
1044 		return error;
1045 	return retval;
1046 out_defer_cancel:
1047 	xfs_defer_cancel(args->dfops);
1048 	args->trans = NULL;
1049 	goto out;
1050 }
1051 
1052 /*
1053  * Remove a name from a B-tree attribute list.
1054  *
1055  * This will involve walking down the Btree, and may involve joining
1056  * leaf nodes and even joining intermediate nodes up to and including
1057  * the root node (a special case of an intermediate node).
1058  */
1059 STATIC int
1060 xfs_attr_node_removename(xfs_da_args_t *args)
1061 {
1062 	xfs_da_state_t *state;
1063 	xfs_da_state_blk_t *blk;
1064 	xfs_inode_t *dp;
1065 	struct xfs_buf *bp;
1066 	int retval, error, forkoff;
1067 
1068 	trace_xfs_attr_node_removename(args);
1069 
1070 	/*
1071 	 * Tie a string around our finger to remind us where we are.
1072 	 */
1073 	dp = args->dp;
1074 	state = xfs_da_state_alloc();
1075 	state->args = args;
1076 	state->mp = dp->i_mount;
1077 
1078 	/*
1079 	 * Search to see if name exists, and get back a pointer to it.
1080 	 */
1081 	error = xfs_da3_node_lookup_int(state, &retval);
1082 	if (error || (retval != -EEXIST)) {
1083 		if (error == 0)
1084 			error = retval;
1085 		goto out;
1086 	}
1087 
1088 	/*
1089 	 * If there is an out-of-line value, de-allocate the blocks.
1090 	 * This is done before we remove the attribute so that we don't
1091 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1092 	 */
1093 	blk = &state->path.blk[ state->path.active-1 ];
1094 	ASSERT(blk->bp != NULL);
1095 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1096 	if (args->rmtblkno > 0) {
1097 		/*
1098 		 * Fill in disk block numbers in the state structure
1099 		 * so that we can get the buffers back after we commit
1100 		 * several transactions in the following calls.
1101 		 */
1102 		error = xfs_attr_fillstate(state);
1103 		if (error)
1104 			goto out;
1105 
1106 		/*
1107 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1108 		 * remote value.
1109 		 */
1110 		error = xfs_attr3_leaf_setflag(args);
1111 		if (error)
1112 			goto out;
1113 		error = xfs_attr_rmtval_remove(args);
1114 		if (error)
1115 			goto out;
1116 
1117 		/*
1118 		 * Refill the state structure with buffers, the prior calls
1119 		 * released our buffers.
1120 		 */
1121 		error = xfs_attr_refillstate(state);
1122 		if (error)
1123 			goto out;
1124 	}
1125 
1126 	/*
1127 	 * Remove the name and update the hashvals in the tree.
1128 	 */
1129 	blk = &state->path.blk[ state->path.active-1 ];
1130 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1131 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1132 	xfs_da3_fixhashpath(state, &state->path);
1133 
1134 	/*
1135 	 * Check to see if the tree needs to be collapsed.
1136 	 */
1137 	if (retval && (state->path.active > 1)) {
1138 		xfs_defer_init(args->dfops, args->firstblock);
1139 		error = xfs_da3_join(state);
1140 		if (error)
1141 			goto out_defer_cancel;
1142 		xfs_defer_ijoin(args->dfops, dp);
1143 		error = xfs_defer_finish(&args->trans, args->dfops);
1144 		if (error)
1145 			goto out_defer_cancel;
1146 		/*
1147 		 * Commit the Btree join operation and start a new trans.
1148 		 */
1149 		error = xfs_trans_roll_inode(&args->trans, dp);
1150 		if (error)
1151 			goto out;
1152 	}
1153 
1154 	/*
1155 	 * If the result is small enough, push it all into the inode.
1156 	 */
1157 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1158 		/*
1159 		 * Have to get rid of the copy of this dabuf in the state.
1160 		 */
1161 		ASSERT(state->path.active == 1);
1162 		ASSERT(state->path.blk[0].bp);
1163 		state->path.blk[0].bp = NULL;
1164 
1165 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1166 		if (error)
1167 			goto out;
1168 
1169 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1170 			xfs_defer_init(args->dfops, args->firstblock);
1171 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1172 			/* bp is gone due to xfs_da_shrink_inode */
1173 			if (error)
1174 				goto out_defer_cancel;
1175 			xfs_defer_ijoin(args->dfops, dp);
1176 			error = xfs_defer_finish(&args->trans, args->dfops);
1177 			if (error)
1178 				goto out_defer_cancel;
1179 		} else
1180 			xfs_trans_brelse(args->trans, bp);
1181 	}
1182 	error = 0;
1183 
1184 out:
1185 	xfs_da_state_free(state);
1186 	return error;
1187 out_defer_cancel:
1188 	xfs_defer_cancel(args->dfops);
1189 	args->trans = NULL;
1190 	goto out;
1191 }
1192 
1193 /*
1194  * Fill in the disk block numbers in the state structure for the buffers
1195  * that are attached to the state structure.
1196  * This is done so that we can quickly reattach ourselves to those buffers
1197  * after some set of transaction commits have released these buffers.
1198  */
1199 STATIC int
1200 xfs_attr_fillstate(xfs_da_state_t *state)
1201 {
1202 	xfs_da_state_path_t *path;
1203 	xfs_da_state_blk_t *blk;
1204 	int level;
1205 
1206 	trace_xfs_attr_fillstate(state->args);
1207 
1208 	/*
1209 	 * Roll down the "path" in the state structure, storing the on-disk
1210 	 * block number for those buffers in the "path".
1211 	 */
1212 	path = &state->path;
1213 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1214 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1215 		if (blk->bp) {
1216 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1217 			blk->bp = NULL;
1218 		} else {
1219 			blk->disk_blkno = 0;
1220 		}
1221 	}
1222 
1223 	/*
1224 	 * Roll down the "altpath" in the state structure, storing the on-disk
1225 	 * block number for those buffers in the "altpath".
1226 	 */
1227 	path = &state->altpath;
1228 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1229 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1230 		if (blk->bp) {
1231 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1232 			blk->bp = NULL;
1233 		} else {
1234 			blk->disk_blkno = 0;
1235 		}
1236 	}
1237 
1238 	return 0;
1239 }
1240 
1241 /*
1242  * Reattach the buffers to the state structure based on the disk block
1243  * numbers stored in the state structure.
1244  * This is done after some set of transaction commits have released those
1245  * buffers from our grip.
1246  */
1247 STATIC int
1248 xfs_attr_refillstate(xfs_da_state_t *state)
1249 {
1250 	xfs_da_state_path_t *path;
1251 	xfs_da_state_blk_t *blk;
1252 	int level, error;
1253 
1254 	trace_xfs_attr_refillstate(state->args);
1255 
1256 	/*
1257 	 * Roll down the "path" in the state structure, storing the on-disk
1258 	 * block number for those buffers in the "path".
1259 	 */
1260 	path = &state->path;
1261 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1262 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1263 		if (blk->disk_blkno) {
1264 			error = xfs_da3_node_read(state->args->trans,
1265 						state->args->dp,
1266 						blk->blkno, blk->disk_blkno,
1267 						&blk->bp, XFS_ATTR_FORK);
1268 			if (error)
1269 				return error;
1270 		} else {
1271 			blk->bp = NULL;
1272 		}
1273 	}
1274 
1275 	/*
1276 	 * Roll down the "altpath" in the state structure, storing the on-disk
1277 	 * block number for those buffers in the "altpath".
1278 	 */
1279 	path = &state->altpath;
1280 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1281 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1282 		if (blk->disk_blkno) {
1283 			error = xfs_da3_node_read(state->args->trans,
1284 						state->args->dp,
1285 						blk->blkno, blk->disk_blkno,
1286 						&blk->bp, XFS_ATTR_FORK);
1287 			if (error)
1288 				return error;
1289 		} else {
1290 			blk->bp = NULL;
1291 		}
1292 	}
1293 
1294 	return 0;
1295 }
1296 
1297 /*
1298  * Look up a filename in a node attribute list.
1299  *
1300  * This routine gets called for any attribute fork that has more than one
1301  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1302  * "remote" values taking up more blocks.
1303  */
1304 STATIC int
1305 xfs_attr_node_get(xfs_da_args_t *args)
1306 {
1307 	xfs_da_state_t *state;
1308 	xfs_da_state_blk_t *blk;
1309 	int error, retval;
1310 	int i;
1311 
1312 	trace_xfs_attr_node_get(args);
1313 
1314 	state = xfs_da_state_alloc();
1315 	state->args = args;
1316 	state->mp = args->dp->i_mount;
1317 
1318 	/*
1319 	 * Search to see if name exists, and get back a pointer to it.
1320 	 */
1321 	error = xfs_da3_node_lookup_int(state, &retval);
1322 	if (error) {
1323 		retval = error;
1324 	} else if (retval == -EEXIST) {
1325 		blk = &state->path.blk[ state->path.active-1 ];
1326 		ASSERT(blk->bp != NULL);
1327 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1328 
1329 		/*
1330 		 * Get the value, local or "remote"
1331 		 */
1332 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1333 		if (!retval && (args->rmtblkno > 0)
1334 		    && !(args->flags & ATTR_KERNOVAL)) {
1335 			retval = xfs_attr_rmtval_get(args);
1336 		}
1337 	}
1338 
1339 	/*
1340 	 * If not in a transaction, we have to release all the buffers.
1341 	 */
1342 	for (i = 0; i < state->path.active; i++) {
1343 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1344 		state->path.blk[i].bp = NULL;
1345 	}
1346 
1347 	xfs_da_state_free(state);
1348 	return retval;
1349 }
1350