xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr.c (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4  * All Rights Reserved.
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_da_format.h"
16 #include "xfs_da_btree.h"
17 #include "xfs_attr_sf.h"
18 #include "xfs_inode.h"
19 #include "xfs_alloc.h"
20 #include "xfs_trans.h"
21 #include "xfs_inode_item.h"
22 #include "xfs_bmap.h"
23 #include "xfs_bmap_util.h"
24 #include "xfs_bmap_btree.h"
25 #include "xfs_attr.h"
26 #include "xfs_attr_leaf.h"
27 #include "xfs_attr_remote.h"
28 #include "xfs_error.h"
29 #include "xfs_quota.h"
30 #include "xfs_trans_space.h"
31 #include "xfs_trace.h"
32 
33 /*
34  * xfs_attr.c
35  *
36  * Provide the external interfaces to manage attribute lists.
37  */
38 
39 /*========================================================================
40  * Function prototypes for the kernel.
41  *========================================================================*/
42 
43 /*
44  * Internal routines when attribute list fits inside the inode.
45  */
46 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
47 
48 /*
49  * Internal routines when attribute list is one block.
50  */
51 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
52 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
53 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
54 
55 /*
56  * Internal routines when attribute list is more than one block.
57  */
58 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
59 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
60 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
61 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
62 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
63 
64 
65 STATIC int
66 xfs_attr_args_init(
67 	struct xfs_da_args	*args,
68 	struct xfs_inode	*dp,
69 	const unsigned char	*name,
70 	int			flags)
71 {
72 
73 	if (!name)
74 		return -EINVAL;
75 
76 	memset(args, 0, sizeof(*args));
77 	args->geo = dp->i_mount->m_attr_geo;
78 	args->whichfork = XFS_ATTR_FORK;
79 	args->dp = dp;
80 	args->flags = flags;
81 	args->name = name;
82 	args->namelen = strlen((const char *)name);
83 	if (args->namelen >= MAXNAMELEN)
84 		return -EFAULT;		/* match IRIX behaviour */
85 
86 	args->hashval = xfs_da_hashname(args->name, args->namelen);
87 	return 0;
88 }
89 
90 int
91 xfs_inode_hasattr(
92 	struct xfs_inode	*ip)
93 {
94 	if (!XFS_IFORK_Q(ip) ||
95 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
96 	     ip->i_d.di_anextents == 0))
97 		return 0;
98 	return 1;
99 }
100 
101 /*========================================================================
102  * Overall external interface routines.
103  *========================================================================*/
104 
105 /* Retrieve an extended attribute and its value.  Must have ilock. */
106 int
107 xfs_attr_get_ilocked(
108 	struct xfs_inode	*ip,
109 	struct xfs_da_args	*args)
110 {
111 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
112 
113 	if (!xfs_inode_hasattr(ip))
114 		return -ENOATTR;
115 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
116 		return xfs_attr_shortform_getvalue(args);
117 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
118 		return xfs_attr_leaf_get(args);
119 	else
120 		return xfs_attr_node_get(args);
121 }
122 
123 /* Retrieve an extended attribute by name, and its value. */
124 int
125 xfs_attr_get(
126 	struct xfs_inode	*ip,
127 	const unsigned char	*name,
128 	unsigned char		*value,
129 	int			*valuelenp,
130 	int			flags)
131 {
132 	struct xfs_da_args	args;
133 	uint			lock_mode;
134 	int			error;
135 
136 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
137 
138 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
139 		return -EIO;
140 
141 	error = xfs_attr_args_init(&args, ip, name, flags);
142 	if (error)
143 		return error;
144 
145 	args.value = value;
146 	args.valuelen = *valuelenp;
147 	/* Entirely possible to look up a name which doesn't exist */
148 	args.op_flags = XFS_DA_OP_OKNOENT;
149 
150 	lock_mode = xfs_ilock_attr_map_shared(ip);
151 	error = xfs_attr_get_ilocked(ip, &args);
152 	xfs_iunlock(ip, lock_mode);
153 
154 	*valuelenp = args.valuelen;
155 	return error == -EEXIST ? 0 : error;
156 }
157 
158 /*
159  * Calculate how many blocks we need for the new attribute,
160  */
161 STATIC int
162 xfs_attr_calc_size(
163 	struct xfs_da_args	*args,
164 	int			*local)
165 {
166 	struct xfs_mount	*mp = args->dp->i_mount;
167 	int			size;
168 	int			nblks;
169 
170 	/*
171 	 * Determine space new attribute will use, and if it would be
172 	 * "local" or "remote" (note: local != inline).
173 	 */
174 	size = xfs_attr_leaf_newentsize(args, local);
175 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
176 	if (*local) {
177 		if (size > (args->geo->blksize / 2)) {
178 			/* Double split possible */
179 			nblks *= 2;
180 		}
181 	} else {
182 		/*
183 		 * Out of line attribute, cannot double split, but
184 		 * make room for the attribute value itself.
185 		 */
186 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
187 		nblks += dblocks;
188 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
189 	}
190 
191 	return nblks;
192 }
193 
194 STATIC int
195 xfs_attr_try_sf_addname(
196 	struct xfs_inode	*dp,
197 	struct xfs_da_args	*args)
198 {
199 
200 	struct xfs_mount	*mp = dp->i_mount;
201 	int			error, error2;
202 
203 	error = xfs_attr_shortform_addname(args);
204 	if (error == -ENOSPC)
205 		return error;
206 
207 	/*
208 	 * Commit the shortform mods, and we're done.
209 	 * NOTE: this is also the error path (EEXIST, etc).
210 	 */
211 	if (!error && (args->flags & ATTR_KERNOTIME) == 0)
212 		xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
213 
214 	if (mp->m_flags & XFS_MOUNT_WSYNC)
215 		xfs_trans_set_sync(args->trans);
216 
217 	error2 = xfs_trans_commit(args->trans);
218 	args->trans = NULL;
219 	return error ? error : error2;
220 }
221 
222 /*
223  * Set the attribute specified in @args.
224  */
225 int
226 xfs_attr_set_args(
227 	struct xfs_da_args	*args,
228 	struct xfs_buf          **leaf_bp)
229 {
230 	struct xfs_inode	*dp = args->dp;
231 	int			error;
232 
233 	/*
234 	 * If the attribute list is non-existent or a shortform list,
235 	 * upgrade it to a single-leaf-block attribute list.
236 	 */
237 	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
238 	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
239 	     dp->i_d.di_anextents == 0)) {
240 
241 		/*
242 		 * Build initial attribute list (if required).
243 		 */
244 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
245 			xfs_attr_shortform_create(args);
246 
247 		/*
248 		 * Try to add the attr to the attribute list in the inode.
249 		 */
250 		error = xfs_attr_try_sf_addname(dp, args);
251 		if (error != -ENOSPC)
252 			return error;
253 
254 		/*
255 		 * It won't fit in the shortform, transform to a leaf block.
256 		 * GROT: another possible req'mt for a double-split btree op.
257 		 */
258 		error = xfs_attr_shortform_to_leaf(args, leaf_bp);
259 		if (error)
260 			return error;
261 
262 		/*
263 		 * Prevent the leaf buffer from being unlocked so that a
264 		 * concurrent AIL push cannot grab the half-baked leaf
265 		 * buffer and run into problems with the write verifier.
266 		 */
267 		xfs_trans_bhold(args->trans, *leaf_bp);
268 
269 		error = xfs_defer_finish(&args->trans);
270 		if (error)
271 			return error;
272 
273 		/*
274 		 * Commit the leaf transformation.  We'll need another
275 		 * (linked) transaction to add the new attribute to the
276 		 * leaf.
277 		 */
278 		error = xfs_trans_roll_inode(&args->trans, dp);
279 		if (error)
280 			return error;
281 		xfs_trans_bjoin(args->trans, *leaf_bp);
282 		*leaf_bp = NULL;
283 	}
284 
285 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
286 		error = xfs_attr_leaf_addname(args);
287 	else
288 		error = xfs_attr_node_addname(args);
289 	return error;
290 }
291 
292 /*
293  * Remove the attribute specified in @args.
294  */
295 int
296 xfs_attr_remove_args(
297 	struct xfs_da_args      *args)
298 {
299 	struct xfs_inode	*dp = args->dp;
300 	int			error;
301 
302 	if (!xfs_inode_hasattr(dp)) {
303 		error = -ENOATTR;
304 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
305 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
306 		error = xfs_attr_shortform_remove(args);
307 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
308 		error = xfs_attr_leaf_removename(args);
309 	} else {
310 		error = xfs_attr_node_removename(args);
311 	}
312 
313 	return error;
314 }
315 
316 int
317 xfs_attr_set(
318 	struct xfs_inode	*dp,
319 	const unsigned char	*name,
320 	unsigned char		*value,
321 	int			valuelen,
322 	int			flags)
323 {
324 	struct xfs_mount	*mp = dp->i_mount;
325 	struct xfs_buf		*leaf_bp = NULL;
326 	struct xfs_da_args	args;
327 	struct xfs_trans_res	tres;
328 	int			rsvd = (flags & ATTR_ROOT) != 0;
329 	int			error, local;
330 
331 	XFS_STATS_INC(mp, xs_attr_set);
332 
333 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
334 		return -EIO;
335 
336 	error = xfs_attr_args_init(&args, dp, name, flags);
337 	if (error)
338 		return error;
339 
340 	args.value = value;
341 	args.valuelen = valuelen;
342 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
343 	args.total = xfs_attr_calc_size(&args, &local);
344 
345 	error = xfs_qm_dqattach(dp);
346 	if (error)
347 		return error;
348 
349 	/*
350 	 * If the inode doesn't have an attribute fork, add one.
351 	 * (inode must not be locked when we call this routine)
352 	 */
353 	if (XFS_IFORK_Q(dp) == 0) {
354 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
355 			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
356 
357 		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
358 		if (error)
359 			return error;
360 	}
361 
362 	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
363 			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
364 	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
365 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
366 
367 	/*
368 	 * Root fork attributes can use reserved data blocks for this
369 	 * operation if necessary
370 	 */
371 	error = xfs_trans_alloc(mp, &tres, args.total, 0,
372 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
373 	if (error)
374 		return error;
375 
376 	xfs_ilock(dp, XFS_ILOCK_EXCL);
377 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
378 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
379 				       XFS_QMOPT_RES_REGBLKS);
380 	if (error)
381 		goto out_trans_cancel;
382 
383 	xfs_trans_ijoin(args.trans, dp, 0);
384 	error = xfs_attr_set_args(&args, &leaf_bp);
385 	if (error)
386 		goto out_release_leaf;
387 	if (!args.trans) {
388 		/* shortform attribute has already been committed */
389 		goto out_unlock;
390 	}
391 
392 	/*
393 	 * If this is a synchronous mount, make sure that the
394 	 * transaction goes to disk before returning to the user.
395 	 */
396 	if (mp->m_flags & XFS_MOUNT_WSYNC)
397 		xfs_trans_set_sync(args.trans);
398 
399 	if ((flags & ATTR_KERNOTIME) == 0)
400 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
401 
402 	/*
403 	 * Commit the last in the sequence of transactions.
404 	 */
405 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
406 	error = xfs_trans_commit(args.trans);
407 out_unlock:
408 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
409 	return error;
410 
411 out_release_leaf:
412 	if (leaf_bp)
413 		xfs_trans_brelse(args.trans, leaf_bp);
414 out_trans_cancel:
415 	if (args.trans)
416 		xfs_trans_cancel(args.trans);
417 	goto out_unlock;
418 }
419 
420 /*
421  * Generic handler routine to remove a name from an attribute list.
422  * Transitions attribute list from Btree to shortform as necessary.
423  */
424 int
425 xfs_attr_remove(
426 	struct xfs_inode	*dp,
427 	const unsigned char	*name,
428 	int			flags)
429 {
430 	struct xfs_mount	*mp = dp->i_mount;
431 	struct xfs_da_args	args;
432 	int			error;
433 
434 	XFS_STATS_INC(mp, xs_attr_remove);
435 
436 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
437 		return -EIO;
438 
439 	error = xfs_attr_args_init(&args, dp, name, flags);
440 	if (error)
441 		return error;
442 
443 	/*
444 	 * we have no control over the attribute names that userspace passes us
445 	 * to remove, so we have to allow the name lookup prior to attribute
446 	 * removal to fail.
447 	 */
448 	args.op_flags = XFS_DA_OP_OKNOENT;
449 
450 	error = xfs_qm_dqattach(dp);
451 	if (error)
452 		return error;
453 
454 	/*
455 	 * Root fork attributes can use reserved data blocks for this
456 	 * operation if necessary
457 	 */
458 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
459 			XFS_ATTRRM_SPACE_RES(mp), 0,
460 			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
461 			&args.trans);
462 	if (error)
463 		return error;
464 
465 	xfs_ilock(dp, XFS_ILOCK_EXCL);
466 	/*
467 	 * No need to make quota reservations here. We expect to release some
468 	 * blocks not allocate in the common case.
469 	 */
470 	xfs_trans_ijoin(args.trans, dp, 0);
471 
472 	error = xfs_attr_remove_args(&args);
473 	if (error)
474 		goto out;
475 
476 	/*
477 	 * If this is a synchronous mount, make sure that the
478 	 * transaction goes to disk before returning to the user.
479 	 */
480 	if (mp->m_flags & XFS_MOUNT_WSYNC)
481 		xfs_trans_set_sync(args.trans);
482 
483 	if ((flags & ATTR_KERNOTIME) == 0)
484 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
485 
486 	/*
487 	 * Commit the last in the sequence of transactions.
488 	 */
489 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
490 	error = xfs_trans_commit(args.trans);
491 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
492 
493 	return error;
494 
495 out:
496 	if (args.trans)
497 		xfs_trans_cancel(args.trans);
498 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
499 	return error;
500 }
501 
502 /*========================================================================
503  * External routines when attribute list is inside the inode
504  *========================================================================*/
505 
506 /*
507  * Add a name to the shortform attribute list structure
508  * This is the external routine.
509  */
510 STATIC int
511 xfs_attr_shortform_addname(xfs_da_args_t *args)
512 {
513 	int newsize, forkoff, retval;
514 
515 	trace_xfs_attr_sf_addname(args);
516 
517 	retval = xfs_attr_shortform_lookup(args);
518 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
519 		return retval;
520 	} else if (retval == -EEXIST) {
521 		if (args->flags & ATTR_CREATE)
522 			return retval;
523 		retval = xfs_attr_shortform_remove(args);
524 		if (retval)
525 			return retval;
526 		/*
527 		 * Since we have removed the old attr, clear ATTR_REPLACE so
528 		 * that the leaf format add routine won't trip over the attr
529 		 * not being around.
530 		 */
531 		args->flags &= ~ATTR_REPLACE;
532 	}
533 
534 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
535 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
536 		return -ENOSPC;
537 
538 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
539 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
540 
541 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
542 	if (!forkoff)
543 		return -ENOSPC;
544 
545 	xfs_attr_shortform_add(args, forkoff);
546 	return 0;
547 }
548 
549 
550 /*========================================================================
551  * External routines when attribute list is one block
552  *========================================================================*/
553 
554 /*
555  * Add a name to the leaf attribute list structure
556  *
557  * This leaf block cannot have a "remote" value, we only call this routine
558  * if bmap_one_block() says there is only one block (ie: no remote blks).
559  */
560 STATIC int
561 xfs_attr_leaf_addname(
562 	struct xfs_da_args	*args)
563 {
564 	struct xfs_inode	*dp;
565 	struct xfs_buf		*bp;
566 	int			retval, error, forkoff;
567 
568 	trace_xfs_attr_leaf_addname(args);
569 
570 	/*
571 	 * Read the (only) block in the attribute list in.
572 	 */
573 	dp = args->dp;
574 	args->blkno = 0;
575 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
576 	if (error)
577 		return error;
578 
579 	/*
580 	 * Look up the given attribute in the leaf block.  Figure out if
581 	 * the given flags produce an error or call for an atomic rename.
582 	 */
583 	retval = xfs_attr3_leaf_lookup_int(bp, args);
584 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
585 		xfs_trans_brelse(args->trans, bp);
586 		return retval;
587 	} else if (retval == -EEXIST) {
588 		if (args->flags & ATTR_CREATE) {	/* pure create op */
589 			xfs_trans_brelse(args->trans, bp);
590 			return retval;
591 		}
592 
593 		trace_xfs_attr_leaf_replace(args);
594 
595 		/* save the attribute state for later removal*/
596 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
597 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
598 		args->index2 = args->index;
599 		args->rmtblkno2 = args->rmtblkno;
600 		args->rmtblkcnt2 = args->rmtblkcnt;
601 		args->rmtvaluelen2 = args->rmtvaluelen;
602 
603 		/*
604 		 * clear the remote attr state now that it is saved so that the
605 		 * values reflect the state of the attribute we are about to
606 		 * add, not the attribute we just found and will remove later.
607 		 */
608 		args->rmtblkno = 0;
609 		args->rmtblkcnt = 0;
610 		args->rmtvaluelen = 0;
611 	}
612 
613 	/*
614 	 * Add the attribute to the leaf block, transitioning to a Btree
615 	 * if required.
616 	 */
617 	retval = xfs_attr3_leaf_add(bp, args);
618 	if (retval == -ENOSPC) {
619 		/*
620 		 * Promote the attribute list to the Btree format, then
621 		 * Commit that transaction so that the node_addname() call
622 		 * can manage its own transactions.
623 		 */
624 		error = xfs_attr3_leaf_to_node(args);
625 		if (error)
626 			return error;
627 		error = xfs_defer_finish(&args->trans);
628 		if (error)
629 			return error;
630 
631 		/*
632 		 * Commit the current trans (including the inode) and start
633 		 * a new one.
634 		 */
635 		error = xfs_trans_roll_inode(&args->trans, dp);
636 		if (error)
637 			return error;
638 
639 		/*
640 		 * Fob the whole rest of the problem off on the Btree code.
641 		 */
642 		error = xfs_attr_node_addname(args);
643 		return error;
644 	}
645 
646 	/*
647 	 * Commit the transaction that added the attr name so that
648 	 * later routines can manage their own transactions.
649 	 */
650 	error = xfs_trans_roll_inode(&args->trans, dp);
651 	if (error)
652 		return error;
653 
654 	/*
655 	 * If there was an out-of-line value, allocate the blocks we
656 	 * identified for its storage and copy the value.  This is done
657 	 * after we create the attribute so that we don't overflow the
658 	 * maximum size of a transaction and/or hit a deadlock.
659 	 */
660 	if (args->rmtblkno > 0) {
661 		error = xfs_attr_rmtval_set(args);
662 		if (error)
663 			return error;
664 	}
665 
666 	/*
667 	 * If this is an atomic rename operation, we must "flip" the
668 	 * incomplete flags on the "new" and "old" attribute/value pairs
669 	 * so that one disappears and one appears atomically.  Then we
670 	 * must remove the "old" attribute/value pair.
671 	 */
672 	if (args->op_flags & XFS_DA_OP_RENAME) {
673 		/*
674 		 * In a separate transaction, set the incomplete flag on the
675 		 * "old" attr and clear the incomplete flag on the "new" attr.
676 		 */
677 		error = xfs_attr3_leaf_flipflags(args);
678 		if (error)
679 			return error;
680 
681 		/*
682 		 * Dismantle the "old" attribute/value pair by removing
683 		 * a "remote" value (if it exists).
684 		 */
685 		args->index = args->index2;
686 		args->blkno = args->blkno2;
687 		args->rmtblkno = args->rmtblkno2;
688 		args->rmtblkcnt = args->rmtblkcnt2;
689 		args->rmtvaluelen = args->rmtvaluelen2;
690 		if (args->rmtblkno) {
691 			error = xfs_attr_rmtval_remove(args);
692 			if (error)
693 				return error;
694 		}
695 
696 		/*
697 		 * Read in the block containing the "old" attr, then
698 		 * remove the "old" attr from that block (neat, huh!)
699 		 */
700 		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
701 					   -1, &bp);
702 		if (error)
703 			return error;
704 
705 		xfs_attr3_leaf_remove(bp, args);
706 
707 		/*
708 		 * If the result is small enough, shrink it all into the inode.
709 		 */
710 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
711 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
712 			/* bp is gone due to xfs_da_shrink_inode */
713 			if (error)
714 				return error;
715 			error = xfs_defer_finish(&args->trans);
716 			if (error)
717 				return error;
718 		}
719 
720 		/*
721 		 * Commit the remove and start the next trans in series.
722 		 */
723 		error = xfs_trans_roll_inode(&args->trans, dp);
724 
725 	} else if (args->rmtblkno > 0) {
726 		/*
727 		 * Added a "remote" value, just clear the incomplete flag.
728 		 */
729 		error = xfs_attr3_leaf_clearflag(args);
730 	}
731 	return error;
732 }
733 
734 /*
735  * Remove a name from the leaf attribute list structure
736  *
737  * This leaf block cannot have a "remote" value, we only call this routine
738  * if bmap_one_block() says there is only one block (ie: no remote blks).
739  */
740 STATIC int
741 xfs_attr_leaf_removename(
742 	struct xfs_da_args	*args)
743 {
744 	struct xfs_inode	*dp;
745 	struct xfs_buf		*bp;
746 	int			error, forkoff;
747 
748 	trace_xfs_attr_leaf_removename(args);
749 
750 	/*
751 	 * Remove the attribute.
752 	 */
753 	dp = args->dp;
754 	args->blkno = 0;
755 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
756 	if (error)
757 		return error;
758 
759 	error = xfs_attr3_leaf_lookup_int(bp, args);
760 	if (error == -ENOATTR) {
761 		xfs_trans_brelse(args->trans, bp);
762 		return error;
763 	}
764 
765 	xfs_attr3_leaf_remove(bp, args);
766 
767 	/*
768 	 * If the result is small enough, shrink it all into the inode.
769 	 */
770 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
771 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
772 		/* bp is gone due to xfs_da_shrink_inode */
773 		if (error)
774 			return error;
775 		error = xfs_defer_finish(&args->trans);
776 		if (error)
777 			return error;
778 	}
779 	return 0;
780 }
781 
782 /*
783  * Look up a name in a leaf attribute list structure.
784  *
785  * This leaf block cannot have a "remote" value, we only call this routine
786  * if bmap_one_block() says there is only one block (ie: no remote blks).
787  */
788 STATIC int
789 xfs_attr_leaf_get(xfs_da_args_t *args)
790 {
791 	struct xfs_buf *bp;
792 	int error;
793 
794 	trace_xfs_attr_leaf_get(args);
795 
796 	args->blkno = 0;
797 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
798 	if (error)
799 		return error;
800 
801 	error = xfs_attr3_leaf_lookup_int(bp, args);
802 	if (error != -EEXIST)  {
803 		xfs_trans_brelse(args->trans, bp);
804 		return error;
805 	}
806 	error = xfs_attr3_leaf_getvalue(bp, args);
807 	xfs_trans_brelse(args->trans, bp);
808 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
809 		error = xfs_attr_rmtval_get(args);
810 	}
811 	return error;
812 }
813 
814 /*========================================================================
815  * External routines when attribute list size > geo->blksize
816  *========================================================================*/
817 
818 /*
819  * Add a name to a Btree-format attribute list.
820  *
821  * This will involve walking down the Btree, and may involve splitting
822  * leaf nodes and even splitting intermediate nodes up to and including
823  * the root node (a special case of an intermediate node).
824  *
825  * "Remote" attribute values confuse the issue and atomic rename operations
826  * add a whole extra layer of confusion on top of that.
827  */
828 STATIC int
829 xfs_attr_node_addname(
830 	struct xfs_da_args	*args)
831 {
832 	struct xfs_da_state	*state;
833 	struct xfs_da_state_blk	*blk;
834 	struct xfs_inode	*dp;
835 	struct xfs_mount	*mp;
836 	int			retval, error;
837 
838 	trace_xfs_attr_node_addname(args);
839 
840 	/*
841 	 * Fill in bucket of arguments/results/context to carry around.
842 	 */
843 	dp = args->dp;
844 	mp = dp->i_mount;
845 restart:
846 	state = xfs_da_state_alloc();
847 	state->args = args;
848 	state->mp = mp;
849 
850 	/*
851 	 * Search to see if name already exists, and get back a pointer
852 	 * to where it should go.
853 	 */
854 	error = xfs_da3_node_lookup_int(state, &retval);
855 	if (error)
856 		goto out;
857 	blk = &state->path.blk[ state->path.active-1 ];
858 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
859 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
860 		goto out;
861 	} else if (retval == -EEXIST) {
862 		if (args->flags & ATTR_CREATE)
863 			goto out;
864 
865 		trace_xfs_attr_node_replace(args);
866 
867 		/* save the attribute state for later removal*/
868 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
869 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
870 		args->index2 = args->index;
871 		args->rmtblkno2 = args->rmtblkno;
872 		args->rmtblkcnt2 = args->rmtblkcnt;
873 		args->rmtvaluelen2 = args->rmtvaluelen;
874 
875 		/*
876 		 * clear the remote attr state now that it is saved so that the
877 		 * values reflect the state of the attribute we are about to
878 		 * add, not the attribute we just found and will remove later.
879 		 */
880 		args->rmtblkno = 0;
881 		args->rmtblkcnt = 0;
882 		args->rmtvaluelen = 0;
883 	}
884 
885 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
886 	if (retval == -ENOSPC) {
887 		if (state->path.active == 1) {
888 			/*
889 			 * Its really a single leaf node, but it had
890 			 * out-of-line values so it looked like it *might*
891 			 * have been a b-tree.
892 			 */
893 			xfs_da_state_free(state);
894 			state = NULL;
895 			error = xfs_attr3_leaf_to_node(args);
896 			if (error)
897 				goto out;
898 			error = xfs_defer_finish(&args->trans);
899 			if (error)
900 				goto out;
901 
902 			/*
903 			 * Commit the node conversion and start the next
904 			 * trans in the chain.
905 			 */
906 			error = xfs_trans_roll_inode(&args->trans, dp);
907 			if (error)
908 				goto out;
909 
910 			goto restart;
911 		}
912 
913 		/*
914 		 * Split as many Btree elements as required.
915 		 * This code tracks the new and old attr's location
916 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
917 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
918 		 */
919 		error = xfs_da3_split(state);
920 		if (error)
921 			goto out;
922 		error = xfs_defer_finish(&args->trans);
923 		if (error)
924 			goto out;
925 	} else {
926 		/*
927 		 * Addition succeeded, update Btree hashvals.
928 		 */
929 		xfs_da3_fixhashpath(state, &state->path);
930 	}
931 
932 	/*
933 	 * Kill the state structure, we're done with it and need to
934 	 * allow the buffers to come back later.
935 	 */
936 	xfs_da_state_free(state);
937 	state = NULL;
938 
939 	/*
940 	 * Commit the leaf addition or btree split and start the next
941 	 * trans in the chain.
942 	 */
943 	error = xfs_trans_roll_inode(&args->trans, dp);
944 	if (error)
945 		goto out;
946 
947 	/*
948 	 * If there was an out-of-line value, allocate the blocks we
949 	 * identified for its storage and copy the value.  This is done
950 	 * after we create the attribute so that we don't overflow the
951 	 * maximum size of a transaction and/or hit a deadlock.
952 	 */
953 	if (args->rmtblkno > 0) {
954 		error = xfs_attr_rmtval_set(args);
955 		if (error)
956 			return error;
957 	}
958 
959 	/*
960 	 * If this is an atomic rename operation, we must "flip" the
961 	 * incomplete flags on the "new" and "old" attribute/value pairs
962 	 * so that one disappears and one appears atomically.  Then we
963 	 * must remove the "old" attribute/value pair.
964 	 */
965 	if (args->op_flags & XFS_DA_OP_RENAME) {
966 		/*
967 		 * In a separate transaction, set the incomplete flag on the
968 		 * "old" attr and clear the incomplete flag on the "new" attr.
969 		 */
970 		error = xfs_attr3_leaf_flipflags(args);
971 		if (error)
972 			goto out;
973 
974 		/*
975 		 * Dismantle the "old" attribute/value pair by removing
976 		 * a "remote" value (if it exists).
977 		 */
978 		args->index = args->index2;
979 		args->blkno = args->blkno2;
980 		args->rmtblkno = args->rmtblkno2;
981 		args->rmtblkcnt = args->rmtblkcnt2;
982 		args->rmtvaluelen = args->rmtvaluelen2;
983 		if (args->rmtblkno) {
984 			error = xfs_attr_rmtval_remove(args);
985 			if (error)
986 				return error;
987 		}
988 
989 		/*
990 		 * Re-find the "old" attribute entry after any split ops.
991 		 * The INCOMPLETE flag means that we will find the "old"
992 		 * attr, not the "new" one.
993 		 */
994 		args->flags |= XFS_ATTR_INCOMPLETE;
995 		state = xfs_da_state_alloc();
996 		state->args = args;
997 		state->mp = mp;
998 		state->inleaf = 0;
999 		error = xfs_da3_node_lookup_int(state, &retval);
1000 		if (error)
1001 			goto out;
1002 
1003 		/*
1004 		 * Remove the name and update the hashvals in the tree.
1005 		 */
1006 		blk = &state->path.blk[ state->path.active-1 ];
1007 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1008 		error = xfs_attr3_leaf_remove(blk->bp, args);
1009 		xfs_da3_fixhashpath(state, &state->path);
1010 
1011 		/*
1012 		 * Check to see if the tree needs to be collapsed.
1013 		 */
1014 		if (retval && (state->path.active > 1)) {
1015 			error = xfs_da3_join(state);
1016 			if (error)
1017 				goto out;
1018 			error = xfs_defer_finish(&args->trans);
1019 			if (error)
1020 				goto out;
1021 		}
1022 
1023 		/*
1024 		 * Commit and start the next trans in the chain.
1025 		 */
1026 		error = xfs_trans_roll_inode(&args->trans, dp);
1027 		if (error)
1028 			goto out;
1029 
1030 	} else if (args->rmtblkno > 0) {
1031 		/*
1032 		 * Added a "remote" value, just clear the incomplete flag.
1033 		 */
1034 		error = xfs_attr3_leaf_clearflag(args);
1035 		if (error)
1036 			goto out;
1037 	}
1038 	retval = error = 0;
1039 
1040 out:
1041 	if (state)
1042 		xfs_da_state_free(state);
1043 	if (error)
1044 		return error;
1045 	return retval;
1046 }
1047 
1048 /*
1049  * Remove a name from a B-tree attribute list.
1050  *
1051  * This will involve walking down the Btree, and may involve joining
1052  * leaf nodes and even joining intermediate nodes up to and including
1053  * the root node (a special case of an intermediate node).
1054  */
1055 STATIC int
1056 xfs_attr_node_removename(
1057 	struct xfs_da_args	*args)
1058 {
1059 	struct xfs_da_state	*state;
1060 	struct xfs_da_state_blk	*blk;
1061 	struct xfs_inode	*dp;
1062 	struct xfs_buf		*bp;
1063 	int			retval, error, forkoff;
1064 
1065 	trace_xfs_attr_node_removename(args);
1066 
1067 	/*
1068 	 * Tie a string around our finger to remind us where we are.
1069 	 */
1070 	dp = args->dp;
1071 	state = xfs_da_state_alloc();
1072 	state->args = args;
1073 	state->mp = dp->i_mount;
1074 
1075 	/*
1076 	 * Search to see if name exists, and get back a pointer to it.
1077 	 */
1078 	error = xfs_da3_node_lookup_int(state, &retval);
1079 	if (error || (retval != -EEXIST)) {
1080 		if (error == 0)
1081 			error = retval;
1082 		goto out;
1083 	}
1084 
1085 	/*
1086 	 * If there is an out-of-line value, de-allocate the blocks.
1087 	 * This is done before we remove the attribute so that we don't
1088 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1089 	 */
1090 	blk = &state->path.blk[ state->path.active-1 ];
1091 	ASSERT(blk->bp != NULL);
1092 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1093 	if (args->rmtblkno > 0) {
1094 		/*
1095 		 * Fill in disk block numbers in the state structure
1096 		 * so that we can get the buffers back after we commit
1097 		 * several transactions in the following calls.
1098 		 */
1099 		error = xfs_attr_fillstate(state);
1100 		if (error)
1101 			goto out;
1102 
1103 		/*
1104 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1105 		 * remote value.
1106 		 */
1107 		error = xfs_attr3_leaf_setflag(args);
1108 		if (error)
1109 			goto out;
1110 		error = xfs_attr_rmtval_remove(args);
1111 		if (error)
1112 			goto out;
1113 
1114 		/*
1115 		 * Refill the state structure with buffers, the prior calls
1116 		 * released our buffers.
1117 		 */
1118 		error = xfs_attr_refillstate(state);
1119 		if (error)
1120 			goto out;
1121 	}
1122 
1123 	/*
1124 	 * Remove the name and update the hashvals in the tree.
1125 	 */
1126 	blk = &state->path.blk[ state->path.active-1 ];
1127 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1128 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1129 	xfs_da3_fixhashpath(state, &state->path);
1130 
1131 	/*
1132 	 * Check to see if the tree needs to be collapsed.
1133 	 */
1134 	if (retval && (state->path.active > 1)) {
1135 		error = xfs_da3_join(state);
1136 		if (error)
1137 			goto out;
1138 		error = xfs_defer_finish(&args->trans);
1139 		if (error)
1140 			goto out;
1141 		/*
1142 		 * Commit the Btree join operation and start a new trans.
1143 		 */
1144 		error = xfs_trans_roll_inode(&args->trans, dp);
1145 		if (error)
1146 			goto out;
1147 	}
1148 
1149 	/*
1150 	 * If the result is small enough, push it all into the inode.
1151 	 */
1152 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1153 		/*
1154 		 * Have to get rid of the copy of this dabuf in the state.
1155 		 */
1156 		ASSERT(state->path.active == 1);
1157 		ASSERT(state->path.blk[0].bp);
1158 		state->path.blk[0].bp = NULL;
1159 
1160 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1161 		if (error)
1162 			goto out;
1163 
1164 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1165 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1166 			/* bp is gone due to xfs_da_shrink_inode */
1167 			if (error)
1168 				goto out;
1169 			error = xfs_defer_finish(&args->trans);
1170 			if (error)
1171 				goto out;
1172 		} else
1173 			xfs_trans_brelse(args->trans, bp);
1174 	}
1175 	error = 0;
1176 
1177 out:
1178 	xfs_da_state_free(state);
1179 	return error;
1180 }
1181 
1182 /*
1183  * Fill in the disk block numbers in the state structure for the buffers
1184  * that are attached to the state structure.
1185  * This is done so that we can quickly reattach ourselves to those buffers
1186  * after some set of transaction commits have released these buffers.
1187  */
1188 STATIC int
1189 xfs_attr_fillstate(xfs_da_state_t *state)
1190 {
1191 	xfs_da_state_path_t *path;
1192 	xfs_da_state_blk_t *blk;
1193 	int level;
1194 
1195 	trace_xfs_attr_fillstate(state->args);
1196 
1197 	/*
1198 	 * Roll down the "path" in the state structure, storing the on-disk
1199 	 * block number for those buffers in the "path".
1200 	 */
1201 	path = &state->path;
1202 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1203 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1204 		if (blk->bp) {
1205 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1206 			blk->bp = NULL;
1207 		} else {
1208 			blk->disk_blkno = 0;
1209 		}
1210 	}
1211 
1212 	/*
1213 	 * Roll down the "altpath" in the state structure, storing the on-disk
1214 	 * block number for those buffers in the "altpath".
1215 	 */
1216 	path = &state->altpath;
1217 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1218 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1219 		if (blk->bp) {
1220 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1221 			blk->bp = NULL;
1222 		} else {
1223 			blk->disk_blkno = 0;
1224 		}
1225 	}
1226 
1227 	return 0;
1228 }
1229 
1230 /*
1231  * Reattach the buffers to the state structure based on the disk block
1232  * numbers stored in the state structure.
1233  * This is done after some set of transaction commits have released those
1234  * buffers from our grip.
1235  */
1236 STATIC int
1237 xfs_attr_refillstate(xfs_da_state_t *state)
1238 {
1239 	xfs_da_state_path_t *path;
1240 	xfs_da_state_blk_t *blk;
1241 	int level, error;
1242 
1243 	trace_xfs_attr_refillstate(state->args);
1244 
1245 	/*
1246 	 * Roll down the "path" in the state structure, storing the on-disk
1247 	 * block number for those buffers in the "path".
1248 	 */
1249 	path = &state->path;
1250 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1251 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1252 		if (blk->disk_blkno) {
1253 			error = xfs_da3_node_read(state->args->trans,
1254 						state->args->dp,
1255 						blk->blkno, blk->disk_blkno,
1256 						&blk->bp, XFS_ATTR_FORK);
1257 			if (error)
1258 				return error;
1259 		} else {
1260 			blk->bp = NULL;
1261 		}
1262 	}
1263 
1264 	/*
1265 	 * Roll down the "altpath" in the state structure, storing the on-disk
1266 	 * block number for those buffers in the "altpath".
1267 	 */
1268 	path = &state->altpath;
1269 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1270 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1271 		if (blk->disk_blkno) {
1272 			error = xfs_da3_node_read(state->args->trans,
1273 						state->args->dp,
1274 						blk->blkno, blk->disk_blkno,
1275 						&blk->bp, XFS_ATTR_FORK);
1276 			if (error)
1277 				return error;
1278 		} else {
1279 			blk->bp = NULL;
1280 		}
1281 	}
1282 
1283 	return 0;
1284 }
1285 
1286 /*
1287  * Look up a filename in a node attribute list.
1288  *
1289  * This routine gets called for any attribute fork that has more than one
1290  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1291  * "remote" values taking up more blocks.
1292  */
1293 STATIC int
1294 xfs_attr_node_get(xfs_da_args_t *args)
1295 {
1296 	xfs_da_state_t *state;
1297 	xfs_da_state_blk_t *blk;
1298 	int error, retval;
1299 	int i;
1300 
1301 	trace_xfs_attr_node_get(args);
1302 
1303 	state = xfs_da_state_alloc();
1304 	state->args = args;
1305 	state->mp = args->dp->i_mount;
1306 
1307 	/*
1308 	 * Search to see if name exists, and get back a pointer to it.
1309 	 */
1310 	error = xfs_da3_node_lookup_int(state, &retval);
1311 	if (error) {
1312 		retval = error;
1313 	} else if (retval == -EEXIST) {
1314 		blk = &state->path.blk[ state->path.active-1 ];
1315 		ASSERT(blk->bp != NULL);
1316 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1317 
1318 		/*
1319 		 * Get the value, local or "remote"
1320 		 */
1321 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1322 		if (!retval && (args->rmtblkno > 0)
1323 		    && !(args->flags & ATTR_KERNOVAL)) {
1324 			retval = xfs_attr_rmtval_get(args);
1325 		}
1326 	}
1327 
1328 	/*
1329 	 * If not in a transaction, we have to release all the buffers.
1330 	 */
1331 	for (i = 0; i < state->path.active; i++) {
1332 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1333 		state->path.blk[i].bp = NULL;
1334 	}
1335 
1336 	xfs_da_state_free(state);
1337 	return retval;
1338 }
1339