xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr.c (revision a06c488d)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_da_format.h"
27 #include "xfs_da_btree.h"
28 #include "xfs_attr_sf.h"
29 #include "xfs_inode.h"
30 #include "xfs_alloc.h"
31 #include "xfs_trans.h"
32 #include "xfs_inode_item.h"
33 #include "xfs_bmap.h"
34 #include "xfs_bmap_util.h"
35 #include "xfs_bmap_btree.h"
36 #include "xfs_attr.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_attr_remote.h"
39 #include "xfs_error.h"
40 #include "xfs_quota.h"
41 #include "xfs_trans_space.h"
42 #include "xfs_trace.h"
43 
44 /*
45  * xfs_attr.c
46  *
47  * Provide the external interfaces to manage attribute lists.
48  */
49 
50 /*========================================================================
51  * Function prototypes for the kernel.
52  *========================================================================*/
53 
54 /*
55  * Internal routines used when the attribute list fits inside the inode
56  * (the "shortform" layout).
 */
57 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
58 
59 /*
60  * Internal routines used when the attribute list is a single leaf block.
61  */
62 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
63 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
64 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
65 
66 /*
67  * Internal routines used when the attribute list is more than one block
68  * (the node / Btree layout).
 */
69 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
70 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
71 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
72 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
73 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
74 
75 
76 STATIC int
77 xfs_attr_args_init(
78 	struct xfs_da_args	*args,
79 	struct xfs_inode	*dp,
80 	const unsigned char	*name,
81 	int			flags)
82 {
83 
84 	if (!name)
85 		return -EINVAL;
86 
87 	memset(args, 0, sizeof(*args));
88 	args->geo = dp->i_mount->m_attr_geo;
89 	args->whichfork = XFS_ATTR_FORK;
90 	args->dp = dp;
91 	args->flags = flags;
92 	args->name = name;
93 	args->namelen = strlen((const char *)name);
94 	if (args->namelen >= MAXNAMELEN)
95 		return -EFAULT;		/* match IRIX behaviour */
96 
97 	args->hashval = xfs_da_hashname(args->name, args->namelen);
98 	return 0;
99 }
100 
101 int
102 xfs_inode_hasattr(
103 	struct xfs_inode	*ip)
104 {
105 	if (!XFS_IFORK_Q(ip) ||
106 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
107 	     ip->i_d.di_anextents == 0))
108 		return 0;
109 	return 1;
110 }
111 
112 /*========================================================================
113  * Overall external interface routines.
114  *========================================================================*/
115 
116 int
117 xfs_attr_get(
118 	struct xfs_inode	*ip,
119 	const unsigned char	*name,
120 	unsigned char		*value,
121 	int			*valuelenp,
122 	int			flags)
123 {
124 	struct xfs_da_args	args;
125 	uint			lock_mode;
126 	int			error;
127 
128 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
129 
130 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
131 		return -EIO;
132 
133 	if (!xfs_inode_hasattr(ip))
134 		return -ENOATTR;
135 
136 	error = xfs_attr_args_init(&args, ip, name, flags);
137 	if (error)
138 		return error;
139 
140 	args.value = value;
141 	args.valuelen = *valuelenp;
142 	/* Entirely possible to look up a name which doesn't exist */
143 	args.op_flags = XFS_DA_OP_OKNOENT;
144 
145 	lock_mode = xfs_ilock_attr_map_shared(ip);
146 	if (!xfs_inode_hasattr(ip))
147 		error = -ENOATTR;
148 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
149 		error = xfs_attr_shortform_getvalue(&args);
150 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
151 		error = xfs_attr_leaf_get(&args);
152 	else
153 		error = xfs_attr_node_get(&args);
154 	xfs_iunlock(ip, lock_mode);
155 
156 	*valuelenp = args.valuelen;
157 	return error == -EEXIST ? 0 : error;
158 }
159 
160 /*
161  * Calculate how many blocks we need for the new attribute,
162  */
163 STATIC int
164 xfs_attr_calc_size(
165 	struct xfs_da_args	*args,
166 	int			*local)
167 {
168 	struct xfs_mount	*mp = args->dp->i_mount;
169 	int			size;
170 	int			nblks;
171 
172 	/*
173 	 * Determine space new attribute will use, and if it would be
174 	 * "local" or "remote" (note: local != inline).
175 	 */
176 	size = xfs_attr_leaf_newentsize(args, local);
177 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
178 	if (*local) {
179 		if (size > (args->geo->blksize / 2)) {
180 			/* Double split possible */
181 			nblks *= 2;
182 		}
183 	} else {
184 		/*
185 		 * Out of line attribute, cannot double split, but
186 		 * make room for the attribute value itself.
187 		 */
188 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
189 		nblks += dblocks;
190 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
191 	}
192 
193 	return nblks;
194 }
195 
/*
 * Set the attribute @name on inode @dp to the @valuelen bytes at @value.
 *
 * The work is done in a chain of transactions.  Depending on the current
 * attribute fork format the name is added via the shortform, single-leaf
 * or node (Btree) routine; a shortform list that cannot take the new entry
 * (-ENOSPC) is first converted to a leaf block.
 *
 * @flags bits consulted here:
 *   ATTR_ROOT      - sets XFS_TRANS_RESERVE on the transaction and forces
 *                    the quota reservation (XFS_QMOPT_FORCE_RES).
 *   ATTR_KERNOTIME - skip the XFS_ICHGTIME_CHG timestamp update.
 * The flags are also stored in args.flags for the add-name routines.
 *
 * The inode must not be locked by the caller: XFS_ILOCK_EXCL is taken here
 * after the log reservation and is dropped again on every return path that
 * follows.
 *
 * Returns 0 on success, -EIO if the filesystem has been shut down, or the
 * error from argument setup, dquot attach, attr fork creation, reservation,
 * the add-name routine or the final commit.
 */
196 int
197 xfs_attr_set(
198 	struct xfs_inode	*dp,
199 	const unsigned char	*name,
200 	unsigned char		*value,
201 	int			valuelen,
202 	int			flags)
203 {
204 	struct xfs_mount	*mp = dp->i_mount;
205 	struct xfs_da_args	args;
206 	struct xfs_bmap_free	flist;
207 	struct xfs_trans_res	tres;
208 	xfs_fsblock_t		firstblock;
209 	int			rsvd = (flags & ATTR_ROOT) != 0;
210 	int			error, err2, local;
211 
212 	XFS_STATS_INC(mp, xs_attr_set);
213 
214 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
215 		return -EIO;
216 
217 	error = xfs_attr_args_init(&args, dp, name, flags);
218 	if (error)
219 		return error;
220 
221 	args.value = value;
222 	args.valuelen = valuelen;
223 	args.firstblock = &firstblock;
224 	args.flist = &flist;
225 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
	/* Block count used for both the log and the quota reservation below. */
226 	args.total = xfs_attr_calc_size(&args, &local);
227 
228 	error = xfs_qm_dqattach(dp, 0);
229 	if (error)
230 		return error;
231 
232 	/*
233 	 * If the inode doesn't have an attribute fork, add one.
234 	 * (inode must not be locked when we call this routine)
235 	 */
236 	if (XFS_IFORK_Q(dp) == 0) {
		/* Size of a shortform list holding just this one entry. */
237 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
238 			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
239 
240 		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
241 		if (error)
242 			return error;
243 	}
244 
245 	/*
246 	 * Start our first transaction of the day.
247 	 *
248 	 * All future transactions during this code must be "chained" off
249 	 * this one via the trans_dup() call.  All transactions will contain
250 	 * the inode, and the inode will always be marked with trans_ihold().
251 	 * Since the inode will be locked in all transactions, we must log
252 	 * the inode in every transaction to let it float upward through
253 	 * the log.
	 *
	 * NOTE(review): the chaining visible in this function is done with
	 * xfs_trans_roll(); the trans_dup()/trans_ihold() wording above
	 * looks historical - confirm.
254 	 */
255 	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
256 
257 	/*
258 	 * Root fork attributes can use reserved data blocks for this
259 	 * operation if necessary
260 	 */
261 
262 	if (rsvd)
263 		args.trans->t_flags |= XFS_TRANS_RESERVE;
264 
	/*
	 * Build a per-call log reservation: the fixed attrsetm part plus
	 * one attrsetrt unit for every block in args.total.
	 */
265 	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
266 			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
267 	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
268 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
269 	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
270 	if (error) {
271 		xfs_trans_cancel(args.trans);
272 		return error;
273 	}
274 	xfs_ilock(dp, XFS_ILOCK_EXCL);
275 
276 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
277 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
278 				       XFS_QMOPT_RES_REGBLKS);
279 	if (error) {
280 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
281 		xfs_trans_cancel(args.trans);
282 		return error;
283 	}
284 
	/*
	 * Join with lock flags 0: the ILOCK is dropped explicitly by this
	 * function on every exit path below.
	 */
285 	xfs_trans_ijoin(args.trans, dp, 0);
286 
287 	/*
288 	 * If the attribute list is non-existent or a shortform list,
289 	 * upgrade it to a single-leaf-block attribute list.
290 	 */
291 	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
292 	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
293 	     dp->i_d.di_anextents == 0)) {
294 
295 		/*
296 		 * Build initial attribute list (if required).
297 		 */
298 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
299 			xfs_attr_shortform_create(&args);
300 
301 		/*
302 		 * Try to add the attr to the attribute list in
303 		 * the inode.
304 		 */
305 		error = xfs_attr_shortform_addname(&args);
306 		if (error != -ENOSPC) {
307 			/*
308 			 * Commit the shortform mods, and we're done.
309 			 * NOTE: this is also the error path (EEXIST, etc).
310 			 */
311 			ASSERT(args.trans != NULL);
312 
313 			/*
314 			 * If this is a synchronous mount, make sure that
315 			 * the transaction goes to disk before returning
316 			 * to the user.
317 			 */
318 			if (mp->m_flags & XFS_MOUNT_WSYNC)
319 				xfs_trans_set_sync(args.trans);
320 
321 			if (!error && (flags & ATTR_KERNOTIME) == 0) {
322 				xfs_trans_ichgtime(args.trans, dp,
323 							XFS_ICHGTIME_CHG);
324 			}
			/*
			 * The transaction is committed even when the add
			 * failed; the add error takes precedence over the
			 * commit error in the return value.
			 */
325 			err2 = xfs_trans_commit(args.trans);
326 			xfs_iunlock(dp, XFS_ILOCK_EXCL);
327 
328 			return error ? error : err2;
329 		}
330 
331 		/*
332 		 * It won't fit in the shortform, transform to a leaf block.
333 		 * GROT: another possible req'mt for a double-split btree op.
334 		 */
335 		xfs_bmap_init(args.flist, args.firstblock);
336 		error = xfs_attr_shortform_to_leaf(&args);
337 		if (!error)
338 			error = xfs_bmap_finish(&args.trans, args.flist, dp);
339 		if (error) {
			/*
			 * Clearing args.trans makes the "out" label skip
			 * xfs_trans_cancel().
			 */
340 			args.trans = NULL;
341 			xfs_bmap_cancel(&flist);
342 			goto out;
343 		}
344 
345 		/*
346 		 * Commit the leaf transformation.  We'll need another (linked)
347 		 * transaction to add the new attribute to the leaf.
348 		 */
349 
350 		error = xfs_trans_roll(&args.trans, dp);
351 		if (error)
352 			goto out;
353 
354 	}
355 
	/* Leaf or node format from here on. */
356 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
357 		error = xfs_attr_leaf_addname(&args);
358 	else
359 		error = xfs_attr_node_addname(&args);
360 	if (error)
361 		goto out;
362 
363 	/*
364 	 * If this is a synchronous mount, make sure that the
365 	 * transaction goes to disk before returning to the user.
366 	 */
367 	if (mp->m_flags & XFS_MOUNT_WSYNC)
368 		xfs_trans_set_sync(args.trans);
369 
370 	if ((flags & ATTR_KERNOTIME) == 0)
371 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
372 
373 	/*
374 	 * Commit the last in the sequence of transactions.
375 	 */
376 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
377 	error = xfs_trans_commit(args.trans);
378 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
379 
380 	return error;
381 
	/*
	 * Error exit: cancel the transaction if one is still attached to
	 * args, then drop the inode lock.
	 */
382 out:
383 	if (args.trans)
384 		xfs_trans_cancel(args.trans);
385 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
386 	return error;
387 }
388 
389 /*
390  * Generic handler routine to remove a name from an attribute list.
391  * Transitions attribute list from Btree to shortform as necessary.
392  */
393 int
394 xfs_attr_remove(
395 	struct xfs_inode	*dp,
396 	const unsigned char	*name,
397 	int			flags)
398 {
399 	struct xfs_mount	*mp = dp->i_mount;
400 	struct xfs_da_args	args;
401 	struct xfs_bmap_free	flist;
402 	xfs_fsblock_t		firstblock;
403 	int			error;
404 
405 	XFS_STATS_INC(mp, xs_attr_remove);
406 
407 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
408 		return -EIO;
409 
410 	if (!xfs_inode_hasattr(dp))
411 		return -ENOATTR;
412 
413 	error = xfs_attr_args_init(&args, dp, name, flags);
414 	if (error)
415 		return error;
416 
417 	args.firstblock = &firstblock;
418 	args.flist = &flist;
419 
420 	/*
421 	 * we have no control over the attribute names that userspace passes us
422 	 * to remove, so we have to allow the name lookup prior to attribute
423 	 * removal to fail.
424 	 */
425 	args.op_flags = XFS_DA_OP_OKNOENT;
426 
427 	error = xfs_qm_dqattach(dp, 0);
428 	if (error)
429 		return error;
430 
431 	/*
432 	 * Start our first transaction of the day.
433 	 *
434 	 * All future transactions during this code must be "chained" off
435 	 * this one via the trans_dup() call.  All transactions will contain
436 	 * the inode, and the inode will always be marked with trans_ihold().
437 	 * Since the inode will be locked in all transactions, we must log
438 	 * the inode in every transaction to let it float upward through
439 	 * the log.
440 	 */
441 	args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
442 
443 	/*
444 	 * Root fork attributes can use reserved data blocks for this
445 	 * operation if necessary
446 	 */
447 
448 	if (flags & ATTR_ROOT)
449 		args.trans->t_flags |= XFS_TRANS_RESERVE;
450 
451 	error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
452 				  XFS_ATTRRM_SPACE_RES(mp), 0);
453 	if (error) {
454 		xfs_trans_cancel(args.trans);
455 		return error;
456 	}
457 
458 	xfs_ilock(dp, XFS_ILOCK_EXCL);
459 	/*
460 	 * No need to make quota reservations here. We expect to release some
461 	 * blocks not allocate in the common case.
462 	 */
463 	xfs_trans_ijoin(args.trans, dp, 0);
464 
465 	if (!xfs_inode_hasattr(dp)) {
466 		error = -ENOATTR;
467 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
468 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
469 		error = xfs_attr_shortform_remove(&args);
470 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
471 		error = xfs_attr_leaf_removename(&args);
472 	} else {
473 		error = xfs_attr_node_removename(&args);
474 	}
475 
476 	if (error)
477 		goto out;
478 
479 	/*
480 	 * If this is a synchronous mount, make sure that the
481 	 * transaction goes to disk before returning to the user.
482 	 */
483 	if (mp->m_flags & XFS_MOUNT_WSYNC)
484 		xfs_trans_set_sync(args.trans);
485 
486 	if ((flags & ATTR_KERNOTIME) == 0)
487 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
488 
489 	/*
490 	 * Commit the last in the sequence of transactions.
491 	 */
492 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
493 	error = xfs_trans_commit(args.trans);
494 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
495 
496 	return error;
497 
498 out:
499 	if (args.trans)
500 		xfs_trans_cancel(args.trans);
501 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
502 	return error;
503 }
504 
505 /*========================================================================
506  * External routines when attribute list is inside the inode
507  *========================================================================*/
508 
509 /*
510  * Add a name to the shortform attribute list structure
511  * This is the external routine.
512  */
513 STATIC int
514 xfs_attr_shortform_addname(xfs_da_args_t *args)
515 {
516 	int newsize, forkoff, retval;
517 
518 	trace_xfs_attr_sf_addname(args);
519 
520 	retval = xfs_attr_shortform_lookup(args);
521 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
522 		return retval;
523 	} else if (retval == -EEXIST) {
524 		if (args->flags & ATTR_CREATE)
525 			return retval;
526 		retval = xfs_attr_shortform_remove(args);
527 		ASSERT(retval == 0);
528 	}
529 
530 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
531 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
532 		return -ENOSPC;
533 
534 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
535 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
536 
537 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
538 	if (!forkoff)
539 		return -ENOSPC;
540 
541 	xfs_attr_shortform_add(args, forkoff);
542 	return 0;
543 }
544 
545 
546 /*========================================================================
547  * External routines when attribute list is one block
548  *========================================================================*/
549 
550 /*
551  * Add a name to the leaf attribute list structure
552  *
553  * This leaf block cannot have a "remote" value, we only call this routine
554  * if bmap_one_block() says there is only one block (ie: no remote blks).
 *
 * NOTE(review): the body nevertheless handles args->rmtblkno > 0 after the
 * add (remote value set / clearflag), so the sentence above seems to apply
 * to the attributes already in the block, not the one being added - confirm.
 *
 * Outline:
 *  - read leaf block 0 and look the name up;
 *  - ATTR_REPLACE with no existing name returns -ENOATTR, ATTR_CREATE with
 *    an existing name returns -EEXIST (the buffer is released first);
 *  - an existing name otherwise turns this into an atomic rename
 *    (XFS_DA_OP_RENAME) and the old entry's location is saved in the
 *    blkno2/index2/rmt*2 fields of @args;
 *  - if the leaf has no room (-ENOSPC) the list is converted to node format
 *    and the rest is handed to xfs_attr_node_addname();
 *  - otherwise the transaction is rolled, any remote value is written, and
 *    for a rename the old entry is removed (shrinking back to shortform if
 *    everything fits).
 *
 * Returns 0 on success or a negative errno.  On the xfs_bmap_finish()
 * failure paths args->trans is set to NULL before returning.
555  */
556 STATIC int
557 xfs_attr_leaf_addname(xfs_da_args_t *args)
558 {
559 	xfs_inode_t *dp;
560 	struct xfs_buf *bp;
561 	int retval, error, forkoff;
562 
563 	trace_xfs_attr_leaf_addname(args);
564 
565 	/*
566 	 * Read the (only) block in the attribute list in.
567 	 */
568 	dp = args->dp;
569 	args->blkno = 0;
570 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
571 	if (error)
572 		return error;
573 
574 	/*
575 	 * Look up the given attribute in the leaf block.  Figure out if
576 	 * the given flags produce an error or call for an atomic rename.
577 	 */
578 	retval = xfs_attr3_leaf_lookup_int(bp, args);
579 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
580 		xfs_trans_brelse(args->trans, bp);
581 		return retval;
582 	} else if (retval == -EEXIST) {
583 		if (args->flags & ATTR_CREATE) {	/* pure create op */
584 			xfs_trans_brelse(args->trans, bp);
585 			return retval;
586 		}
587 
588 		trace_xfs_attr_leaf_replace(args);
589 
590 		/* save the attribute state for later removal*/
591 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
592 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
593 		args->index2 = args->index;
594 		args->rmtblkno2 = args->rmtblkno;
595 		args->rmtblkcnt2 = args->rmtblkcnt;
596 		args->rmtvaluelen2 = args->rmtvaluelen;
597 
598 		/*
599 		 * clear the remote attr state now that it is saved so that the
600 		 * values reflect the state of the attribute we are about to
601 		 * add, not the attribute we just found and will remove later.
602 		 */
603 		args->rmtblkno = 0;
604 		args->rmtblkcnt = 0;
605 		args->rmtvaluelen = 0;
606 	}
607 
608 	/*
609 	 * Add the attribute to the leaf block, transitioning to a Btree
610 	 * if required.
611 	 */
612 	retval = xfs_attr3_leaf_add(bp, args);
613 	if (retval == -ENOSPC) {
614 		/*
615 		 * Promote the attribute list to the Btree format, then
616 		 * Commit that transaction so that the node_addname() call
617 		 * can manage its own transactions.
618 		 */
619 		xfs_bmap_init(args->flist, args->firstblock);
620 		error = xfs_attr3_leaf_to_node(args);
621 		if (!error)
622 			error = xfs_bmap_finish(&args->trans, args->flist, dp);
623 		if (error) {
			/* Tell the caller there is no transaction to cancel. */
624 			args->trans = NULL;
625 			xfs_bmap_cancel(args->flist);
626 			return error;
627 		}
628 
629 		/*
630 		 * Commit the current trans (including the inode) and start
631 		 * a new one.
632 		 */
633 		error = xfs_trans_roll(&args->trans, dp);
634 		if (error)
635 			return error;
636 
637 		/*
638 		 * Fob the whole rest of the problem off on the Btree code.
639 		 */
640 		error = xfs_attr_node_addname(args);
641 		return error;
642 	}
643 
644 	/*
645 	 * Commit the transaction that added the attr name so that
646 	 * later routines can manage their own transactions.
647 	 */
648 	error = xfs_trans_roll(&args->trans, dp);
649 	if (error)
650 		return error;
651 
652 	/*
653 	 * If there was an out-of-line value, allocate the blocks we
654 	 * identified for its storage and copy the value.  This is done
655 	 * after we create the attribute so that we don't overflow the
656 	 * maximum size of a transaction and/or hit a deadlock.
657 	 */
658 	if (args->rmtblkno > 0) {
659 		error = xfs_attr_rmtval_set(args);
660 		if (error)
661 			return error;
662 	}
663 
664 	/*
665 	 * If this is an atomic rename operation, we must "flip" the
666 	 * incomplete flags on the "new" and "old" attribute/value pairs
667 	 * so that one disappears and one appears atomically.  Then we
668 	 * must remove the "old" attribute/value pair.
669 	 */
670 	if (args->op_flags & XFS_DA_OP_RENAME) {
671 		/*
672 		 * In a separate transaction, set the incomplete flag on the
673 		 * "old" attr and clear the incomplete flag on the "new" attr.
674 		 */
675 		error = xfs_attr3_leaf_flipflags(args);
676 		if (error)
677 			return error;
678 
679 		/*
680 		 * Dismantle the "old" attribute/value pair by removing
681 		 * a "remote" value (if it exists).
682 		 */
		/* Point args back at the old entry saved before the add. */
683 		args->index = args->index2;
684 		args->blkno = args->blkno2;
685 		args->rmtblkno = args->rmtblkno2;
686 		args->rmtblkcnt = args->rmtblkcnt2;
687 		args->rmtvaluelen = args->rmtvaluelen2;
688 		if (args->rmtblkno) {
689 			error = xfs_attr_rmtval_remove(args);
690 			if (error)
691 				return error;
692 		}
693 
694 		/*
695 		 * Read in the block containing the "old" attr, then
696 		 * remove the "old" attr from that block (neat, huh!)
697 		 */
698 		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
699 					   -1, &bp);
700 		if (error)
701 			return error;
702 
		/* The return value of the remove is not used here. */
703 		xfs_attr3_leaf_remove(bp, args);
704 
705 		/*
706 		 * If the result is small enough, shrink it all into the inode.
707 		 */
708 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
709 			xfs_bmap_init(args->flist, args->firstblock);
710 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
711 			/* bp is gone due to xfs_da_shrink_inode */
712 			if (!error)
713 				error = xfs_bmap_finish(&args->trans,
714 							args->flist, dp);
715 			if (error) {
716 				args->trans = NULL;
717 				xfs_bmap_cancel(args->flist);
718 				return error;
719 			}
720 		}
721 
722 		/*
723 		 * Commit the remove and start the next trans in series.
724 		 */
725 		error = xfs_trans_roll(&args->trans, dp);
726 
727 	} else if (args->rmtblkno > 0) {
728 		/*
729 		 * Added a "remote" value, just clear the incomplete flag.
730 		 */
731 		error = xfs_attr3_leaf_clearflag(args);
732 	}
	/*
	 * When neither branch above ran, error is still the 0 left by the
	 * successful xfs_trans_roll() earlier in this function.
	 */
733 	return error;
734 }
735 
736 /*
737  * Remove a name from the leaf attribute list structure
738  *
739  * This leaf block cannot have a "remote" value, we only call this routine
740  * if bmap_one_block() says there is only one block (ie: no remote blks).
741  */
742 STATIC int
743 xfs_attr_leaf_removename(xfs_da_args_t *args)
744 {
745 	xfs_inode_t *dp;
746 	struct xfs_buf *bp;
747 	int error, forkoff;
748 
749 	trace_xfs_attr_leaf_removename(args);
750 
751 	/*
752 	 * Remove the attribute.
753 	 */
754 	dp = args->dp;
755 	args->blkno = 0;
756 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
757 	if (error)
758 		return error;
759 
760 	error = xfs_attr3_leaf_lookup_int(bp, args);
761 	if (error == -ENOATTR) {
762 		xfs_trans_brelse(args->trans, bp);
763 		return error;
764 	}
765 
766 	xfs_attr3_leaf_remove(bp, args);
767 
768 	/*
769 	 * If the result is small enough, shrink it all into the inode.
770 	 */
771 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
772 		xfs_bmap_init(args->flist, args->firstblock);
773 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
774 		/* bp is gone due to xfs_da_shrink_inode */
775 		if (!error)
776 			error = xfs_bmap_finish(&args->trans, args->flist, dp);
777 		if (error) {
778 			args->trans = NULL;
779 			xfs_bmap_cancel(args->flist);
780 			return error;
781 		}
782 	}
783 	return 0;
784 }
785 
786 /*
787  * Look up a name in a leaf attribute list structure.
788  *
789  * This leaf block cannot have a "remote" value, we only call this routine
790  * if bmap_one_block() says there is only one block (ie: no remote blks).
791  */
792 STATIC int
793 xfs_attr_leaf_get(xfs_da_args_t *args)
794 {
795 	struct xfs_buf *bp;
796 	int error;
797 
798 	trace_xfs_attr_leaf_get(args);
799 
800 	args->blkno = 0;
801 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
802 	if (error)
803 		return error;
804 
805 	error = xfs_attr3_leaf_lookup_int(bp, args);
806 	if (error != -EEXIST)  {
807 		xfs_trans_brelse(args->trans, bp);
808 		return error;
809 	}
810 	error = xfs_attr3_leaf_getvalue(bp, args);
811 	xfs_trans_brelse(args->trans, bp);
812 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
813 		error = xfs_attr_rmtval_get(args);
814 	}
815 	return error;
816 }
817 
818 /*========================================================================
819  * External routines when attribute list size > geo->blksize
820  *========================================================================*/
821 
822 /*
823  * Add a name to a Btree-format attribute list.
824  *
825  * This will involve walking down the Btree, and may involve splitting
826  * leaf nodes and even splitting intermediate nodes up to and including
827  * the root node (a special case of an intermediate node).
828  *
829  * "Remote" attribute values confuse the issue and atomic rename operations
830  * add a whole extra layer of confusion on top of that.
 *
 * Outline:
 *  - look the name up; ATTR_REPLACE with no existing name, or ATTR_CREATE
 *    with an existing one, returns the lookup result (-ENOATTR / -EEXIST);
 *  - an existing name otherwise turns this into an atomic rename
 *    (XFS_DA_OP_RENAME) with the old entry saved in the *2 fields of @args;
 *  - add to the leaf; on -ENOSPC either convert a lone leaf to node format
 *    and restart, or split the Btree;
 *  - roll the transaction, write any remote value, and for a rename flip
 *    the incomplete flags, remove the old entry and possibly join blocks.
 *
 * Returns 0 on success or a negative errno.  On the xfs_bmap_finish()
 * failure paths args->trans is set to NULL before returning.
831  */
832 STATIC int
833 xfs_attr_node_addname(xfs_da_args_t *args)
834 {
835 	xfs_da_state_t *state;
836 	xfs_da_state_blk_t *blk;
837 	xfs_inode_t *dp;
838 	xfs_mount_t *mp;
839 	int retval, error;
840 
841 	trace_xfs_attr_node_addname(args);
842 
843 	/*
844 	 * Fill in bucket of arguments/results/context to carry around.
845 	 */
846 	dp = args->dp;
847 	mp = dp->i_mount;
	/* Re-entered after a lone leaf has been converted to node format. */
848 restart:
849 	state = xfs_da_state_alloc();
850 	state->args = args;
851 	state->mp = mp;
852 
853 	/*
854 	 * Search to see if name already exists, and get back a pointer
855 	 * to where it should go.
856 	 */
857 	error = xfs_da3_node_lookup_int(state, &retval);
858 	if (error)
859 		goto out;
860 	blk = &state->path.blk[ state->path.active-1 ];
861 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
	/*
	 * The two "goto out" exits below leave error == 0, so the label
	 * returns retval (the lookup result) to the caller.
	 */
862 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
863 		goto out;
864 	} else if (retval == -EEXIST) {
865 		if (args->flags & ATTR_CREATE)
866 			goto out;
867 
868 		trace_xfs_attr_node_replace(args);
869 
870 		/* save the attribute state for later removal*/
871 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
872 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
873 		args->index2 = args->index;
874 		args->rmtblkno2 = args->rmtblkno;
875 		args->rmtblkcnt2 = args->rmtblkcnt;
876 		args->rmtvaluelen2 = args->rmtvaluelen;
877 
878 		/*
879 		 * clear the remote attr state now that it is saved so that the
880 		 * values reflect the state of the attribute we are about to
881 		 * add, not the attribute we just found and will remove later.
882 		 */
883 		args->rmtblkno = 0;
884 		args->rmtblkcnt = 0;
885 		args->rmtvaluelen = 0;
886 	}
887 
888 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
889 	if (retval == -ENOSPC) {
890 		if (state->path.active == 1) {
891 			/*
892 			 * Its really a single leaf node, but it had
893 			 * out-of-line values so it looked like it *might*
894 			 * have been a b-tree.
895 			 */
896 			xfs_da_state_free(state);
			/* NULL so the "out" label does not free it again. */
897 			state = NULL;
898 			xfs_bmap_init(args->flist, args->firstblock);
899 			error = xfs_attr3_leaf_to_node(args);
900 			if (!error)
901 				error = xfs_bmap_finish(&args->trans,
902 							args->flist, dp);
903 			if (error) {
904 				args->trans = NULL;
905 				xfs_bmap_cancel(args->flist);
906 				goto out;
907 			}
908 
909 			/*
910 			 * Commit the node conversion and start the next
911 			 * trans in the chain.
912 			 */
913 			error = xfs_trans_roll(&args->trans, dp);
914 			if (error)
915 				goto out;
916 
917 			goto restart;
918 		}
919 
920 		/*
921 		 * Split as many Btree elements as required.
922 		 * This code tracks the new and old attr's location
923 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
924 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
925 		 */
926 		xfs_bmap_init(args->flist, args->firstblock);
927 		error = xfs_da3_split(state);
928 		if (!error)
929 			error = xfs_bmap_finish(&args->trans, args->flist, dp);
930 		if (error) {
931 			args->trans = NULL;
932 			xfs_bmap_cancel(args->flist);
933 			goto out;
934 		}
935 	} else {
936 		/*
937 		 * Addition succeeded, update Btree hashvals.
938 		 */
939 		xfs_da3_fixhashpath(state, &state->path);
940 	}
941 
942 	/*
943 	 * Kill the state structure, we're done with it and need to
944 	 * allow the buffers to come back later.
945 	 */
946 	xfs_da_state_free(state);
947 	state = NULL;
948 
949 	/*
950 	 * Commit the leaf addition or btree split and start the next
951 	 * trans in the chain.
952 	 */
953 	error = xfs_trans_roll(&args->trans, dp);
954 	if (error)
955 		goto out;
956 
957 	/*
958 	 * If there was an out-of-line value, allocate the blocks we
959 	 * identified for its storage and copy the value.  This is done
960 	 * after we create the attribute so that we don't overflow the
961 	 * maximum size of a transaction and/or hit a deadlock.
962 	 */
963 	if (args->rmtblkno > 0) {
964 		error = xfs_attr_rmtval_set(args);
		/* state is NULL here, so returning directly leaks nothing. */
965 		if (error)
966 			return error;
967 	}
968 
969 	/*
970 	 * If this is an atomic rename operation, we must "flip" the
971 	 * incomplete flags on the "new" and "old" attribute/value pairs
972 	 * so that one disappears and one appears atomically.  Then we
973 	 * must remove the "old" attribute/value pair.
974 	 */
975 	if (args->op_flags & XFS_DA_OP_RENAME) {
976 		/*
977 		 * In a separate transaction, set the incomplete flag on the
978 		 * "old" attr and clear the incomplete flag on the "new" attr.
979 		 */
980 		error = xfs_attr3_leaf_flipflags(args);
981 		if (error)
982 			goto out;
983 
984 		/*
985 		 * Dismantle the "old" attribute/value pair by removing
986 		 * a "remote" value (if it exists).
987 		 */
988 		args->index = args->index2;
989 		args->blkno = args->blkno2;
990 		args->rmtblkno = args->rmtblkno2;
991 		args->rmtblkcnt = args->rmtblkcnt2;
992 		args->rmtvaluelen = args->rmtvaluelen2;
993 		if (args->rmtblkno) {
994 			error = xfs_attr_rmtval_remove(args);
995 			if (error)
996 				return error;
997 		}
998 
999 		/*
1000 		 * Re-find the "old" attribute entry after any split ops.
1001 		 * The INCOMPLETE flag means that we will find the "old"
1002 		 * attr, not the "new" one.
		 *
		 * Note that XFS_ATTR_INCOMPLETE is OR-ed into args->flags
		 * and is not cleared again within this function.
1003 		 */
1004 		args->flags |= XFS_ATTR_INCOMPLETE;
1005 		state = xfs_da_state_alloc();
1006 		state->args = args;
1007 		state->mp = mp;
1008 		state->inleaf = 0;
1009 		error = xfs_da3_node_lookup_int(state, &retval);
1010 		if (error)
1011 			goto out;
1012 
1013 		/*
1014 		 * Remove the name and update the hashvals in the tree.
1015 		 */
1016 		blk = &state->path.blk[ state->path.active-1 ];
1017 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1018 		error = xfs_attr3_leaf_remove(blk->bp, args);
1019 		xfs_da3_fixhashpath(state, &state->path);
1020 
1021 		/*
1022 		 * Check to see if the tree needs to be collapsed.
		 *
		 * NOTE(review): retval tested below still holds the result
		 * of the lookup above; the xfs_attr3_leaf_remove() return
		 * value went into error and is overwritten before it is
		 * read.  Confirm this is intended.
1023 		 */
1024 		if (retval && (state->path.active > 1)) {
1025 			xfs_bmap_init(args->flist, args->firstblock);
1026 			error = xfs_da3_join(state);
1027 			if (!error)
1028 				error = xfs_bmap_finish(&args->trans,
1029 							args->flist, dp);
1030 			if (error) {
1031 				args->trans = NULL;
1032 				xfs_bmap_cancel(args->flist);
1033 				goto out;
1034 			}
1035 		}
1036 
1037 		/*
1038 		 * Commit and start the next trans in the chain.
1039 		 */
1040 		error = xfs_trans_roll(&args->trans, dp);
1041 		if (error)
1042 			goto out;
1043 
1044 	} else if (args->rmtblkno > 0) {
1045 		/*
1046 		 * Added a "remote" value, just clear the incomplete flag.
1047 		 */
1048 		error = xfs_attr3_leaf_clearflag(args);
1049 		if (error)
1050 			goto out;
1051 	}
	/* Success: make sure the label below returns 0. */
1052 	retval = error = 0;
1053 
	/*
	 * Common exit: free any live state, then return error if set,
	 * otherwise retval.
	 */
1054 out:
1055 	if (state)
1056 		xfs_da_state_free(state);
1057 	if (error)
1058 		return error;
1059 	return retval;
1060 }
1061 
1062 /*
1063  * Remove a name from a B-tree attribute list.
1064  *
1065  * This will involve walking down the Btree, and may involve joining
1066  * leaf nodes and even joining intermediate nodes up to and including
1067  * the root node (a special case of an intermediate node).
1068  */
1069 STATIC int
1070 xfs_attr_node_removename(xfs_da_args_t *args)
1071 {
1072 	xfs_da_state_t *state;
1073 	xfs_da_state_blk_t *blk;
1074 	xfs_inode_t *dp;
1075 	struct xfs_buf *bp;
1076 	int retval, error, forkoff;
1077 
1078 	trace_xfs_attr_node_removename(args);
1079 
1080 	/*
1081 	 * Tie a string around our finger to remind us where we are.
1082 	 */
1083 	dp = args->dp;
1084 	state = xfs_da_state_alloc();
1085 	state->args = args;
1086 	state->mp = dp->i_mount;
1087 
1088 	/*
1089 	 * Search to see if name exists, and get back a pointer to it.
1090 	 */
1091 	error = xfs_da3_node_lookup_int(state, &retval);
1092 	if (error || (retval != -EEXIST)) {
1093 		if (error == 0)
1094 			error = retval;
1095 		goto out;
1096 	}
1097 
1098 	/*
1099 	 * If there is an out-of-line value, de-allocate the blocks.
1100 	 * This is done before we remove the attribute so that we don't
1101 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1102 	 */
1103 	blk = &state->path.blk[ state->path.active-1 ];
1104 	ASSERT(blk->bp != NULL);
1105 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1106 	if (args->rmtblkno > 0) {
1107 		/*
1108 		 * Fill in disk block numbers in the state structure
1109 		 * so that we can get the buffers back after we commit
1110 		 * several transactions in the following calls.
1111 		 */
1112 		error = xfs_attr_fillstate(state);
1113 		if (error)
1114 			goto out;
1115 
1116 		/*
1117 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1118 		 * remote value.
1119 		 */
1120 		error = xfs_attr3_leaf_setflag(args);
1121 		if (error)
1122 			goto out;
1123 		error = xfs_attr_rmtval_remove(args);
1124 		if (error)
1125 			goto out;
1126 
1127 		/*
1128 		 * Refill the state structure with buffers, the prior calls
1129 		 * released our buffers.
1130 		 */
1131 		error = xfs_attr_refillstate(state);
1132 		if (error)
1133 			goto out;
1134 	}
1135 
1136 	/*
1137 	 * Remove the name and update the hashvals in the tree.
1138 	 */
1139 	blk = &state->path.blk[ state->path.active-1 ];
1140 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1141 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1142 	xfs_da3_fixhashpath(state, &state->path);
1143 
1144 	/*
1145 	 * Check to see if the tree needs to be collapsed.
1146 	 */
1147 	if (retval && (state->path.active > 1)) {
1148 		xfs_bmap_init(args->flist, args->firstblock);
1149 		error = xfs_da3_join(state);
1150 		if (!error)
1151 			error = xfs_bmap_finish(&args->trans, args->flist, dp);
1152 		if (error) {
1153 			args->trans = NULL;
1154 			xfs_bmap_cancel(args->flist);
1155 			goto out;
1156 		}
1157 		/*
1158 		 * Commit the Btree join operation and start a new trans.
1159 		 */
1160 		error = xfs_trans_roll(&args->trans, dp);
1161 		if (error)
1162 			goto out;
1163 	}
1164 
1165 	/*
1166 	 * If the result is small enough, push it all into the inode.
1167 	 */
1168 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1169 		/*
1170 		 * Have to get rid of the copy of this dabuf in the state.
1171 		 */
1172 		ASSERT(state->path.active == 1);
1173 		ASSERT(state->path.blk[0].bp);
1174 		state->path.blk[0].bp = NULL;
1175 
1176 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1177 		if (error)
1178 			goto out;
1179 
1180 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1181 			xfs_bmap_init(args->flist, args->firstblock);
1182 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1183 			/* bp is gone due to xfs_da_shrink_inode */
1184 			if (!error)
1185 				error = xfs_bmap_finish(&args->trans,
1186 							args->flist, dp);
1187 			if (error) {
1188 				args->trans = NULL;
1189 				xfs_bmap_cancel(args->flist);
1190 				goto out;
1191 			}
1192 		} else
1193 			xfs_trans_brelse(args->trans, bp);
1194 	}
1195 	error = 0;
1196 
1197 out:
1198 	xfs_da_state_free(state);
1199 	return error;
1200 }
1201 
1202 /*
1203  * Fill in the disk block numbers in the state structure for the buffers
1204  * that are attached to the state structure.
1205  * This is done so that we can quickly reattach ourselves to those buffers
1206  * after some set of transaction commits have released these buffers.
1207  */
1208 STATIC int
1209 xfs_attr_fillstate(xfs_da_state_t *state)
1210 {
1211 	xfs_da_state_path_t *path;
1212 	xfs_da_state_blk_t *blk;
1213 	int level;
1214 
1215 	trace_xfs_attr_fillstate(state->args);
1216 
1217 	/*
1218 	 * Roll down the "path" in the state structure, storing the on-disk
1219 	 * block number for those buffers in the "path".
1220 	 */
1221 	path = &state->path;
1222 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1223 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1224 		if (blk->bp) {
1225 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1226 			blk->bp = NULL;
1227 		} else {
1228 			blk->disk_blkno = 0;
1229 		}
1230 	}
1231 
1232 	/*
1233 	 * Roll down the "altpath" in the state structure, storing the on-disk
1234 	 * block number for those buffers in the "altpath".
1235 	 */
1236 	path = &state->altpath;
1237 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1238 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1239 		if (blk->bp) {
1240 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1241 			blk->bp = NULL;
1242 		} else {
1243 			blk->disk_blkno = 0;
1244 		}
1245 	}
1246 
1247 	return 0;
1248 }
1249 
1250 /*
1251  * Reattach the buffers to the state structure based on the disk block
1252  * numbers stored in the state structure.
1253  * This is done after some set of transaction commits have released those
1254  * buffers from our grip.
1255  */
1256 STATIC int
1257 xfs_attr_refillstate(xfs_da_state_t *state)
1258 {
1259 	xfs_da_state_path_t *path;
1260 	xfs_da_state_blk_t *blk;
1261 	int level, error;
1262 
1263 	trace_xfs_attr_refillstate(state->args);
1264 
1265 	/*
1266 	 * Roll down the "path" in the state structure, storing the on-disk
1267 	 * block number for those buffers in the "path".
1268 	 */
1269 	path = &state->path;
1270 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1271 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1272 		if (blk->disk_blkno) {
1273 			error = xfs_da3_node_read(state->args->trans,
1274 						state->args->dp,
1275 						blk->blkno, blk->disk_blkno,
1276 						&blk->bp, XFS_ATTR_FORK);
1277 			if (error)
1278 				return error;
1279 		} else {
1280 			blk->bp = NULL;
1281 		}
1282 	}
1283 
1284 	/*
1285 	 * Roll down the "altpath" in the state structure, storing the on-disk
1286 	 * block number for those buffers in the "altpath".
1287 	 */
1288 	path = &state->altpath;
1289 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1290 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1291 		if (blk->disk_blkno) {
1292 			error = xfs_da3_node_read(state->args->trans,
1293 						state->args->dp,
1294 						blk->blkno, blk->disk_blkno,
1295 						&blk->bp, XFS_ATTR_FORK);
1296 			if (error)
1297 				return error;
1298 		} else {
1299 			blk->bp = NULL;
1300 		}
1301 	}
1302 
1303 	return 0;
1304 }
1305 
1306 /*
1307  * Look up a filename in a node attribute list.
1308  *
1309  * This routine gets called for any attribute fork that has more than one
1310  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1311  * "remote" values taking up more blocks.
1312  */
1313 STATIC int
1314 xfs_attr_node_get(xfs_da_args_t *args)
1315 {
1316 	xfs_da_state_t *state;
1317 	xfs_da_state_blk_t *blk;
1318 	int error, retval;
1319 	int i;
1320 
1321 	trace_xfs_attr_node_get(args);
1322 
1323 	state = xfs_da_state_alloc();
1324 	state->args = args;
1325 	state->mp = args->dp->i_mount;
1326 
1327 	/*
1328 	 * Search to see if name exists, and get back a pointer to it.
1329 	 */
1330 	error = xfs_da3_node_lookup_int(state, &retval);
1331 	if (error) {
1332 		retval = error;
1333 	} else if (retval == -EEXIST) {
1334 		blk = &state->path.blk[ state->path.active-1 ];
1335 		ASSERT(blk->bp != NULL);
1336 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1337 
1338 		/*
1339 		 * Get the value, local or "remote"
1340 		 */
1341 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1342 		if (!retval && (args->rmtblkno > 0)
1343 		    && !(args->flags & ATTR_KERNOVAL)) {
1344 			retval = xfs_attr_rmtval_get(args);
1345 		}
1346 	}
1347 
1348 	/*
1349 	 * If not in a transaction, we have to release all the buffers.
1350 	 */
1351 	for (i = 0; i < state->path.active; i++) {
1352 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1353 		state->path.blk[i].bp = NULL;
1354 	}
1355 
1356 	xfs_da_state_free(state);
1357 	return retval;
1358 }
1359