xref: /openbmc/linux/fs/xfs/libxfs/xfs_attr.c (revision efe4a1ac)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_attr_sf.h"
30 #include "xfs_inode.h"
31 #include "xfs_alloc.h"
32 #include "xfs_trans.h"
33 #include "xfs_inode_item.h"
34 #include "xfs_bmap.h"
35 #include "xfs_bmap_util.h"
36 #include "xfs_bmap_btree.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_attr_remote.h"
40 #include "xfs_error.h"
41 #include "xfs_quota.h"
42 #include "xfs_trans_space.h"
43 #include "xfs_trace.h"
44 
45 /*
46  * xfs_attr.c
47  *
48  * Provide the external interfaces to manage attribute lists.
49  */
50 
51 /*========================================================================
52  * Function prototypes for the kernel.
53  *========================================================================*/
54 
55 /*
56  * Internal routines when attribute list fits inside the inode.
57  */
58 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
59 
60 /*
61  * Internal routines when attribute list is one block.
62  */
63 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
64 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
65 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
66 
67 /*
68  * Internal routines when attribute list is more than one block.
69  */
70 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
71 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
72 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
73 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
74 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
75 
76 
77 STATIC int
78 xfs_attr_args_init(
79 	struct xfs_da_args	*args,
80 	struct xfs_inode	*dp,
81 	const unsigned char	*name,
82 	int			flags)
83 {
84 
85 	if (!name)
86 		return -EINVAL;
87 
88 	memset(args, 0, sizeof(*args));
89 	args->geo = dp->i_mount->m_attr_geo;
90 	args->whichfork = XFS_ATTR_FORK;
91 	args->dp = dp;
92 	args->flags = flags;
93 	args->name = name;
94 	args->namelen = strlen((const char *)name);
95 	if (args->namelen >= MAXNAMELEN)
96 		return -EFAULT;		/* match IRIX behaviour */
97 
98 	args->hashval = xfs_da_hashname(args->name, args->namelen);
99 	return 0;
100 }
101 
102 int
103 xfs_inode_hasattr(
104 	struct xfs_inode	*ip)
105 {
106 	if (!XFS_IFORK_Q(ip) ||
107 	    (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
108 	     ip->i_d.di_anextents == 0))
109 		return 0;
110 	return 1;
111 }
112 
113 /*========================================================================
114  * Overall external interface routines.
115  *========================================================================*/
116 
117 int
118 xfs_attr_get(
119 	struct xfs_inode	*ip,
120 	const unsigned char	*name,
121 	unsigned char		*value,
122 	int			*valuelenp,
123 	int			flags)
124 {
125 	struct xfs_da_args	args;
126 	uint			lock_mode;
127 	int			error;
128 
129 	XFS_STATS_INC(ip->i_mount, xs_attr_get);
130 
131 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
132 		return -EIO;
133 
134 	error = xfs_attr_args_init(&args, ip, name, flags);
135 	if (error)
136 		return error;
137 
138 	args.value = value;
139 	args.valuelen = *valuelenp;
140 	/* Entirely possible to look up a name which doesn't exist */
141 	args.op_flags = XFS_DA_OP_OKNOENT;
142 
143 	lock_mode = xfs_ilock_attr_map_shared(ip);
144 	if (!xfs_inode_hasattr(ip))
145 		error = -ENOATTR;
146 	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
147 		error = xfs_attr_shortform_getvalue(&args);
148 	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))
149 		error = xfs_attr_leaf_get(&args);
150 	else
151 		error = xfs_attr_node_get(&args);
152 	xfs_iunlock(ip, lock_mode);
153 
154 	*valuelenp = args.valuelen;
155 	return error == -EEXIST ? 0 : error;
156 }
157 
158 /*
159  * Calculate how many blocks we need for the new attribute,
160  */
161 STATIC int
162 xfs_attr_calc_size(
163 	struct xfs_da_args	*args,
164 	int			*local)
165 {
166 	struct xfs_mount	*mp = args->dp->i_mount;
167 	int			size;
168 	int			nblks;
169 
170 	/*
171 	 * Determine space new attribute will use, and if it would be
172 	 * "local" or "remote" (note: local != inline).
173 	 */
174 	size = xfs_attr_leaf_newentsize(args, local);
175 	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
176 	if (*local) {
177 		if (size > (args->geo->blksize / 2)) {
178 			/* Double split possible */
179 			nblks *= 2;
180 		}
181 	} else {
182 		/*
183 		 * Out of line attribute, cannot double split, but
184 		 * make room for the attribute value itself.
185 		 */
186 		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
187 		nblks += dblocks;
188 		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
189 	}
190 
191 	return nblks;
192 }
193 
194 int
195 xfs_attr_set(
196 	struct xfs_inode	*dp,
197 	const unsigned char	*name,
198 	unsigned char		*value,
199 	int			valuelen,
200 	int			flags)
201 {
202 	struct xfs_mount	*mp = dp->i_mount;
203 	struct xfs_da_args	args;
204 	struct xfs_defer_ops	dfops;
205 	struct xfs_trans_res	tres;
206 	xfs_fsblock_t		firstblock;
207 	int			rsvd = (flags & ATTR_ROOT) != 0;
208 	int			error, err2, local;
209 
210 	XFS_STATS_INC(mp, xs_attr_set);
211 
212 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
213 		return -EIO;
214 
215 	error = xfs_attr_args_init(&args, dp, name, flags);
216 	if (error)
217 		return error;
218 
219 	args.value = value;
220 	args.valuelen = valuelen;
221 	args.firstblock = &firstblock;
222 	args.dfops = &dfops;
223 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
224 	args.total = xfs_attr_calc_size(&args, &local);
225 
226 	error = xfs_qm_dqattach(dp, 0);
227 	if (error)
228 		return error;
229 
230 	/*
231 	 * If the inode doesn't have an attribute fork, add one.
232 	 * (inode must not be locked when we call this routine)
233 	 */
234 	if (XFS_IFORK_Q(dp) == 0) {
235 		int sf_size = sizeof(xfs_attr_sf_hdr_t) +
236 			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen);
237 
238 		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
239 		if (error)
240 			return error;
241 	}
242 
243 	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
244 			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
245 	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
246 	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
247 
248 	/*
249 	 * Root fork attributes can use reserved data blocks for this
250 	 * operation if necessary
251 	 */
252 	error = xfs_trans_alloc(mp, &tres, args.total, 0,
253 			rsvd ? XFS_TRANS_RESERVE : 0, &args.trans);
254 	if (error)
255 		return error;
256 
257 	xfs_ilock(dp, XFS_ILOCK_EXCL);
258 	error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
259 				rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
260 				       XFS_QMOPT_RES_REGBLKS);
261 	if (error) {
262 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
263 		xfs_trans_cancel(args.trans);
264 		return error;
265 	}
266 
267 	xfs_trans_ijoin(args.trans, dp, 0);
268 
269 	/*
270 	 * If the attribute list is non-existent or a shortform list,
271 	 * upgrade it to a single-leaf-block attribute list.
272 	 */
273 	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
274 	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
275 	     dp->i_d.di_anextents == 0)) {
276 
277 		/*
278 		 * Build initial attribute list (if required).
279 		 */
280 		if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
281 			xfs_attr_shortform_create(&args);
282 
283 		/*
284 		 * Try to add the attr to the attribute list in
285 		 * the inode.
286 		 */
287 		error = xfs_attr_shortform_addname(&args);
288 		if (error != -ENOSPC) {
289 			/*
290 			 * Commit the shortform mods, and we're done.
291 			 * NOTE: this is also the error path (EEXIST, etc).
292 			 */
293 			ASSERT(args.trans != NULL);
294 
295 			/*
296 			 * If this is a synchronous mount, make sure that
297 			 * the transaction goes to disk before returning
298 			 * to the user.
299 			 */
300 			if (mp->m_flags & XFS_MOUNT_WSYNC)
301 				xfs_trans_set_sync(args.trans);
302 
303 			if (!error && (flags & ATTR_KERNOTIME) == 0) {
304 				xfs_trans_ichgtime(args.trans, dp,
305 							XFS_ICHGTIME_CHG);
306 			}
307 			err2 = xfs_trans_commit(args.trans);
308 			xfs_iunlock(dp, XFS_ILOCK_EXCL);
309 
310 			return error ? error : err2;
311 		}
312 
313 		/*
314 		 * It won't fit in the shortform, transform to a leaf block.
315 		 * GROT: another possible req'mt for a double-split btree op.
316 		 */
317 		xfs_defer_init(args.dfops, args.firstblock);
318 		error = xfs_attr_shortform_to_leaf(&args);
319 		if (!error)
320 			error = xfs_defer_finish(&args.trans, args.dfops, dp);
321 		if (error) {
322 			args.trans = NULL;
323 			xfs_defer_cancel(&dfops);
324 			goto out;
325 		}
326 
327 		/*
328 		 * Commit the leaf transformation.  We'll need another (linked)
329 		 * transaction to add the new attribute to the leaf.
330 		 */
331 
332 		error = xfs_trans_roll(&args.trans, dp);
333 		if (error)
334 			goto out;
335 
336 	}
337 
338 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
339 		error = xfs_attr_leaf_addname(&args);
340 	else
341 		error = xfs_attr_node_addname(&args);
342 	if (error)
343 		goto out;
344 
345 	/*
346 	 * If this is a synchronous mount, make sure that the
347 	 * transaction goes to disk before returning to the user.
348 	 */
349 	if (mp->m_flags & XFS_MOUNT_WSYNC)
350 		xfs_trans_set_sync(args.trans);
351 
352 	if ((flags & ATTR_KERNOTIME) == 0)
353 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
354 
355 	/*
356 	 * Commit the last in the sequence of transactions.
357 	 */
358 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
359 	error = xfs_trans_commit(args.trans);
360 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
361 
362 	return error;
363 
364 out:
365 	if (args.trans)
366 		xfs_trans_cancel(args.trans);
367 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
368 	return error;
369 }
370 
371 /*
372  * Generic handler routine to remove a name from an attribute list.
373  * Transitions attribute list from Btree to shortform as necessary.
374  */
375 int
376 xfs_attr_remove(
377 	struct xfs_inode	*dp,
378 	const unsigned char	*name,
379 	int			flags)
380 {
381 	struct xfs_mount	*mp = dp->i_mount;
382 	struct xfs_da_args	args;
383 	struct xfs_defer_ops	dfops;
384 	xfs_fsblock_t		firstblock;
385 	int			error;
386 
387 	XFS_STATS_INC(mp, xs_attr_remove);
388 
389 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
390 		return -EIO;
391 
392 	error = xfs_attr_args_init(&args, dp, name, flags);
393 	if (error)
394 		return error;
395 
396 	args.firstblock = &firstblock;
397 	args.dfops = &dfops;
398 
399 	/*
400 	 * we have no control over the attribute names that userspace passes us
401 	 * to remove, so we have to allow the name lookup prior to attribute
402 	 * removal to fail.
403 	 */
404 	args.op_flags = XFS_DA_OP_OKNOENT;
405 
406 	error = xfs_qm_dqattach(dp, 0);
407 	if (error)
408 		return error;
409 
410 	/*
411 	 * Root fork attributes can use reserved data blocks for this
412 	 * operation if necessary
413 	 */
414 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
415 			XFS_ATTRRM_SPACE_RES(mp), 0,
416 			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
417 			&args.trans);
418 	if (error)
419 		return error;
420 
421 	xfs_ilock(dp, XFS_ILOCK_EXCL);
422 	/*
423 	 * No need to make quota reservations here. We expect to release some
424 	 * blocks not allocate in the common case.
425 	 */
426 	xfs_trans_ijoin(args.trans, dp, 0);
427 
428 	if (!xfs_inode_hasattr(dp)) {
429 		error = -ENOATTR;
430 	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
431 		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
432 		error = xfs_attr_shortform_remove(&args);
433 	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
434 		error = xfs_attr_leaf_removename(&args);
435 	} else {
436 		error = xfs_attr_node_removename(&args);
437 	}
438 
439 	if (error)
440 		goto out;
441 
442 	/*
443 	 * If this is a synchronous mount, make sure that the
444 	 * transaction goes to disk before returning to the user.
445 	 */
446 	if (mp->m_flags & XFS_MOUNT_WSYNC)
447 		xfs_trans_set_sync(args.trans);
448 
449 	if ((flags & ATTR_KERNOTIME) == 0)
450 		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
451 
452 	/*
453 	 * Commit the last in the sequence of transactions.
454 	 */
455 	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
456 	error = xfs_trans_commit(args.trans);
457 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
458 
459 	return error;
460 
461 out:
462 	if (args.trans)
463 		xfs_trans_cancel(args.trans);
464 	xfs_iunlock(dp, XFS_ILOCK_EXCL);
465 	return error;
466 }
467 
468 /*========================================================================
469  * External routines when attribute list is inside the inode
470  *========================================================================*/
471 
472 /*
473  * Add a name to the shortform attribute list structure
474  * This is the external routine.
475  */
476 STATIC int
477 xfs_attr_shortform_addname(xfs_da_args_t *args)
478 {
479 	int newsize, forkoff, retval;
480 
481 	trace_xfs_attr_sf_addname(args);
482 
483 	retval = xfs_attr_shortform_lookup(args);
484 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
485 		return retval;
486 	} else if (retval == -EEXIST) {
487 		if (args->flags & ATTR_CREATE)
488 			return retval;
489 		retval = xfs_attr_shortform_remove(args);
490 		ASSERT(retval == 0);
491 	}
492 
493 	if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
494 	    args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
495 		return -ENOSPC;
496 
497 	newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
498 	newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
499 
500 	forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
501 	if (!forkoff)
502 		return -ENOSPC;
503 
504 	xfs_attr_shortform_add(args, forkoff);
505 	return 0;
506 }
507 
508 
509 /*========================================================================
510  * External routines when attribute list is one block
511  *========================================================================*/
512 
513 /*
514  * Add a name to the leaf attribute list structure
515  *
516  * This leaf block cannot have a "remote" value, we only call this routine
517  * if bmap_one_block() says there is only one block (ie: no remote blks).
518  */
519 STATIC int
520 xfs_attr_leaf_addname(xfs_da_args_t *args)
521 {
522 	xfs_inode_t *dp;
523 	struct xfs_buf *bp;
524 	int retval, error, forkoff;
525 
526 	trace_xfs_attr_leaf_addname(args);
527 
528 	/*
529 	 * Read the (only) block in the attribute list in.
530 	 */
531 	dp = args->dp;
532 	args->blkno = 0;
533 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
534 	if (error)
535 		return error;
536 
537 	/*
538 	 * Look up the given attribute in the leaf block.  Figure out if
539 	 * the given flags produce an error or call for an atomic rename.
540 	 */
541 	retval = xfs_attr3_leaf_lookup_int(bp, args);
542 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
543 		xfs_trans_brelse(args->trans, bp);
544 		return retval;
545 	} else if (retval == -EEXIST) {
546 		if (args->flags & ATTR_CREATE) {	/* pure create op */
547 			xfs_trans_brelse(args->trans, bp);
548 			return retval;
549 		}
550 
551 		trace_xfs_attr_leaf_replace(args);
552 
553 		/* save the attribute state for later removal*/
554 		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */
555 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
556 		args->index2 = args->index;
557 		args->rmtblkno2 = args->rmtblkno;
558 		args->rmtblkcnt2 = args->rmtblkcnt;
559 		args->rmtvaluelen2 = args->rmtvaluelen;
560 
561 		/*
562 		 * clear the remote attr state now that it is saved so that the
563 		 * values reflect the state of the attribute we are about to
564 		 * add, not the attribute we just found and will remove later.
565 		 */
566 		args->rmtblkno = 0;
567 		args->rmtblkcnt = 0;
568 		args->rmtvaluelen = 0;
569 	}
570 
571 	/*
572 	 * Add the attribute to the leaf block, transitioning to a Btree
573 	 * if required.
574 	 */
575 	retval = xfs_attr3_leaf_add(bp, args);
576 	if (retval == -ENOSPC) {
577 		/*
578 		 * Promote the attribute list to the Btree format, then
579 		 * Commit that transaction so that the node_addname() call
580 		 * can manage its own transactions.
581 		 */
582 		xfs_defer_init(args->dfops, args->firstblock);
583 		error = xfs_attr3_leaf_to_node(args);
584 		if (!error)
585 			error = xfs_defer_finish(&args->trans, args->dfops, dp);
586 		if (error) {
587 			args->trans = NULL;
588 			xfs_defer_cancel(args->dfops);
589 			return error;
590 		}
591 
592 		/*
593 		 * Commit the current trans (including the inode) and start
594 		 * a new one.
595 		 */
596 		error = xfs_trans_roll(&args->trans, dp);
597 		if (error)
598 			return error;
599 
600 		/*
601 		 * Fob the whole rest of the problem off on the Btree code.
602 		 */
603 		error = xfs_attr_node_addname(args);
604 		return error;
605 	}
606 
607 	/*
608 	 * Commit the transaction that added the attr name so that
609 	 * later routines can manage their own transactions.
610 	 */
611 	error = xfs_trans_roll(&args->trans, dp);
612 	if (error)
613 		return error;
614 
615 	/*
616 	 * If there was an out-of-line value, allocate the blocks we
617 	 * identified for its storage and copy the value.  This is done
618 	 * after we create the attribute so that we don't overflow the
619 	 * maximum size of a transaction and/or hit a deadlock.
620 	 */
621 	if (args->rmtblkno > 0) {
622 		error = xfs_attr_rmtval_set(args);
623 		if (error)
624 			return error;
625 	}
626 
627 	/*
628 	 * If this is an atomic rename operation, we must "flip" the
629 	 * incomplete flags on the "new" and "old" attribute/value pairs
630 	 * so that one disappears and one appears atomically.  Then we
631 	 * must remove the "old" attribute/value pair.
632 	 */
633 	if (args->op_flags & XFS_DA_OP_RENAME) {
634 		/*
635 		 * In a separate transaction, set the incomplete flag on the
636 		 * "old" attr and clear the incomplete flag on the "new" attr.
637 		 */
638 		error = xfs_attr3_leaf_flipflags(args);
639 		if (error)
640 			return error;
641 
642 		/*
643 		 * Dismantle the "old" attribute/value pair by removing
644 		 * a "remote" value (if it exists).
645 		 */
646 		args->index = args->index2;
647 		args->blkno = args->blkno2;
648 		args->rmtblkno = args->rmtblkno2;
649 		args->rmtblkcnt = args->rmtblkcnt2;
650 		args->rmtvaluelen = args->rmtvaluelen2;
651 		if (args->rmtblkno) {
652 			error = xfs_attr_rmtval_remove(args);
653 			if (error)
654 				return error;
655 		}
656 
657 		/*
658 		 * Read in the block containing the "old" attr, then
659 		 * remove the "old" attr from that block (neat, huh!)
660 		 */
661 		error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
662 					   -1, &bp);
663 		if (error)
664 			return error;
665 
666 		xfs_attr3_leaf_remove(bp, args);
667 
668 		/*
669 		 * If the result is small enough, shrink it all into the inode.
670 		 */
671 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
672 			xfs_defer_init(args->dfops, args->firstblock);
673 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
674 			/* bp is gone due to xfs_da_shrink_inode */
675 			if (!error)
676 				error = xfs_defer_finish(&args->trans,
677 							args->dfops, dp);
678 			if (error) {
679 				args->trans = NULL;
680 				xfs_defer_cancel(args->dfops);
681 				return error;
682 			}
683 		}
684 
685 		/*
686 		 * Commit the remove and start the next trans in series.
687 		 */
688 		error = xfs_trans_roll(&args->trans, dp);
689 
690 	} else if (args->rmtblkno > 0) {
691 		/*
692 		 * Added a "remote" value, just clear the incomplete flag.
693 		 */
694 		error = xfs_attr3_leaf_clearflag(args);
695 	}
696 	return error;
697 }
698 
699 /*
700  * Remove a name from the leaf attribute list structure
701  *
702  * This leaf block cannot have a "remote" value, we only call this routine
703  * if bmap_one_block() says there is only one block (ie: no remote blks).
704  */
705 STATIC int
706 xfs_attr_leaf_removename(xfs_da_args_t *args)
707 {
708 	xfs_inode_t *dp;
709 	struct xfs_buf *bp;
710 	int error, forkoff;
711 
712 	trace_xfs_attr_leaf_removename(args);
713 
714 	/*
715 	 * Remove the attribute.
716 	 */
717 	dp = args->dp;
718 	args->blkno = 0;
719 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
720 	if (error)
721 		return error;
722 
723 	error = xfs_attr3_leaf_lookup_int(bp, args);
724 	if (error == -ENOATTR) {
725 		xfs_trans_brelse(args->trans, bp);
726 		return error;
727 	}
728 
729 	xfs_attr3_leaf_remove(bp, args);
730 
731 	/*
732 	 * If the result is small enough, shrink it all into the inode.
733 	 */
734 	if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
735 		xfs_defer_init(args->dfops, args->firstblock);
736 		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
737 		/* bp is gone due to xfs_da_shrink_inode */
738 		if (!error)
739 			error = xfs_defer_finish(&args->trans, args->dfops, dp);
740 		if (error) {
741 			args->trans = NULL;
742 			xfs_defer_cancel(args->dfops);
743 			return error;
744 		}
745 	}
746 	return 0;
747 }
748 
749 /*
750  * Look up a name in a leaf attribute list structure.
751  *
752  * This leaf block cannot have a "remote" value, we only call this routine
753  * if bmap_one_block() says there is only one block (ie: no remote blks).
754  */
755 STATIC int
756 xfs_attr_leaf_get(xfs_da_args_t *args)
757 {
758 	struct xfs_buf *bp;
759 	int error;
760 
761 	trace_xfs_attr_leaf_get(args);
762 
763 	args->blkno = 0;
764 	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
765 	if (error)
766 		return error;
767 
768 	error = xfs_attr3_leaf_lookup_int(bp, args);
769 	if (error != -EEXIST)  {
770 		xfs_trans_brelse(args->trans, bp);
771 		return error;
772 	}
773 	error = xfs_attr3_leaf_getvalue(bp, args);
774 	xfs_trans_brelse(args->trans, bp);
775 	if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
776 		error = xfs_attr_rmtval_get(args);
777 	}
778 	return error;
779 }
780 
781 /*========================================================================
782  * External routines when attribute list size > geo->blksize
783  *========================================================================*/
784 
785 /*
786  * Add a name to a Btree-format attribute list.
787  *
788  * This will involve walking down the Btree, and may involve splitting
789  * leaf nodes and even splitting intermediate nodes up to and including
790  * the root node (a special case of an intermediate node).
791  *
792  * "Remote" attribute values confuse the issue and atomic rename operations
793  * add a whole extra layer of confusion on top of that.
794  */
795 STATIC int
796 xfs_attr_node_addname(xfs_da_args_t *args)
797 {
798 	xfs_da_state_t *state;
799 	xfs_da_state_blk_t *blk;
800 	xfs_inode_t *dp;
801 	xfs_mount_t *mp;
802 	int retval, error;
803 
804 	trace_xfs_attr_node_addname(args);
805 
806 	/*
807 	 * Fill in bucket of arguments/results/context to carry around.
808 	 */
809 	dp = args->dp;
810 	mp = dp->i_mount;
811 restart:
812 	state = xfs_da_state_alloc();
813 	state->args = args;
814 	state->mp = mp;
815 
816 	/*
817 	 * Search to see if name already exists, and get back a pointer
818 	 * to where it should go.
819 	 */
820 	error = xfs_da3_node_lookup_int(state, &retval);
821 	if (error)
822 		goto out;
823 	blk = &state->path.blk[ state->path.active-1 ];
824 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
825 	if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) {
826 		goto out;
827 	} else if (retval == -EEXIST) {
828 		if (args->flags & ATTR_CREATE)
829 			goto out;
830 
831 		trace_xfs_attr_node_replace(args);
832 
833 		/* save the attribute state for later removal*/
834 		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */
835 		args->blkno2 = args->blkno;		/* set 2nd entry info*/
836 		args->index2 = args->index;
837 		args->rmtblkno2 = args->rmtblkno;
838 		args->rmtblkcnt2 = args->rmtblkcnt;
839 		args->rmtvaluelen2 = args->rmtvaluelen;
840 
841 		/*
842 		 * clear the remote attr state now that it is saved so that the
843 		 * values reflect the state of the attribute we are about to
844 		 * add, not the attribute we just found and will remove later.
845 		 */
846 		args->rmtblkno = 0;
847 		args->rmtblkcnt = 0;
848 		args->rmtvaluelen = 0;
849 	}
850 
851 	retval = xfs_attr3_leaf_add(blk->bp, state->args);
852 	if (retval == -ENOSPC) {
853 		if (state->path.active == 1) {
854 			/*
855 			 * Its really a single leaf node, but it had
856 			 * out-of-line values so it looked like it *might*
857 			 * have been a b-tree.
858 			 */
859 			xfs_da_state_free(state);
860 			state = NULL;
861 			xfs_defer_init(args->dfops, args->firstblock);
862 			error = xfs_attr3_leaf_to_node(args);
863 			if (!error)
864 				error = xfs_defer_finish(&args->trans,
865 							args->dfops, dp);
866 			if (error) {
867 				args->trans = NULL;
868 				xfs_defer_cancel(args->dfops);
869 				goto out;
870 			}
871 
872 			/*
873 			 * Commit the node conversion and start the next
874 			 * trans in the chain.
875 			 */
876 			error = xfs_trans_roll(&args->trans, dp);
877 			if (error)
878 				goto out;
879 
880 			goto restart;
881 		}
882 
883 		/*
884 		 * Split as many Btree elements as required.
885 		 * This code tracks the new and old attr's location
886 		 * in the index/blkno/rmtblkno/rmtblkcnt fields and
887 		 * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
888 		 */
889 		xfs_defer_init(args->dfops, args->firstblock);
890 		error = xfs_da3_split(state);
891 		if (!error)
892 			error = xfs_defer_finish(&args->trans, args->dfops, dp);
893 		if (error) {
894 			args->trans = NULL;
895 			xfs_defer_cancel(args->dfops);
896 			goto out;
897 		}
898 	} else {
899 		/*
900 		 * Addition succeeded, update Btree hashvals.
901 		 */
902 		xfs_da3_fixhashpath(state, &state->path);
903 	}
904 
905 	/*
906 	 * Kill the state structure, we're done with it and need to
907 	 * allow the buffers to come back later.
908 	 */
909 	xfs_da_state_free(state);
910 	state = NULL;
911 
912 	/*
913 	 * Commit the leaf addition or btree split and start the next
914 	 * trans in the chain.
915 	 */
916 	error = xfs_trans_roll(&args->trans, dp);
917 	if (error)
918 		goto out;
919 
920 	/*
921 	 * If there was an out-of-line value, allocate the blocks we
922 	 * identified for its storage and copy the value.  This is done
923 	 * after we create the attribute so that we don't overflow the
924 	 * maximum size of a transaction and/or hit a deadlock.
925 	 */
926 	if (args->rmtblkno > 0) {
927 		error = xfs_attr_rmtval_set(args);
928 		if (error)
929 			return error;
930 	}
931 
932 	/*
933 	 * If this is an atomic rename operation, we must "flip" the
934 	 * incomplete flags on the "new" and "old" attribute/value pairs
935 	 * so that one disappears and one appears atomically.  Then we
936 	 * must remove the "old" attribute/value pair.
937 	 */
938 	if (args->op_flags & XFS_DA_OP_RENAME) {
939 		/*
940 		 * In a separate transaction, set the incomplete flag on the
941 		 * "old" attr and clear the incomplete flag on the "new" attr.
942 		 */
943 		error = xfs_attr3_leaf_flipflags(args);
944 		if (error)
945 			goto out;
946 
947 		/*
948 		 * Dismantle the "old" attribute/value pair by removing
949 		 * a "remote" value (if it exists).
950 		 */
951 		args->index = args->index2;
952 		args->blkno = args->blkno2;
953 		args->rmtblkno = args->rmtblkno2;
954 		args->rmtblkcnt = args->rmtblkcnt2;
955 		args->rmtvaluelen = args->rmtvaluelen2;
956 		if (args->rmtblkno) {
957 			error = xfs_attr_rmtval_remove(args);
958 			if (error)
959 				return error;
960 		}
961 
962 		/*
963 		 * Re-find the "old" attribute entry after any split ops.
964 		 * The INCOMPLETE flag means that we will find the "old"
965 		 * attr, not the "new" one.
966 		 */
967 		args->flags |= XFS_ATTR_INCOMPLETE;
968 		state = xfs_da_state_alloc();
969 		state->args = args;
970 		state->mp = mp;
971 		state->inleaf = 0;
972 		error = xfs_da3_node_lookup_int(state, &retval);
973 		if (error)
974 			goto out;
975 
976 		/*
977 		 * Remove the name and update the hashvals in the tree.
978 		 */
979 		blk = &state->path.blk[ state->path.active-1 ];
980 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
981 		error = xfs_attr3_leaf_remove(blk->bp, args);
982 		xfs_da3_fixhashpath(state, &state->path);
983 
984 		/*
985 		 * Check to see if the tree needs to be collapsed.
986 		 */
987 		if (retval && (state->path.active > 1)) {
988 			xfs_defer_init(args->dfops, args->firstblock);
989 			error = xfs_da3_join(state);
990 			if (!error)
991 				error = xfs_defer_finish(&args->trans,
992 							args->dfops, dp);
993 			if (error) {
994 				args->trans = NULL;
995 				xfs_defer_cancel(args->dfops);
996 				goto out;
997 			}
998 		}
999 
1000 		/*
1001 		 * Commit and start the next trans in the chain.
1002 		 */
1003 		error = xfs_trans_roll(&args->trans, dp);
1004 		if (error)
1005 			goto out;
1006 
1007 	} else if (args->rmtblkno > 0) {
1008 		/*
1009 		 * Added a "remote" value, just clear the incomplete flag.
1010 		 */
1011 		error = xfs_attr3_leaf_clearflag(args);
1012 		if (error)
1013 			goto out;
1014 	}
1015 	retval = error = 0;
1016 
1017 out:
1018 	if (state)
1019 		xfs_da_state_free(state);
1020 	if (error)
1021 		return error;
1022 	return retval;
1023 }
1024 
1025 /*
1026  * Remove a name from a B-tree attribute list.
1027  *
1028  * This will involve walking down the Btree, and may involve joining
1029  * leaf nodes and even joining intermediate nodes up to and including
1030  * the root node (a special case of an intermediate node).
1031  */
1032 STATIC int
1033 xfs_attr_node_removename(xfs_da_args_t *args)
1034 {
1035 	xfs_da_state_t *state;
1036 	xfs_da_state_blk_t *blk;
1037 	xfs_inode_t *dp;
1038 	struct xfs_buf *bp;
1039 	int retval, error, forkoff;
1040 
1041 	trace_xfs_attr_node_removename(args);
1042 
1043 	/*
1044 	 * Tie a string around our finger to remind us where we are.
1045 	 */
1046 	dp = args->dp;
1047 	state = xfs_da_state_alloc();
1048 	state->args = args;
1049 	state->mp = dp->i_mount;
1050 
1051 	/*
1052 	 * Search to see if name exists, and get back a pointer to it.
1053 	 */
1054 	error = xfs_da3_node_lookup_int(state, &retval);
1055 	if (error || (retval != -EEXIST)) {
1056 		if (error == 0)
1057 			error = retval;
1058 		goto out;
1059 	}
1060 
1061 	/*
1062 	 * If there is an out-of-line value, de-allocate the blocks.
1063 	 * This is done before we remove the attribute so that we don't
1064 	 * overflow the maximum size of a transaction and/or hit a deadlock.
1065 	 */
1066 	blk = &state->path.blk[ state->path.active-1 ];
1067 	ASSERT(blk->bp != NULL);
1068 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1069 	if (args->rmtblkno > 0) {
1070 		/*
1071 		 * Fill in disk block numbers in the state structure
1072 		 * so that we can get the buffers back after we commit
1073 		 * several transactions in the following calls.
1074 		 */
1075 		error = xfs_attr_fillstate(state);
1076 		if (error)
1077 			goto out;
1078 
1079 		/*
1080 		 * Mark the attribute as INCOMPLETE, then bunmapi() the
1081 		 * remote value.
1082 		 */
1083 		error = xfs_attr3_leaf_setflag(args);
1084 		if (error)
1085 			goto out;
1086 		error = xfs_attr_rmtval_remove(args);
1087 		if (error)
1088 			goto out;
1089 
1090 		/*
1091 		 * Refill the state structure with buffers, the prior calls
1092 		 * released our buffers.
1093 		 */
1094 		error = xfs_attr_refillstate(state);
1095 		if (error)
1096 			goto out;
1097 	}
1098 
1099 	/*
1100 	 * Remove the name and update the hashvals in the tree.
1101 	 */
1102 	blk = &state->path.blk[ state->path.active-1 ];
1103 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1104 	retval = xfs_attr3_leaf_remove(blk->bp, args);
1105 	xfs_da3_fixhashpath(state, &state->path);
1106 
1107 	/*
1108 	 * Check to see if the tree needs to be collapsed.
1109 	 */
1110 	if (retval && (state->path.active > 1)) {
1111 		xfs_defer_init(args->dfops, args->firstblock);
1112 		error = xfs_da3_join(state);
1113 		if (!error)
1114 			error = xfs_defer_finish(&args->trans, args->dfops, dp);
1115 		if (error) {
1116 			args->trans = NULL;
1117 			xfs_defer_cancel(args->dfops);
1118 			goto out;
1119 		}
1120 		/*
1121 		 * Commit the Btree join operation and start a new trans.
1122 		 */
1123 		error = xfs_trans_roll(&args->trans, dp);
1124 		if (error)
1125 			goto out;
1126 	}
1127 
1128 	/*
1129 	 * If the result is small enough, push it all into the inode.
1130 	 */
1131 	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1132 		/*
1133 		 * Have to get rid of the copy of this dabuf in the state.
1134 		 */
1135 		ASSERT(state->path.active == 1);
1136 		ASSERT(state->path.blk[0].bp);
1137 		state->path.blk[0].bp = NULL;
1138 
1139 		error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
1140 		if (error)
1141 			goto out;
1142 
1143 		if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1144 			xfs_defer_init(args->dfops, args->firstblock);
1145 			error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1146 			/* bp is gone due to xfs_da_shrink_inode */
1147 			if (!error)
1148 				error = xfs_defer_finish(&args->trans,
1149 							args->dfops, dp);
1150 			if (error) {
1151 				args->trans = NULL;
1152 				xfs_defer_cancel(args->dfops);
1153 				goto out;
1154 			}
1155 		} else
1156 			xfs_trans_brelse(args->trans, bp);
1157 	}
1158 	error = 0;
1159 
1160 out:
1161 	xfs_da_state_free(state);
1162 	return error;
1163 }
1164 
1165 /*
1166  * Fill in the disk block numbers in the state structure for the buffers
1167  * that are attached to the state structure.
1168  * This is done so that we can quickly reattach ourselves to those buffers
1169  * after some set of transaction commits have released these buffers.
1170  */
1171 STATIC int
1172 xfs_attr_fillstate(xfs_da_state_t *state)
1173 {
1174 	xfs_da_state_path_t *path;
1175 	xfs_da_state_blk_t *blk;
1176 	int level;
1177 
1178 	trace_xfs_attr_fillstate(state->args);
1179 
1180 	/*
1181 	 * Roll down the "path" in the state structure, storing the on-disk
1182 	 * block number for those buffers in the "path".
1183 	 */
1184 	path = &state->path;
1185 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1186 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1187 		if (blk->bp) {
1188 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1189 			blk->bp = NULL;
1190 		} else {
1191 			blk->disk_blkno = 0;
1192 		}
1193 	}
1194 
1195 	/*
1196 	 * Roll down the "altpath" in the state structure, storing the on-disk
1197 	 * block number for those buffers in the "altpath".
1198 	 */
1199 	path = &state->altpath;
1200 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1201 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1202 		if (blk->bp) {
1203 			blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
1204 			blk->bp = NULL;
1205 		} else {
1206 			blk->disk_blkno = 0;
1207 		}
1208 	}
1209 
1210 	return 0;
1211 }
1212 
1213 /*
1214  * Reattach the buffers to the state structure based on the disk block
1215  * numbers stored in the state structure.
1216  * This is done after some set of transaction commits have released those
1217  * buffers from our grip.
1218  */
1219 STATIC int
1220 xfs_attr_refillstate(xfs_da_state_t *state)
1221 {
1222 	xfs_da_state_path_t *path;
1223 	xfs_da_state_blk_t *blk;
1224 	int level, error;
1225 
1226 	trace_xfs_attr_refillstate(state->args);
1227 
1228 	/*
1229 	 * Roll down the "path" in the state structure, storing the on-disk
1230 	 * block number for those buffers in the "path".
1231 	 */
1232 	path = &state->path;
1233 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1234 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1235 		if (blk->disk_blkno) {
1236 			error = xfs_da3_node_read(state->args->trans,
1237 						state->args->dp,
1238 						blk->blkno, blk->disk_blkno,
1239 						&blk->bp, XFS_ATTR_FORK);
1240 			if (error)
1241 				return error;
1242 		} else {
1243 			blk->bp = NULL;
1244 		}
1245 	}
1246 
1247 	/*
1248 	 * Roll down the "altpath" in the state structure, storing the on-disk
1249 	 * block number for those buffers in the "altpath".
1250 	 */
1251 	path = &state->altpath;
1252 	ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1253 	for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1254 		if (blk->disk_blkno) {
1255 			error = xfs_da3_node_read(state->args->trans,
1256 						state->args->dp,
1257 						blk->blkno, blk->disk_blkno,
1258 						&blk->bp, XFS_ATTR_FORK);
1259 			if (error)
1260 				return error;
1261 		} else {
1262 			blk->bp = NULL;
1263 		}
1264 	}
1265 
1266 	return 0;
1267 }
1268 
1269 /*
1270  * Look up a filename in a node attribute list.
1271  *
1272  * This routine gets called for any attribute fork that has more than one
1273  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1274  * "remote" values taking up more blocks.
1275  */
1276 STATIC int
1277 xfs_attr_node_get(xfs_da_args_t *args)
1278 {
1279 	xfs_da_state_t *state;
1280 	xfs_da_state_blk_t *blk;
1281 	int error, retval;
1282 	int i;
1283 
1284 	trace_xfs_attr_node_get(args);
1285 
1286 	state = xfs_da_state_alloc();
1287 	state->args = args;
1288 	state->mp = args->dp->i_mount;
1289 
1290 	/*
1291 	 * Search to see if name exists, and get back a pointer to it.
1292 	 */
1293 	error = xfs_da3_node_lookup_int(state, &retval);
1294 	if (error) {
1295 		retval = error;
1296 	} else if (retval == -EEXIST) {
1297 		blk = &state->path.blk[ state->path.active-1 ];
1298 		ASSERT(blk->bp != NULL);
1299 		ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1300 
1301 		/*
1302 		 * Get the value, local or "remote"
1303 		 */
1304 		retval = xfs_attr3_leaf_getvalue(blk->bp, args);
1305 		if (!retval && (args->rmtblkno > 0)
1306 		    && !(args->flags & ATTR_KERNOVAL)) {
1307 			retval = xfs_attr_rmtval_get(args);
1308 		}
1309 	}
1310 
1311 	/*
1312 	 * If not in a transaction, we have to release all the buffers.
1313 	 */
1314 	for (i = 0; i < state->path.active; i++) {
1315 		xfs_trans_brelse(args->trans, state->path.blk[i].bp);
1316 		state->path.blk[i].bp = NULL;
1317 	}
1318 
1319 	xfs_da_state_free(state);
1320 	return retval;
1321 }
1322