xref: /openbmc/linux/fs/ocfs2/xattr.c (revision 8ac727c1)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21 
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39 
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42 
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60 
61 struct ocfs2_xattr_def_value_root {
62 	struct ocfs2_xattr_value_root	xv;
63 	struct ocfs2_extent_rec		er;
64 };
65 
66 struct ocfs2_xattr_bucket {
67 	/* The inode these xattrs are associated with */
68 	struct inode *bu_inode;
69 
70 	/* The actual buffers that make up the bucket */
71 	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
72 
73 	/* How many blocks make up one bucket for this filesystem */
74 	int bu_blocks;
75 };
76 
77 struct ocfs2_xattr_set_ctxt {
78 	handle_t *handle;
79 	struct ocfs2_alloc_context *meta_ac;
80 	struct ocfs2_alloc_context *data_ac;
81 	struct ocfs2_cached_dealloc_ctxt dealloc;
82 	int set_abort;
83 };
84 
85 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
86 #define OCFS2_XATTR_INLINE_SIZE	80
87 #define OCFS2_XATTR_HEADER_GAP	4
88 #define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
89 					 - sizeof(struct ocfs2_xattr_header) \
90 					 - OCFS2_XATTR_HEADER_GAP)
91 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
92 					 - sizeof(struct ocfs2_xattr_block) \
93 					 - sizeof(struct ocfs2_xattr_header) \
94 					 - OCFS2_XATTR_HEADER_GAP)
95 
96 static struct ocfs2_xattr_def_value_root def_xv = {
97 	.xv.xr_list.l_count = cpu_to_le16(1),
98 };
99 
100 const struct xattr_handler *ocfs2_xattr_handlers[] = {
101 	&ocfs2_xattr_user_handler,
102 	&ocfs2_xattr_acl_access_handler,
103 	&ocfs2_xattr_acl_default_handler,
104 	&ocfs2_xattr_trusted_handler,
105 	&ocfs2_xattr_security_handler,
106 	NULL
107 };
108 
109 static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
110 	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
111 	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
112 					= &ocfs2_xattr_acl_access_handler,
113 	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
114 					= &ocfs2_xattr_acl_default_handler,
115 	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
116 	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
117 };
118 
119 struct ocfs2_xattr_info {
120 	int		xi_name_index;
121 	const char	*xi_name;
122 	int		xi_name_len;
123 	const void	*xi_value;
124 	size_t		xi_value_len;
125 };
126 
127 struct ocfs2_xattr_search {
128 	struct buffer_head *inode_bh;
129 	/*
130 	 * xattr_bh point to the block buffer head which has extended attribute
131 	 * when extended attribute in inode, xattr_bh is equal to inode_bh.
132 	 */
133 	struct buffer_head *xattr_bh;
134 	struct ocfs2_xattr_header *header;
135 	struct ocfs2_xattr_bucket *bucket;
136 	void *base;
137 	void *end;
138 	struct ocfs2_xattr_entry *here;
139 	int not_found;
140 };
141 
142 /* Operations on struct ocfs2_xa_entry */
143 struct ocfs2_xa_loc;
144 struct ocfs2_xa_loc_operations {
145 	/*
146 	 * Journal functions
147 	 */
148 	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
149 				  int type);
150 	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);
151 
152 	/*
153 	 * Return a pointer to the appropriate buffer in loc->xl_storage
154 	 * at the given offset from loc->xl_header.
155 	 */
156 	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);
157 
158 	/* Can we reuse the existing entry for the new value? */
159 	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
160 			     struct ocfs2_xattr_info *xi);
161 
162 	/* How much space is needed for the new value? */
163 	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
164 			       struct ocfs2_xattr_info *xi);
165 
166 	/*
167 	 * Return the offset of the first name+value pair.  This is
168 	 * the start of our downward-filling free space.
169 	 */
170 	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);
171 
172 	/*
173 	 * Remove the name+value at this location.  Do whatever is
174 	 * appropriate with the remaining name+value pairs.
175 	 */
176 	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
177 
178 	/* Fill xl_entry with a new entry */
179 	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);
180 
181 	/* Add name+value storage to an entry */
182 	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);
183 
184 	/*
185 	 * Initialize the value buf's access and bh fields for this entry.
186 	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
187 	 */
188 	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
189 				   struct ocfs2_xattr_value_buf *vb);
190 };
191 
192 /*
193  * Describes an xattr entry location.  This is a memory structure
194  * tracking the on-disk structure.
195  */
196 struct ocfs2_xa_loc {
197 	/* This xattr belongs to this inode */
198 	struct inode *xl_inode;
199 
200 	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
201 	struct ocfs2_xattr_header *xl_header;
202 
203 	/* Bytes from xl_header to the end of the storage */
204 	int xl_size;
205 
206 	/*
207 	 * The ocfs2_xattr_entry this location describes.  If this is
208 	 * NULL, this location describes the on-disk structure where it
209 	 * would have been.
210 	 */
211 	struct ocfs2_xattr_entry *xl_entry;
212 
213 	/*
214 	 * Internal housekeeping
215 	 */
216 
217 	/* Buffer(s) containing this entry */
218 	void *xl_storage;
219 
220 	/* Operations on the storage backing this location */
221 	const struct ocfs2_xa_loc_operations *xl_ops;
222 };
223 
224 /*
225  * Convenience functions to calculate how much space is needed for a
226  * given name+value pair
227  */
228 static int namevalue_size(int name_len, uint64_t value_len)
229 {
230 	if (value_len > OCFS2_XATTR_INLINE_SIZE)
231 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
232 	else
233 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
234 }
235 
236 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
237 {
238 	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
239 }
240 
241 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
242 {
243 	u64 value_len = le64_to_cpu(xe->xe_value_size);
244 
245 	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
246 	       ocfs2_xattr_is_local(xe));
247 	return namevalue_size(xe->xe_name_len, value_len);
248 }
249 
250 
251 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
252 					     struct ocfs2_xattr_header *xh,
253 					     int index,
254 					     int *block_off,
255 					     int *new_offset);
256 
257 static int ocfs2_xattr_block_find(struct inode *inode,
258 				  int name_index,
259 				  const char *name,
260 				  struct ocfs2_xattr_search *xs);
261 static int ocfs2_xattr_index_block_find(struct inode *inode,
262 					struct buffer_head *root_bh,
263 					int name_index,
264 					const char *name,
265 					struct ocfs2_xattr_search *xs);
266 
267 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
268 					struct buffer_head *blk_bh,
269 					char *buffer,
270 					size_t buffer_size);
271 
272 static int ocfs2_xattr_create_index_block(struct inode *inode,
273 					  struct ocfs2_xattr_search *xs,
274 					  struct ocfs2_xattr_set_ctxt *ctxt);
275 
276 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
277 					     struct ocfs2_xattr_info *xi,
278 					     struct ocfs2_xattr_search *xs,
279 					     struct ocfs2_xattr_set_ctxt *ctxt);
280 
281 typedef int (xattr_tree_rec_func)(struct inode *inode,
282 				  struct buffer_head *root_bh,
283 				  u64 blkno, u32 cpos, u32 len, void *para);
284 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
285 					   struct buffer_head *root_bh,
286 					   xattr_tree_rec_func *rec_func,
287 					   void *para);
288 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
289 					struct ocfs2_xattr_bucket *bucket,
290 					void *para);
291 static int ocfs2_rm_xattr_cluster(struct inode *inode,
292 				  struct buffer_head *root_bh,
293 				  u64 blkno,
294 				  u32 cpos,
295 				  u32 len,
296 				  void *para);
297 
298 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
299 				  u64 src_blk, u64 last_blk, u64 to_blk,
300 				  unsigned int start_bucket,
301 				  u32 *first_hash);
302 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
303 					struct ocfs2_dinode *di,
304 					struct ocfs2_xattr_info *xi,
305 					struct ocfs2_xattr_search *xis,
306 					struct ocfs2_xattr_search *xbs,
307 					struct ocfs2_refcount_tree **ref_tree,
308 					int *meta_need,
309 					int *credits);
310 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
311 					   struct ocfs2_xattr_bucket *bucket,
312 					   int offset,
313 					   struct ocfs2_xattr_value_root **xv,
314 					   struct buffer_head **bh);
315 
316 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
317 {
318 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
319 }
320 
321 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
322 {
323 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
324 }
325 
326 #define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
327 #define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
328 #define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
329 
330 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
331 {
332 	struct ocfs2_xattr_bucket *bucket;
333 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
334 
335 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
336 
337 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
338 	if (bucket) {
339 		bucket->bu_inode = inode;
340 		bucket->bu_blocks = blks;
341 	}
342 
343 	return bucket;
344 }
345 
346 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
347 {
348 	int i;
349 
350 	for (i = 0; i < bucket->bu_blocks; i++) {
351 		brelse(bucket->bu_bhs[i]);
352 		bucket->bu_bhs[i] = NULL;
353 	}
354 }
355 
356 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
357 {
358 	if (bucket) {
359 		ocfs2_xattr_bucket_relse(bucket);
360 		bucket->bu_inode = NULL;
361 		kfree(bucket);
362 	}
363 }
364 
365 /*
366  * A bucket that has never been written to disk doesn't need to be
367  * read.  We just need the buffer_heads.  Don't call this for
368  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
369  * them fully.
370  */
371 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
372 				   u64 xb_blkno)
373 {
374 	int i, rc = 0;
375 
376 	for (i = 0; i < bucket->bu_blocks; i++) {
377 		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
378 					      xb_blkno + i);
379 		if (!bucket->bu_bhs[i]) {
380 			rc = -EIO;
381 			mlog_errno(rc);
382 			break;
383 		}
384 
385 		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
386 					   bucket->bu_bhs[i]))
387 			ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
388 						      bucket->bu_bhs[i]);
389 	}
390 
391 	if (rc)
392 		ocfs2_xattr_bucket_relse(bucket);
393 	return rc;
394 }
395 
396 /* Read the xattr bucket at xb_blkno */
397 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
398 				   u64 xb_blkno)
399 {
400 	int rc;
401 
402 	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
403 			       bucket->bu_blocks, bucket->bu_bhs, 0,
404 			       NULL);
405 	if (!rc) {
406 		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
407 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
408 						 bucket->bu_bhs,
409 						 bucket->bu_blocks,
410 						 &bucket_xh(bucket)->xh_check);
411 		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
412 		if (rc)
413 			mlog_errno(rc);
414 	}
415 
416 	if (rc)
417 		ocfs2_xattr_bucket_relse(bucket);
418 	return rc;
419 }
420 
421 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
422 					     struct ocfs2_xattr_bucket *bucket,
423 					     int type)
424 {
425 	int i, rc = 0;
426 
427 	for (i = 0; i < bucket->bu_blocks; i++) {
428 		rc = ocfs2_journal_access(handle,
429 					  INODE_CACHE(bucket->bu_inode),
430 					  bucket->bu_bhs[i], type);
431 		if (rc) {
432 			mlog_errno(rc);
433 			break;
434 		}
435 	}
436 
437 	return rc;
438 }
439 
440 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
441 					     struct ocfs2_xattr_bucket *bucket)
442 {
443 	int i;
444 
445 	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
446 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
447 				   bucket->bu_bhs, bucket->bu_blocks,
448 				   &bucket_xh(bucket)->xh_check);
449 	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
450 
451 	for (i = 0; i < bucket->bu_blocks; i++)
452 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
453 }
454 
455 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
456 					 struct ocfs2_xattr_bucket *src)
457 {
458 	int i;
459 	int blocksize = src->bu_inode->i_sb->s_blocksize;
460 
461 	BUG_ON(dest->bu_blocks != src->bu_blocks);
462 	BUG_ON(dest->bu_inode != src->bu_inode);
463 
464 	for (i = 0; i < src->bu_blocks; i++) {
465 		memcpy(bucket_block(dest, i), bucket_block(src, i),
466 		       blocksize);
467 	}
468 }
469 
470 static int ocfs2_validate_xattr_block(struct super_block *sb,
471 				      struct buffer_head *bh)
472 {
473 	int rc;
474 	struct ocfs2_xattr_block *xb =
475 		(struct ocfs2_xattr_block *)bh->b_data;
476 
477 	mlog(0, "Validating xattr block %llu\n",
478 	     (unsigned long long)bh->b_blocknr);
479 
480 	BUG_ON(!buffer_uptodate(bh));
481 
482 	/*
483 	 * If the ecc fails, we return the error but otherwise
484 	 * leave the filesystem running.  We know any error is
485 	 * local to this block.
486 	 */
487 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
488 	if (rc)
489 		return rc;
490 
491 	/*
492 	 * Errors after here are fatal
493 	 */
494 
495 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
496 		ocfs2_error(sb,
497 			    "Extended attribute block #%llu has bad "
498 			    "signature %.*s",
499 			    (unsigned long long)bh->b_blocknr, 7,
500 			    xb->xb_signature);
501 		return -EINVAL;
502 	}
503 
504 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
505 		ocfs2_error(sb,
506 			    "Extended attribute block #%llu has an "
507 			    "invalid xb_blkno of %llu",
508 			    (unsigned long long)bh->b_blocknr,
509 			    (unsigned long long)le64_to_cpu(xb->xb_blkno));
510 		return -EINVAL;
511 	}
512 
513 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
514 		ocfs2_error(sb,
515 			    "Extended attribute block #%llu has an invalid "
516 			    "xb_fs_generation of #%u",
517 			    (unsigned long long)bh->b_blocknr,
518 			    le32_to_cpu(xb->xb_fs_generation));
519 		return -EINVAL;
520 	}
521 
522 	return 0;
523 }
524 
525 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
526 				  struct buffer_head **bh)
527 {
528 	int rc;
529 	struct buffer_head *tmp = *bh;
530 
531 	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
532 			      ocfs2_validate_xattr_block);
533 
534 	/* If ocfs2_read_block() got us a new bh, pass it up. */
535 	if (!rc && !*bh)
536 		*bh = tmp;
537 
538 	return rc;
539 }
540 
541 static inline const char *ocfs2_xattr_prefix(int name_index)
542 {
543 	const struct xattr_handler *handler = NULL;
544 
545 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
546 		handler = ocfs2_xattr_handler_map[name_index];
547 
548 	return handler ? handler->prefix : NULL;
549 }
550 
551 static u32 ocfs2_xattr_name_hash(struct inode *inode,
552 				 const char *name,
553 				 int name_len)
554 {
555 	/* Get hash value of uuid from super block */
556 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
557 	int i;
558 
559 	/* hash extended attribute name */
560 	for (i = 0; i < name_len; i++) {
561 		hash = (hash << OCFS2_HASH_SHIFT) ^
562 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
563 		       *name++;
564 	}
565 
566 	return hash;
567 }
568 
569 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
570 {
571 	return namevalue_size(name_len, value_len) +
572 		sizeof(struct ocfs2_xattr_entry);
573 }
574 
575 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
576 {
577 	return namevalue_size_xi(xi) +
578 		sizeof(struct ocfs2_xattr_entry);
579 }
580 
581 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
582 {
583 	return namevalue_size_xe(xe) +
584 		sizeof(struct ocfs2_xattr_entry);
585 }
586 
587 int ocfs2_calc_security_init(struct inode *dir,
588 			     struct ocfs2_security_xattr_info *si,
589 			     int *want_clusters,
590 			     int *xattr_credits,
591 			     struct ocfs2_alloc_context **xattr_ac)
592 {
593 	int ret = 0;
594 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
595 	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
596 						 si->value_len);
597 
598 	/*
599 	 * The max space of security xattr taken inline is
600 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
601 	 * So reserve one metadata block for it is ok.
602 	 */
603 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
604 	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
605 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
606 		if (ret) {
607 			mlog_errno(ret);
608 			return ret;
609 		}
610 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
611 	}
612 
613 	/* reserve clusters for xattr value which will be set in B tree*/
614 	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
615 		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
616 							    si->value_len);
617 
618 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
619 							   new_clusters);
620 		*want_clusters += new_clusters;
621 	}
622 	return ret;
623 }
624 
625 int ocfs2_calc_xattr_init(struct inode *dir,
626 			  struct buffer_head *dir_bh,
627 			  int mode,
628 			  struct ocfs2_security_xattr_info *si,
629 			  int *want_clusters,
630 			  int *xattr_credits,
631 			  int *want_meta)
632 {
633 	int ret = 0;
634 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
635 	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
636 
637 	if (si->enable)
638 		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
639 						     si->value_len);
640 
641 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
642 		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
643 					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
644 					"", NULL, 0);
645 		if (acl_len > 0) {
646 			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
647 			if (S_ISDIR(mode))
648 				a_size <<= 1;
649 		} else if (acl_len != 0 && acl_len != -ENODATA) {
650 			mlog_errno(ret);
651 			return ret;
652 		}
653 	}
654 
655 	if (!(s_size + a_size))
656 		return ret;
657 
658 	/*
659 	 * The max space of security xattr taken inline is
660 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
661 	 * The max space of acl xattr taken inline is
662 	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
663 	 * when blocksize = 512, may reserve one more cluser for
664 	 * xattr bucket, otherwise reserve one metadata block
665 	 * for them is ok.
666 	 * If this is a new directory with inline data,
667 	 * we choose to reserve the entire inline area for
668 	 * directory contents and force an external xattr block.
669 	 */
670 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
671 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
672 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
673 		*want_meta = *want_meta + 1;
674 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
675 	}
676 
677 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
678 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
679 		*want_clusters += 1;
680 		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
681 	}
682 
683 	/*
684 	 * reserve credits and clusters for xattrs which has large value
685 	 * and have to be set outside
686 	 */
687 	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
688 		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
689 							si->value_len);
690 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
691 							   new_clusters);
692 		*want_clusters += new_clusters;
693 	}
694 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
695 	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
696 		/* for directory, it has DEFAULT and ACCESS two types of acls */
697 		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
698 				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
699 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
700 							   new_clusters);
701 		*want_clusters += new_clusters;
702 	}
703 
704 	return ret;
705 }
706 
707 static int ocfs2_xattr_extend_allocation(struct inode *inode,
708 					 u32 clusters_to_add,
709 					 struct ocfs2_xattr_value_buf *vb,
710 					 struct ocfs2_xattr_set_ctxt *ctxt)
711 {
712 	int status = 0, credits;
713 	handle_t *handle = ctxt->handle;
714 	enum ocfs2_alloc_restarted why;
715 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
716 	struct ocfs2_extent_tree et;
717 
718 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
719 
720 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 
722 	while (clusters_to_add) {
723 		status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 				       OCFS2_JOURNAL_ACCESS_WRITE);
725 		if (status < 0) {
726 			mlog_errno(status);
727 			break;
728 		}
729 
730 		prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
731 		status = ocfs2_add_clusters_in_btree(handle,
732 						     &et,
733 						     &logical_start,
734 						     clusters_to_add,
735 						     0,
736 						     ctxt->data_ac,
737 						     ctxt->meta_ac,
738 						     &why);
739 		if ((status < 0) && (status != -EAGAIN)) {
740 			if (status != -ENOSPC)
741 				mlog_errno(status);
742 			break;
743 		}
744 
745 		ocfs2_journal_dirty(handle, vb->vb_bh);
746 
747 		clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
748 					 prev_clusters;
749 
750 		if (why != RESTART_NONE && clusters_to_add) {
751 			/*
752 			 * We can only fail in case the alloc file doesn't give
753 			 * up enough clusters.
754 			 */
755 			BUG_ON(why == RESTART_META);
756 
757 			mlog(0, "restarting xattr value extension for %u"
758 			     " clusters,.\n", clusters_to_add);
759 			credits = ocfs2_calc_extend_credits(inode->i_sb,
760 							    &vb->vb_xv->xr_list,
761 							    clusters_to_add);
762 			status = ocfs2_extend_trans(handle, credits);
763 			if (status < 0) {
764 				status = -ENOMEM;
765 				mlog_errno(status);
766 				break;
767 			}
768 		}
769 	}
770 
771 	return status;
772 }
773 
774 static int __ocfs2_remove_xattr_range(struct inode *inode,
775 				      struct ocfs2_xattr_value_buf *vb,
776 				      u32 cpos, u32 phys_cpos, u32 len,
777 				      unsigned int ext_flags,
778 				      struct ocfs2_xattr_set_ctxt *ctxt)
779 {
780 	int ret;
781 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
782 	handle_t *handle = ctxt->handle;
783 	struct ocfs2_extent_tree et;
784 
785 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
786 
787 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
788 			    OCFS2_JOURNAL_ACCESS_WRITE);
789 	if (ret) {
790 		mlog_errno(ret);
791 		goto out;
792 	}
793 
794 	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
795 				  &ctxt->dealloc);
796 	if (ret) {
797 		mlog_errno(ret);
798 		goto out;
799 	}
800 
801 	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
802 	ocfs2_journal_dirty(handle, vb->vb_bh);
803 
804 	if (ext_flags & OCFS2_EXT_REFCOUNTED)
805 		ret = ocfs2_decrease_refcount(inode, handle,
806 					ocfs2_blocks_to_clusters(inode->i_sb,
807 								 phys_blkno),
808 					len, ctxt->meta_ac, &ctxt->dealloc, 1);
809 	else
810 		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
811 						  phys_blkno, len);
812 	if (ret)
813 		mlog_errno(ret);
814 
815 out:
816 	return ret;
817 }
818 
819 static int ocfs2_xattr_shrink_size(struct inode *inode,
820 				   u32 old_clusters,
821 				   u32 new_clusters,
822 				   struct ocfs2_xattr_value_buf *vb,
823 				   struct ocfs2_xattr_set_ctxt *ctxt)
824 {
825 	int ret = 0;
826 	unsigned int ext_flags;
827 	u32 trunc_len, cpos, phys_cpos, alloc_size;
828 	u64 block;
829 
830 	if (old_clusters <= new_clusters)
831 		return 0;
832 
833 	cpos = new_clusters;
834 	trunc_len = old_clusters - new_clusters;
835 	while (trunc_len) {
836 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
837 					       &alloc_size,
838 					       &vb->vb_xv->xr_list, &ext_flags);
839 		if (ret) {
840 			mlog_errno(ret);
841 			goto out;
842 		}
843 
844 		if (alloc_size > trunc_len)
845 			alloc_size = trunc_len;
846 
847 		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
848 						 phys_cpos, alloc_size,
849 						 ext_flags, ctxt);
850 		if (ret) {
851 			mlog_errno(ret);
852 			goto out;
853 		}
854 
855 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
856 		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
857 						       block, alloc_size);
858 		cpos += alloc_size;
859 		trunc_len -= alloc_size;
860 	}
861 
862 out:
863 	return ret;
864 }
865 
866 static int ocfs2_xattr_value_truncate(struct inode *inode,
867 				      struct ocfs2_xattr_value_buf *vb,
868 				      int len,
869 				      struct ocfs2_xattr_set_ctxt *ctxt)
870 {
871 	int ret;
872 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
873 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
874 
875 	if (new_clusters == old_clusters)
876 		return 0;
877 
878 	if (new_clusters > old_clusters)
879 		ret = ocfs2_xattr_extend_allocation(inode,
880 						    new_clusters - old_clusters,
881 						    vb, ctxt);
882 	else
883 		ret = ocfs2_xattr_shrink_size(inode,
884 					      old_clusters, new_clusters,
885 					      vb, ctxt);
886 
887 	return ret;
888 }
889 
890 static int ocfs2_xattr_list_entry(char *buffer, size_t size,
891 				  size_t *result, const char *prefix,
892 				  const char *name, int name_len)
893 {
894 	char *p = buffer + *result;
895 	int prefix_len = strlen(prefix);
896 	int total_len = prefix_len + name_len + 1;
897 
898 	*result += total_len;
899 
900 	/* we are just looking for how big our buffer needs to be */
901 	if (!size)
902 		return 0;
903 
904 	if (*result > size)
905 		return -ERANGE;
906 
907 	memcpy(p, prefix, prefix_len);
908 	memcpy(p + prefix_len, name, name_len);
909 	p[prefix_len + name_len] = '\0';
910 
911 	return 0;
912 }
913 
914 static int ocfs2_xattr_list_entries(struct inode *inode,
915 				    struct ocfs2_xattr_header *header,
916 				    char *buffer, size_t buffer_size)
917 {
918 	size_t result = 0;
919 	int i, type, ret;
920 	const char *prefix, *name;
921 
922 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
923 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
924 		type = ocfs2_xattr_get_type(entry);
925 		prefix = ocfs2_xattr_prefix(type);
926 
927 		if (prefix) {
928 			name = (const char *)header +
929 				le16_to_cpu(entry->xe_name_offset);
930 
931 			ret = ocfs2_xattr_list_entry(buffer, buffer_size,
932 						     &result, prefix, name,
933 						     entry->xe_name_len);
934 			if (ret)
935 				return ret;
936 		}
937 	}
938 
939 	return result;
940 }
941 
942 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
943 					 struct ocfs2_dinode *di)
944 {
945 	struct ocfs2_xattr_header *xh;
946 	int i;
947 
948 	xh = (struct ocfs2_xattr_header *)
949 		 ((void *)di + inode->i_sb->s_blocksize -
950 		 le16_to_cpu(di->i_xattr_inline_size));
951 
952 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
953 		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
954 			return 1;
955 
956 	return 0;
957 }
958 
959 static int ocfs2_xattr_ibody_list(struct inode *inode,
960 				  struct ocfs2_dinode *di,
961 				  char *buffer,
962 				  size_t buffer_size)
963 {
964 	struct ocfs2_xattr_header *header = NULL;
965 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
966 	int ret = 0;
967 
968 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
969 		return ret;
970 
971 	header = (struct ocfs2_xattr_header *)
972 		 ((void *)di + inode->i_sb->s_blocksize -
973 		 le16_to_cpu(di->i_xattr_inline_size));
974 
975 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
976 
977 	return ret;
978 }
979 
980 static int ocfs2_xattr_block_list(struct inode *inode,
981 				  struct ocfs2_dinode *di,
982 				  char *buffer,
983 				  size_t buffer_size)
984 {
985 	struct buffer_head *blk_bh = NULL;
986 	struct ocfs2_xattr_block *xb;
987 	int ret = 0;
988 
989 	if (!di->i_xattr_loc)
990 		return ret;
991 
992 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
993 				     &blk_bh);
994 	if (ret < 0) {
995 		mlog_errno(ret);
996 		return ret;
997 	}
998 
999 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1000 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1001 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
1002 		ret = ocfs2_xattr_list_entries(inode, header,
1003 					       buffer, buffer_size);
1004 	} else
1005 		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
1006 						   buffer, buffer_size);
1007 
1008 	brelse(blk_bh);
1009 
1010 	return ret;
1011 }
1012 
1013 ssize_t ocfs2_listxattr(struct dentry *dentry,
1014 			char *buffer,
1015 			size_t size)
1016 {
1017 	int ret = 0, i_ret = 0, b_ret = 0;
1018 	struct buffer_head *di_bh = NULL;
1019 	struct ocfs2_dinode *di = NULL;
1020 	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1021 
1022 	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1023 		return -EOPNOTSUPP;
1024 
1025 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1026 		return ret;
1027 
1028 	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1029 	if (ret < 0) {
1030 		mlog_errno(ret);
1031 		return ret;
1032 	}
1033 
1034 	di = (struct ocfs2_dinode *)di_bh->b_data;
1035 
1036 	down_read(&oi->ip_xattr_sem);
1037 	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1038 	if (i_ret < 0)
1039 		b_ret = 0;
1040 	else {
1041 		if (buffer) {
1042 			buffer += i_ret;
1043 			size -= i_ret;
1044 		}
1045 		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1046 					       buffer, size);
1047 		if (b_ret < 0)
1048 			i_ret = 0;
1049 	}
1050 	up_read(&oi->ip_xattr_sem);
1051 	ocfs2_inode_unlock(dentry->d_inode, 0);
1052 
1053 	brelse(di_bh);
1054 
1055 	return i_ret + b_ret;
1056 }
1057 
1058 static int ocfs2_xattr_find_entry(int name_index,
1059 				  const char *name,
1060 				  struct ocfs2_xattr_search *xs)
1061 {
1062 	struct ocfs2_xattr_entry *entry;
1063 	size_t name_len;
1064 	int i, cmp = 1;
1065 
1066 	if (name == NULL)
1067 		return -EINVAL;
1068 
1069 	name_len = strlen(name);
1070 	entry = xs->here;
1071 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1072 		cmp = name_index - ocfs2_xattr_get_type(entry);
1073 		if (!cmp)
1074 			cmp = name_len - entry->xe_name_len;
1075 		if (!cmp)
1076 			cmp = memcmp(name, (xs->base +
1077 				     le16_to_cpu(entry->xe_name_offset)),
1078 				     name_len);
1079 		if (cmp == 0)
1080 			break;
1081 		entry += 1;
1082 	}
1083 	xs->here = entry;
1084 
1085 	return cmp ? -ENODATA : 0;
1086 }
1087 
1088 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1089 					 struct ocfs2_xattr_value_root *xv,
1090 					 void *buffer,
1091 					 size_t len)
1092 {
1093 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
1094 	u64 blkno;
1095 	int i, ret = 0;
1096 	size_t cplen, blocksize;
1097 	struct buffer_head *bh = NULL;
1098 	struct ocfs2_extent_list *el;
1099 
1100 	el = &xv->xr_list;
1101 	clusters = le32_to_cpu(xv->xr_clusters);
1102 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1103 	blocksize = inode->i_sb->s_blocksize;
1104 
1105 	cpos = 0;
1106 	while (cpos < clusters) {
1107 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1108 					       &num_clusters, el, NULL);
1109 		if (ret) {
1110 			mlog_errno(ret);
1111 			goto out;
1112 		}
1113 
1114 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1115 		/* Copy ocfs2_xattr_value */
1116 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1117 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1118 					       &bh, NULL);
1119 			if (ret) {
1120 				mlog_errno(ret);
1121 				goto out;
1122 			}
1123 
1124 			cplen = len >= blocksize ? blocksize : len;
1125 			memcpy(buffer, bh->b_data, cplen);
1126 			len -= cplen;
1127 			buffer += cplen;
1128 
1129 			brelse(bh);
1130 			bh = NULL;
1131 			if (len == 0)
1132 				break;
1133 		}
1134 		cpos += num_clusters;
1135 	}
1136 out:
1137 	return ret;
1138 }
1139 
1140 static int ocfs2_xattr_ibody_get(struct inode *inode,
1141 				 int name_index,
1142 				 const char *name,
1143 				 void *buffer,
1144 				 size_t buffer_size,
1145 				 struct ocfs2_xattr_search *xs)
1146 {
1147 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1148 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1149 	struct ocfs2_xattr_value_root *xv;
1150 	size_t size;
1151 	int ret = 0;
1152 
1153 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1154 		return -ENODATA;
1155 
1156 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1157 	xs->header = (struct ocfs2_xattr_header *)
1158 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1159 	xs->base = (void *)xs->header;
1160 	xs->here = xs->header->xh_entries;
1161 
1162 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
1163 	if (ret)
1164 		return ret;
1165 	size = le64_to_cpu(xs->here->xe_value_size);
1166 	if (buffer) {
1167 		if (size > buffer_size)
1168 			return -ERANGE;
1169 		if (ocfs2_xattr_is_local(xs->here)) {
1170 			memcpy(buffer, (void *)xs->base +
1171 			       le16_to_cpu(xs->here->xe_name_offset) +
1172 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1173 		} else {
1174 			xv = (struct ocfs2_xattr_value_root *)
1175 				(xs->base + le16_to_cpu(
1176 				 xs->here->xe_name_offset) +
1177 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1178 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1179 							    buffer, size);
1180 			if (ret < 0) {
1181 				mlog_errno(ret);
1182 				return ret;
1183 			}
1184 		}
1185 	}
1186 
1187 	return size;
1188 }
1189 
1190 static int ocfs2_xattr_block_get(struct inode *inode,
1191 				 int name_index,
1192 				 const char *name,
1193 				 void *buffer,
1194 				 size_t buffer_size,
1195 				 struct ocfs2_xattr_search *xs)
1196 {
1197 	struct ocfs2_xattr_block *xb;
1198 	struct ocfs2_xattr_value_root *xv;
1199 	size_t size;
1200 	int ret = -ENODATA, name_offset, name_len, i;
1201 	int uninitialized_var(block_off);
1202 
1203 	xs->bucket = ocfs2_xattr_bucket_new(inode);
1204 	if (!xs->bucket) {
1205 		ret = -ENOMEM;
1206 		mlog_errno(ret);
1207 		goto cleanup;
1208 	}
1209 
1210 	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1211 	if (ret) {
1212 		mlog_errno(ret);
1213 		goto cleanup;
1214 	}
1215 
1216 	if (xs->not_found) {
1217 		ret = -ENODATA;
1218 		goto cleanup;
1219 	}
1220 
1221 	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1222 	size = le64_to_cpu(xs->here->xe_value_size);
1223 	if (buffer) {
1224 		ret = -ERANGE;
1225 		if (size > buffer_size)
1226 			goto cleanup;
1227 
1228 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
1229 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1230 		i = xs->here - xs->header->xh_entries;
1231 
1232 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1233 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1234 								bucket_xh(xs->bucket),
1235 								i,
1236 								&block_off,
1237 								&name_offset);
1238 			xs->base = bucket_block(xs->bucket, block_off);
1239 		}
1240 		if (ocfs2_xattr_is_local(xs->here)) {
1241 			memcpy(buffer, (void *)xs->base +
1242 			       name_offset + name_len, size);
1243 		} else {
1244 			xv = (struct ocfs2_xattr_value_root *)
1245 				(xs->base + name_offset + name_len);
1246 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1247 							    buffer, size);
1248 			if (ret < 0) {
1249 				mlog_errno(ret);
1250 				goto cleanup;
1251 			}
1252 		}
1253 	}
1254 	ret = size;
1255 cleanup:
1256 	ocfs2_xattr_bucket_free(xs->bucket);
1257 
1258 	brelse(xs->xattr_bh);
1259 	xs->xattr_bh = NULL;
1260 	return ret;
1261 }
1262 
1263 int ocfs2_xattr_get_nolock(struct inode *inode,
1264 			   struct buffer_head *di_bh,
1265 			   int name_index,
1266 			   const char *name,
1267 			   void *buffer,
1268 			   size_t buffer_size)
1269 {
1270 	int ret;
1271 	struct ocfs2_dinode *di = NULL;
1272 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1273 	struct ocfs2_xattr_search xis = {
1274 		.not_found = -ENODATA,
1275 	};
1276 	struct ocfs2_xattr_search xbs = {
1277 		.not_found = -ENODATA,
1278 	};
1279 
1280 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1281 		return -EOPNOTSUPP;
1282 
1283 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1284 		ret = -ENODATA;
1285 
1286 	xis.inode_bh = xbs.inode_bh = di_bh;
1287 	di = (struct ocfs2_dinode *)di_bh->b_data;
1288 
1289 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1290 				    buffer_size, &xis);
1291 	if (ret == -ENODATA && di->i_xattr_loc)
1292 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1293 					    buffer_size, &xbs);
1294 
1295 	return ret;
1296 }
1297 
1298 /* ocfs2_xattr_get()
1299  *
1300  * Copy an extended attribute into the buffer provided.
1301  * Buffer is NULL to compute the size of buffer required.
1302  */
1303 static int ocfs2_xattr_get(struct inode *inode,
1304 			   int name_index,
1305 			   const char *name,
1306 			   void *buffer,
1307 			   size_t buffer_size)
1308 {
1309 	int ret;
1310 	struct buffer_head *di_bh = NULL;
1311 
1312 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
1313 	if (ret < 0) {
1314 		mlog_errno(ret);
1315 		return ret;
1316 	}
1317 	down_read(&OCFS2_I(inode)->ip_xattr_sem);
1318 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1319 				     name, buffer, buffer_size);
1320 	up_read(&OCFS2_I(inode)->ip_xattr_sem);
1321 
1322 	ocfs2_inode_unlock(inode, 0);
1323 
1324 	brelse(di_bh);
1325 
1326 	return ret;
1327 }
1328 
1329 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1330 					   handle_t *handle,
1331 					   struct ocfs2_xattr_value_buf *vb,
1332 					   const void *value,
1333 					   int value_len)
1334 {
1335 	int ret = 0, i, cp_len;
1336 	u16 blocksize = inode->i_sb->s_blocksize;
1337 	u32 p_cluster, num_clusters;
1338 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1339 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1340 	u64 blkno;
1341 	struct buffer_head *bh = NULL;
1342 	unsigned int ext_flags;
1343 	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1344 
1345 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1346 
1347 	while (cpos < clusters) {
1348 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1349 					       &num_clusters, &xv->xr_list,
1350 					       &ext_flags);
1351 		if (ret) {
1352 			mlog_errno(ret);
1353 			goto out;
1354 		}
1355 
1356 		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1357 
1358 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1359 
1360 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1361 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1362 					       &bh, NULL);
1363 			if (ret) {
1364 				mlog_errno(ret);
1365 				goto out;
1366 			}
1367 
1368 			ret = ocfs2_journal_access(handle,
1369 						   INODE_CACHE(inode),
1370 						   bh,
1371 						   OCFS2_JOURNAL_ACCESS_WRITE);
1372 			if (ret < 0) {
1373 				mlog_errno(ret);
1374 				goto out;
1375 			}
1376 
1377 			cp_len = value_len > blocksize ? blocksize : value_len;
1378 			memcpy(bh->b_data, value, cp_len);
1379 			value_len -= cp_len;
1380 			value += cp_len;
1381 			if (cp_len < blocksize)
1382 				memset(bh->b_data + cp_len, 0,
1383 				       blocksize - cp_len);
1384 
1385 			ocfs2_journal_dirty(handle, bh);
1386 			brelse(bh);
1387 			bh = NULL;
1388 
1389 			/*
1390 			 * XXX: do we need to empty all the following
1391 			 * blocks in this cluster?
1392 			 */
1393 			if (!value_len)
1394 				break;
1395 		}
1396 		cpos += num_clusters;
1397 	}
1398 out:
1399 	brelse(bh);
1400 
1401 	return ret;
1402 }
1403 
1404 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1405 				       int num_entries)
1406 {
1407 	int free_space;
1408 
1409 	if (!needed_space)
1410 		return 0;
1411 
1412 	free_space = free_start -
1413 		sizeof(struct ocfs2_xattr_header) -
1414 		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
1415 		OCFS2_XATTR_HEADER_GAP;
1416 	if (free_space < 0)
1417 		return -EIO;
1418 	if (free_space < needed_space)
1419 		return -ENOSPC;
1420 
1421 	return 0;
1422 }
1423 
1424 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1425 				   int type)
1426 {
1427 	return loc->xl_ops->xlo_journal_access(handle, loc, type);
1428 }
1429 
1430 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1431 {
1432 	loc->xl_ops->xlo_journal_dirty(handle, loc);
1433 }
1434 
1435 /* Give a pointer into the storage for the given offset */
1436 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1437 {
1438 	BUG_ON(offset >= loc->xl_size);
1439 	return loc->xl_ops->xlo_offset_pointer(loc, offset);
1440 }
1441 
1442 /*
1443  * Wipe the name+value pair and allow the storage to reclaim it.  This
1444  * must be followed by either removal of the entry or a call to
1445  * ocfs2_xa_add_namevalue().
1446  */
1447 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1448 {
1449 	loc->xl_ops->xlo_wipe_namevalue(loc);
1450 }
1451 
1452 /*
1453  * Find lowest offset to a name+value pair.  This is the start of our
1454  * downward-growing free space.
1455  */
1456 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1457 {
1458 	return loc->xl_ops->xlo_get_free_start(loc);
1459 }
1460 
1461 /* Can we reuse loc->xl_entry for xi? */
1462 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1463 				    struct ocfs2_xattr_info *xi)
1464 {
1465 	return loc->xl_ops->xlo_can_reuse(loc, xi);
1466 }
1467 
1468 /* How much free space is needed to set the new value */
1469 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1470 				struct ocfs2_xattr_info *xi)
1471 {
1472 	return loc->xl_ops->xlo_check_space(loc, xi);
1473 }
1474 
1475 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1476 {
1477 	loc->xl_ops->xlo_add_entry(loc, name_hash);
1478 	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1479 	/*
1480 	 * We can't leave the new entry's xe_name_offset at zero or
1481 	 * add_namevalue() will go nuts.  We set it to the size of our
1482 	 * storage so that it can never be less than any other entry.
1483 	 */
1484 	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1485 }
1486 
1487 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1488 				   struct ocfs2_xattr_info *xi)
1489 {
1490 	int size = namevalue_size_xi(xi);
1491 	int nameval_offset;
1492 	char *nameval_buf;
1493 
1494 	loc->xl_ops->xlo_add_namevalue(loc, size);
1495 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1496 	loc->xl_entry->xe_name_len = xi->xi_name_len;
1497 	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1498 	ocfs2_xattr_set_local(loc->xl_entry,
1499 			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1500 
1501 	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1502 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1503 	memset(nameval_buf, 0, size);
1504 	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1505 }
1506 
1507 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1508 				    struct ocfs2_xattr_value_buf *vb)
1509 {
1510 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1511 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1512 
1513 	/* Value bufs are for value trees */
1514 	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1515 	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1516 	       (name_size + OCFS2_XATTR_ROOT_SIZE));
1517 
1518 	loc->xl_ops->xlo_fill_value_buf(loc, vb);
1519 	vb->vb_xv =
1520 		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1521 							nameval_offset +
1522 							name_size);
1523 }
1524 
1525 static int ocfs2_xa_block_journal_access(handle_t *handle,
1526 					 struct ocfs2_xa_loc *loc, int type)
1527 {
1528 	struct buffer_head *bh = loc->xl_storage;
1529 	ocfs2_journal_access_func access;
1530 
1531 	if (loc->xl_size == (bh->b_size -
1532 			     offsetof(struct ocfs2_xattr_block,
1533 				      xb_attrs.xb_header)))
1534 		access = ocfs2_journal_access_xb;
1535 	else
1536 		access = ocfs2_journal_access_di;
1537 	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1538 }
1539 
1540 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1541 					 struct ocfs2_xa_loc *loc)
1542 {
1543 	struct buffer_head *bh = loc->xl_storage;
1544 
1545 	ocfs2_journal_dirty(handle, bh);
1546 }
1547 
1548 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1549 					   int offset)
1550 {
1551 	return (char *)loc->xl_header + offset;
1552 }
1553 
1554 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1555 				    struct ocfs2_xattr_info *xi)
1556 {
1557 	/*
1558 	 * Block storage is strict.  If the sizes aren't exact, we will
1559 	 * remove the old one and reinsert the new.
1560 	 */
1561 	return namevalue_size_xe(loc->xl_entry) ==
1562 		namevalue_size_xi(xi);
1563 }
1564 
1565 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1566 {
1567 	struct ocfs2_xattr_header *xh = loc->xl_header;
1568 	int i, count = le16_to_cpu(xh->xh_count);
1569 	int offset, free_start = loc->xl_size;
1570 
1571 	for (i = 0; i < count; i++) {
1572 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1573 		if (offset < free_start)
1574 			free_start = offset;
1575 	}
1576 
1577 	return free_start;
1578 }
1579 
1580 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1581 				      struct ocfs2_xattr_info *xi)
1582 {
1583 	int count = le16_to_cpu(loc->xl_header->xh_count);
1584 	int free_start = ocfs2_xa_get_free_start(loc);
1585 	int needed_space = ocfs2_xi_entry_usage(xi);
1586 
1587 	/*
1588 	 * Block storage will reclaim the original entry before inserting
1589 	 * the new value, so we only need the difference.  If the new
1590 	 * entry is smaller than the old one, we don't need anything.
1591 	 */
1592 	if (loc->xl_entry) {
1593 		/* Don't need space if we're reusing! */
1594 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1595 			needed_space = 0;
1596 		else
1597 			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1598 	}
1599 	if (needed_space < 0)
1600 		needed_space = 0;
1601 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1602 }
1603 
1604 /*
1605  * Block storage for xattrs keeps the name+value pairs compacted.  When
1606  * we remove one, we have to shift any that preceded it towards the end.
1607  */
1608 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1609 {
1610 	int i, offset;
1611 	int namevalue_offset, first_namevalue_offset, namevalue_size;
1612 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1613 	struct ocfs2_xattr_header *xh = loc->xl_header;
1614 	int count = le16_to_cpu(xh->xh_count);
1615 
1616 	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1617 	namevalue_size = namevalue_size_xe(entry);
1618 	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1619 
1620 	/* Shift the name+value pairs */
1621 	memmove((char *)xh + first_namevalue_offset + namevalue_size,
1622 		(char *)xh + first_namevalue_offset,
1623 		namevalue_offset - first_namevalue_offset);
1624 	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1625 
1626 	/* Now tell xh->xh_entries about it */
1627 	for (i = 0; i < count; i++) {
1628 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1629 		if (offset <= namevalue_offset)
1630 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1631 				     namevalue_size);
1632 	}
1633 
1634 	/*
1635 	 * Note that we don't update xh_free_start or xh_name_value_len
1636 	 * because they're not used in block-stored xattrs.
1637 	 */
1638 }
1639 
1640 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1641 {
1642 	int count = le16_to_cpu(loc->xl_header->xh_count);
1643 	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1644 	le16_add_cpu(&loc->xl_header->xh_count, 1);
1645 	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1646 }
1647 
1648 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1649 {
1650 	int free_start = ocfs2_xa_get_free_start(loc);
1651 
1652 	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1653 }
1654 
1655 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1656 					  struct ocfs2_xattr_value_buf *vb)
1657 {
1658 	struct buffer_head *bh = loc->xl_storage;
1659 
1660 	if (loc->xl_size == (bh->b_size -
1661 			     offsetof(struct ocfs2_xattr_block,
1662 				      xb_attrs.xb_header)))
1663 		vb->vb_access = ocfs2_journal_access_xb;
1664 	else
1665 		vb->vb_access = ocfs2_journal_access_di;
1666 	vb->vb_bh = bh;
1667 }
1668 
1669 /*
1670  * Operations for xattrs stored in blocks.  This includes inline inode
1671  * storage and unindexed ocfs2_xattr_blocks.
1672  */
1673 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1674 	.xlo_journal_access	= ocfs2_xa_block_journal_access,
1675 	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
1676 	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
1677 	.xlo_check_space	= ocfs2_xa_block_check_space,
1678 	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
1679 	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
1680 	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
1681 	.xlo_add_entry		= ocfs2_xa_block_add_entry,
1682 	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
1683 	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
1684 };
1685 
1686 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1687 					  struct ocfs2_xa_loc *loc, int type)
1688 {
1689 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1690 
1691 	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1692 }
1693 
1694 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1695 					  struct ocfs2_xa_loc *loc)
1696 {
1697 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1698 
1699 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1700 }
1701 
1702 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1703 					    int offset)
1704 {
1705 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1706 	int block, block_offset;
1707 
1708 	/* The header is at the front of the bucket */
1709 	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1710 	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1711 
1712 	return bucket_block(bucket, block) + block_offset;
1713 }
1714 
1715 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1716 				     struct ocfs2_xattr_info *xi)
1717 {
1718 	return namevalue_size_xe(loc->xl_entry) >=
1719 		namevalue_size_xi(xi);
1720 }
1721 
1722 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1723 {
1724 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1725 	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1726 }
1727 
1728 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1729 					 int free_start, int size)
1730 {
1731 	/*
1732 	 * We need to make sure that the name+value pair fits within
1733 	 * one block.
1734 	 */
1735 	if (((free_start - size) >> sb->s_blocksize_bits) !=
1736 	    ((free_start - 1) >> sb->s_blocksize_bits))
1737 		free_start -= free_start % sb->s_blocksize;
1738 
1739 	return free_start;
1740 }
1741 
1742 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1743 				       struct ocfs2_xattr_info *xi)
1744 {
1745 	int rc;
1746 	int count = le16_to_cpu(loc->xl_header->xh_count);
1747 	int free_start = ocfs2_xa_get_free_start(loc);
1748 	int needed_space = ocfs2_xi_entry_usage(xi);
1749 	int size = namevalue_size_xi(xi);
1750 	struct super_block *sb = loc->xl_inode->i_sb;
1751 
1752 	/*
1753 	 * Bucket storage does not reclaim name+value pairs it cannot
1754 	 * reuse.  They live as holes until the bucket fills, and then
1755 	 * the bucket is defragmented.  However, the bucket can reclaim
1756 	 * the ocfs2_xattr_entry.
1757 	 */
1758 	if (loc->xl_entry) {
1759 		/* Don't need space if we're reusing! */
1760 		if (ocfs2_xa_can_reuse_entry(loc, xi))
1761 			needed_space = 0;
1762 		else
1763 			needed_space -= sizeof(struct ocfs2_xattr_entry);
1764 	}
1765 	BUG_ON(needed_space < 0);
1766 
1767 	if (free_start < size) {
1768 		if (needed_space)
1769 			return -ENOSPC;
1770 	} else {
1771 		/*
1772 		 * First we check if it would fit in the first place.
1773 		 * Below, we align the free start to a block.  This may
1774 		 * slide us below the minimum gap.  By checking unaligned
1775 		 * first, we avoid that error.
1776 		 */
1777 		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1778 						 count);
1779 		if (rc)
1780 			return rc;
1781 		free_start = ocfs2_bucket_align_free_start(sb, free_start,
1782 							   size);
1783 	}
1784 	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1785 }
1786 
1787 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1788 {
1789 	le16_add_cpu(&loc->xl_header->xh_name_value_len,
1790 		     -namevalue_size_xe(loc->xl_entry));
1791 }
1792 
1793 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1794 {
1795 	struct ocfs2_xattr_header *xh = loc->xl_header;
1796 	int count = le16_to_cpu(xh->xh_count);
1797 	int low = 0, high = count - 1, tmp;
1798 	struct ocfs2_xattr_entry *tmp_xe;
1799 
1800 	/*
1801 	 * We keep buckets sorted by name_hash, so we need to find
1802 	 * our insert place.
1803 	 */
1804 	while (low <= high && count) {
1805 		tmp = (low + high) / 2;
1806 		tmp_xe = &xh->xh_entries[tmp];
1807 
1808 		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1809 			low = tmp + 1;
1810 		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1811 			high = tmp - 1;
1812 		else {
1813 			low = tmp;
1814 			break;
1815 		}
1816 	}
1817 
1818 	if (low != count)
1819 		memmove(&xh->xh_entries[low + 1],
1820 			&xh->xh_entries[low],
1821 			((count - low) * sizeof(struct ocfs2_xattr_entry)));
1822 
1823 	le16_add_cpu(&xh->xh_count, 1);
1824 	loc->xl_entry = &xh->xh_entries[low];
1825 	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1826 }
1827 
1828 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1829 {
1830 	int free_start = ocfs2_xa_get_free_start(loc);
1831 	struct ocfs2_xattr_header *xh = loc->xl_header;
1832 	struct super_block *sb = loc->xl_inode->i_sb;
1833 	int nameval_offset;
1834 
1835 	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1836 	nameval_offset = free_start - size;
1837 	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1838 	xh->xh_free_start = cpu_to_le16(nameval_offset);
1839 	le16_add_cpu(&xh->xh_name_value_len, size);
1840 
1841 }
1842 
1843 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1844 					   struct ocfs2_xattr_value_buf *vb)
1845 {
1846 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1847 	struct super_block *sb = loc->xl_inode->i_sb;
1848 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1849 	int size = namevalue_size_xe(loc->xl_entry);
1850 	int block_offset = nameval_offset >> sb->s_blocksize_bits;
1851 
1852 	/* Values are not allowed to straddle block boundaries */
1853 	BUG_ON(block_offset !=
1854 	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1855 	/* We expect the bucket to be filled in */
1856 	BUG_ON(!bucket->bu_bhs[block_offset]);
1857 
1858 	vb->vb_access = ocfs2_journal_access;
1859 	vb->vb_bh = bucket->bu_bhs[block_offset];
1860 }
1861 
1862 /* Operations for xattrs stored in buckets. */
1863 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1864 	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
1865 	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
1866 	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
1867 	.xlo_check_space	= ocfs2_xa_bucket_check_space,
1868 	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
1869 	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
1870 	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
1871 	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
1872 	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
1873 	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
1874 };
1875 
1876 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1877 {
1878 	struct ocfs2_xattr_value_buf vb;
1879 
1880 	if (ocfs2_xattr_is_local(loc->xl_entry))
1881 		return 0;
1882 
1883 	ocfs2_xa_fill_value_buf(loc, &vb);
1884 	return le32_to_cpu(vb.vb_xv->xr_clusters);
1885 }
1886 
1887 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1888 				   struct ocfs2_xattr_set_ctxt *ctxt)
1889 {
1890 	int trunc_rc, access_rc;
1891 	struct ocfs2_xattr_value_buf vb;
1892 
1893 	ocfs2_xa_fill_value_buf(loc, &vb);
1894 	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1895 					      ctxt);
1896 
1897 	/*
1898 	 * The caller of ocfs2_xa_value_truncate() has already called
1899 	 * ocfs2_xa_journal_access on the loc.  However, The truncate code
1900 	 * calls ocfs2_extend_trans().  This may commit the previous
1901 	 * transaction and open a new one.  If this is a bucket, truncate
1902 	 * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1903 	 * the caller is expecting to dirty the entire bucket.  So we must
1904 	 * reset the journal work.  We do this even if truncate has failed,
1905 	 * as it could have failed after committing the extend.
1906 	 */
1907 	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1908 					    OCFS2_JOURNAL_ACCESS_WRITE);
1909 
1910 	/* Errors in truncate take precedence */
1911 	return trunc_rc ? trunc_rc : access_rc;
1912 }
1913 
1914 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1915 {
1916 	int index, count;
1917 	struct ocfs2_xattr_header *xh = loc->xl_header;
1918 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1919 
1920 	ocfs2_xa_wipe_namevalue(loc);
1921 	loc->xl_entry = NULL;
1922 
1923 	le16_add_cpu(&xh->xh_count, -1);
1924 	count = le16_to_cpu(xh->xh_count);
1925 
1926 	/*
1927 	 * Only zero out the entry if there are more remaining.  This is
1928 	 * important for an empty bucket, as it keeps track of the
1929 	 * bucket's hash value.  It doesn't hurt empty block storage.
1930 	 */
1931 	if (count) {
1932 		index = ((char *)entry - (char *)&xh->xh_entries) /
1933 			sizeof(struct ocfs2_xattr_entry);
1934 		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1935 			(count - index) * sizeof(struct ocfs2_xattr_entry));
1936 		memset(&xh->xh_entries[count], 0,
1937 		       sizeof(struct ocfs2_xattr_entry));
1938 	}
1939 }
1940 
1941 /*
1942  * If we have a problem adjusting the size of an external value during
1943  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1944  * in an intermediate state.  For example, the value may be partially
1945  * truncated.
1946  *
1947  * If the value tree hasn't changed, the extend/truncate went nowhere.
1948  * We have nothing to do.  The caller can treat it as a straight error.
1949  *
1950  * If the value tree got partially truncated, we now have a corrupted
1951  * extended attribute.  We're going to wipe its entry and leak the
1952  * clusters.  Better to leak some storage than leave a corrupt entry.
1953  *
1954  * If the value tree grew, it obviously didn't grow enough for the
1955  * new entry.  We're not going to try and reclaim those clusters either.
1956  * If there was already an external value there (orig_clusters != 0),
1957  * the new clusters are attached safely and we can just leave the old
1958  * value in place.  If there was no external value there, we remove
1959  * the entry.
1960  *
1961  * This way, the xattr block we store in the journal will be consistent.
1962  * If the size change broke because of the journal, no changes will hit
1963  * disk anyway.
1964  */
1965 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1966 					    const char *what,
1967 					    unsigned int orig_clusters)
1968 {
1969 	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1970 	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1971 				le16_to_cpu(loc->xl_entry->xe_name_offset));
1972 
1973 	if (new_clusters < orig_clusters) {
1974 		mlog(ML_ERROR,
1975 		     "Partial truncate while %s xattr %.*s.  Leaking "
1976 		     "%u clusters and removing the entry\n",
1977 		     what, loc->xl_entry->xe_name_len, nameval_buf,
1978 		     orig_clusters - new_clusters);
1979 		ocfs2_xa_remove_entry(loc);
1980 	} else if (!orig_clusters) {
1981 		mlog(ML_ERROR,
1982 		     "Unable to allocate an external value for xattr "
1983 		     "%.*s safely.  Leaking %u clusters and removing the "
1984 		     "entry\n",
1985 		     loc->xl_entry->xe_name_len, nameval_buf,
1986 		     new_clusters - orig_clusters);
1987 		ocfs2_xa_remove_entry(loc);
1988 	} else if (new_clusters > orig_clusters)
1989 		mlog(ML_ERROR,
1990 		     "Unable to grow xattr %.*s safely.  %u new clusters "
1991 		     "have been added, but the value will not be "
1992 		     "modified\n",
1993 		     loc->xl_entry->xe_name_len, nameval_buf,
1994 		     new_clusters - orig_clusters);
1995 }
1996 
1997 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1998 			   struct ocfs2_xattr_set_ctxt *ctxt)
1999 {
2000 	int rc = 0;
2001 	unsigned int orig_clusters;
2002 
2003 	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2004 		orig_clusters = ocfs2_xa_value_clusters(loc);
2005 		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2006 		if (rc) {
2007 			mlog_errno(rc);
2008 			/*
2009 			 * Since this is remove, we can return 0 if
2010 			 * ocfs2_xa_cleanup_value_truncate() is going to
2011 			 * wipe the entry anyway.  So we check the
2012 			 * cluster count as well.
2013 			 */
2014 			if (orig_clusters != ocfs2_xa_value_clusters(loc))
2015 				rc = 0;
2016 			ocfs2_xa_cleanup_value_truncate(loc, "removing",
2017 							orig_clusters);
2018 			if (rc)
2019 				goto out;
2020 		}
2021 	}
2022 
2023 	ocfs2_xa_remove_entry(loc);
2024 
2025 out:
2026 	return rc;
2027 }
2028 
2029 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2030 {
2031 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2032 	char *nameval_buf;
2033 
2034 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2035 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2036 	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2037 }
2038 
2039 /*
2040  * Take an existing entry and make it ready for the new value.  This
2041  * won't allocate space, but it may free space.  It should be ready for
2042  * ocfs2_xa_prepare_entry() to finish the work.
2043  */
2044 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2045 				struct ocfs2_xattr_info *xi,
2046 				struct ocfs2_xattr_set_ctxt *ctxt)
2047 {
2048 	int rc = 0;
2049 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2050 	unsigned int orig_clusters;
2051 	char *nameval_buf;
2052 	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2053 	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2054 
2055 	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2056 	       name_size);
2057 
2058 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2059 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2060 	if (xe_local) {
2061 		memset(nameval_buf + name_size, 0,
2062 		       namevalue_size_xe(loc->xl_entry) - name_size);
2063 		if (!xi_local)
2064 			ocfs2_xa_install_value_root(loc);
2065 	} else {
2066 		orig_clusters = ocfs2_xa_value_clusters(loc);
2067 		if (xi_local) {
2068 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2069 			if (rc < 0)
2070 				mlog_errno(rc);
2071 			else
2072 				memset(nameval_buf + name_size, 0,
2073 				       namevalue_size_xe(loc->xl_entry) -
2074 				       name_size);
2075 		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2076 			   xi->xi_value_len) {
2077 			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2078 						     ctxt);
2079 			if (rc < 0)
2080 				mlog_errno(rc);
2081 		}
2082 
2083 		if (rc) {
2084 			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2085 							orig_clusters);
2086 			goto out;
2087 		}
2088 	}
2089 
2090 	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2091 	ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2092 
2093 out:
2094 	return rc;
2095 }
2096 
2097 /*
2098  * Prepares loc->xl_entry to receive the new xattr.  This includes
2099  * properly setting up the name+value pair region.  If loc->xl_entry
2100  * already exists, it will take care of modifying it appropriately.
2101  *
2102  * Note that this modifies the data.  You did journal_access already,
2103  * right?
2104  */
2105 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2106 				  struct ocfs2_xattr_info *xi,
2107 				  u32 name_hash,
2108 				  struct ocfs2_xattr_set_ctxt *ctxt)
2109 {
2110 	int rc = 0;
2111 	unsigned int orig_clusters;
2112 	__le64 orig_value_size = 0;
2113 
2114 	rc = ocfs2_xa_check_space(loc, xi);
2115 	if (rc)
2116 		goto out;
2117 
2118 	if (loc->xl_entry) {
2119 		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2120 			orig_value_size = loc->xl_entry->xe_value_size;
2121 			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2122 			if (rc)
2123 				goto out;
2124 			goto alloc_value;
2125 		}
2126 
2127 		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2128 			orig_clusters = ocfs2_xa_value_clusters(loc);
2129 			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2130 			if (rc) {
2131 				mlog_errno(rc);
2132 				ocfs2_xa_cleanup_value_truncate(loc,
2133 								"overwriting",
2134 								orig_clusters);
2135 				goto out;
2136 			}
2137 		}
2138 		ocfs2_xa_wipe_namevalue(loc);
2139 	} else
2140 		ocfs2_xa_add_entry(loc, name_hash);
2141 
2142 	/*
2143 	 * If we get here, we have a blank entry.  Fill it.  We grow our
2144 	 * name+value pair back from the end.
2145 	 */
2146 	ocfs2_xa_add_namevalue(loc, xi);
2147 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2148 		ocfs2_xa_install_value_root(loc);
2149 
2150 alloc_value:
2151 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2152 		orig_clusters = ocfs2_xa_value_clusters(loc);
2153 		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2154 		if (rc < 0) {
2155 			ctxt->set_abort = 1;
2156 			ocfs2_xa_cleanup_value_truncate(loc, "growing",
2157 							orig_clusters);
2158 			/*
2159 			 * If we were growing an existing value,
2160 			 * ocfs2_xa_cleanup_value_truncate() won't remove
2161 			 * the entry. We need to restore the original value
2162 			 * size.
2163 			 */
2164 			if (loc->xl_entry) {
2165 				BUG_ON(!orig_value_size);
2166 				loc->xl_entry->xe_value_size = orig_value_size;
2167 			}
2168 			mlog_errno(rc);
2169 		}
2170 	}
2171 
2172 out:
2173 	return rc;
2174 }
2175 
2176 /*
2177  * Store the value portion of the name+value pair.  This will skip
2178  * values that are stored externally.  Their tree roots were set up
2179  * by ocfs2_xa_prepare_entry().
2180  */
2181 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2182 				struct ocfs2_xattr_info *xi,
2183 				struct ocfs2_xattr_set_ctxt *ctxt)
2184 {
2185 	int rc = 0;
2186 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2187 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2188 	char *nameval_buf;
2189 	struct ocfs2_xattr_value_buf vb;
2190 
2191 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2192 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2193 		ocfs2_xa_fill_value_buf(loc, &vb);
2194 		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2195 						     ctxt->handle, &vb,
2196 						     xi->xi_value,
2197 						     xi->xi_value_len);
2198 	} else
2199 		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2200 
2201 	return rc;
2202 }
2203 
2204 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2205 			struct ocfs2_xattr_info *xi,
2206 			struct ocfs2_xattr_set_ctxt *ctxt)
2207 {
2208 	int ret;
2209 	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2210 					      xi->xi_name_len);
2211 
2212 	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2213 				      OCFS2_JOURNAL_ACCESS_WRITE);
2214 	if (ret) {
2215 		mlog_errno(ret);
2216 		goto out;
2217 	}
2218 
2219 	/*
2220 	 * From here on out, everything is going to modify the buffer a
2221 	 * little.  Errors are going to leave the xattr header in a
2222 	 * sane state.  Thus, even with errors we dirty the sucker.
2223 	 */
2224 
2225 	/* Don't worry, we are never called with !xi_value and !xl_entry */
2226 	if (!xi->xi_value) {
2227 		ret = ocfs2_xa_remove(loc, ctxt);
2228 		goto out_dirty;
2229 	}
2230 
2231 	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2232 	if (ret) {
2233 		if (ret != -ENOSPC)
2234 			mlog_errno(ret);
2235 		goto out_dirty;
2236 	}
2237 
2238 	ret = ocfs2_xa_store_value(loc, xi, ctxt);
2239 	if (ret)
2240 		mlog_errno(ret);
2241 
2242 out_dirty:
2243 	ocfs2_xa_journal_dirty(ctxt->handle, loc);
2244 
2245 out:
2246 	return ret;
2247 }
2248 
2249 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2250 				     struct inode *inode,
2251 				     struct buffer_head *bh,
2252 				     struct ocfs2_xattr_entry *entry)
2253 {
2254 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2255 
2256 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2257 
2258 	loc->xl_inode = inode;
2259 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2260 	loc->xl_storage = bh;
2261 	loc->xl_entry = entry;
2262 	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2263 	loc->xl_header =
2264 		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2265 					      loc->xl_size);
2266 }
2267 
2268 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2269 					  struct inode *inode,
2270 					  struct buffer_head *bh,
2271 					  struct ocfs2_xattr_entry *entry)
2272 {
2273 	struct ocfs2_xattr_block *xb =
2274 		(struct ocfs2_xattr_block *)bh->b_data;
2275 
2276 	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2277 
2278 	loc->xl_inode = inode;
2279 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2280 	loc->xl_storage = bh;
2281 	loc->xl_header = &(xb->xb_attrs.xb_header);
2282 	loc->xl_entry = entry;
2283 	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2284 					     xb_attrs.xb_header);
2285 }
2286 
2287 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2288 					   struct ocfs2_xattr_bucket *bucket,
2289 					   struct ocfs2_xattr_entry *entry)
2290 {
2291 	loc->xl_inode = bucket->bu_inode;
2292 	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2293 	loc->xl_storage = bucket;
2294 	loc->xl_header = bucket_xh(bucket);
2295 	loc->xl_entry = entry;
2296 	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2297 }
2298 
2299 /*
2300  * In xattr remove, if it is stored outside and refcounted, we may have
2301  * the chance to split the refcount tree. So need the allocators.
2302  */
2303 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2304 					struct ocfs2_xattr_value_root *xv,
2305 					struct ocfs2_caching_info *ref_ci,
2306 					struct buffer_head *ref_root_bh,
2307 					struct ocfs2_alloc_context **meta_ac,
2308 					int *ref_credits)
2309 {
2310 	int ret, meta_add = 0;
2311 	u32 p_cluster, num_clusters;
2312 	unsigned int ext_flags;
2313 
2314 	*ref_credits = 0;
2315 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2316 				       &num_clusters,
2317 				       &xv->xr_list,
2318 				       &ext_flags);
2319 	if (ret) {
2320 		mlog_errno(ret);
2321 		goto out;
2322 	}
2323 
2324 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2325 		goto out;
2326 
2327 	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2328 						 ref_root_bh, xv,
2329 						 &meta_add, ref_credits);
2330 	if (ret) {
2331 		mlog_errno(ret);
2332 		goto out;
2333 	}
2334 
2335 	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2336 						meta_add, meta_ac);
2337 	if (ret)
2338 		mlog_errno(ret);
2339 
2340 out:
2341 	return ret;
2342 }
2343 
2344 static int ocfs2_remove_value_outside(struct inode*inode,
2345 				      struct ocfs2_xattr_value_buf *vb,
2346 				      struct ocfs2_xattr_header *header,
2347 				      struct ocfs2_caching_info *ref_ci,
2348 				      struct buffer_head *ref_root_bh)
2349 {
2350 	int ret = 0, i, ref_credits;
2351 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2352 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2353 	void *val;
2354 
2355 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2356 
2357 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2358 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2359 
2360 		if (ocfs2_xattr_is_local(entry))
2361 			continue;
2362 
2363 		val = (void *)header +
2364 			le16_to_cpu(entry->xe_name_offset);
2365 		vb->vb_xv = (struct ocfs2_xattr_value_root *)
2366 			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2367 
2368 		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2369 							 ref_ci, ref_root_bh,
2370 							 &ctxt.meta_ac,
2371 							 &ref_credits);
2372 
2373 		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2374 					ocfs2_remove_extent_credits(osb->sb));
2375 		if (IS_ERR(ctxt.handle)) {
2376 			ret = PTR_ERR(ctxt.handle);
2377 			mlog_errno(ret);
2378 			break;
2379 		}
2380 
2381 		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2382 		if (ret < 0) {
2383 			mlog_errno(ret);
2384 			break;
2385 		}
2386 
2387 		ocfs2_commit_trans(osb, ctxt.handle);
2388 		if (ctxt.meta_ac) {
2389 			ocfs2_free_alloc_context(ctxt.meta_ac);
2390 			ctxt.meta_ac = NULL;
2391 		}
2392 	}
2393 
2394 	if (ctxt.meta_ac)
2395 		ocfs2_free_alloc_context(ctxt.meta_ac);
2396 	ocfs2_schedule_truncate_log_flush(osb, 1);
2397 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2398 	return ret;
2399 }
2400 
2401 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2402 				    struct buffer_head *di_bh,
2403 				    struct ocfs2_caching_info *ref_ci,
2404 				    struct buffer_head *ref_root_bh)
2405 {
2406 
2407 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2408 	struct ocfs2_xattr_header *header;
2409 	int ret;
2410 	struct ocfs2_xattr_value_buf vb = {
2411 		.vb_bh = di_bh,
2412 		.vb_access = ocfs2_journal_access_di,
2413 	};
2414 
2415 	header = (struct ocfs2_xattr_header *)
2416 		 ((void *)di + inode->i_sb->s_blocksize -
2417 		 le16_to_cpu(di->i_xattr_inline_size));
2418 
2419 	ret = ocfs2_remove_value_outside(inode, &vb, header,
2420 					 ref_ci, ref_root_bh);
2421 
2422 	return ret;
2423 }
2424 
2425 struct ocfs2_rm_xattr_bucket_para {
2426 	struct ocfs2_caching_info *ref_ci;
2427 	struct buffer_head *ref_root_bh;
2428 };
2429 
2430 static int ocfs2_xattr_block_remove(struct inode *inode,
2431 				    struct buffer_head *blk_bh,
2432 				    struct ocfs2_caching_info *ref_ci,
2433 				    struct buffer_head *ref_root_bh)
2434 {
2435 	struct ocfs2_xattr_block *xb;
2436 	int ret = 0;
2437 	struct ocfs2_xattr_value_buf vb = {
2438 		.vb_bh = blk_bh,
2439 		.vb_access = ocfs2_journal_access_xb,
2440 	};
2441 	struct ocfs2_rm_xattr_bucket_para args = {
2442 		.ref_ci = ref_ci,
2443 		.ref_root_bh = ref_root_bh,
2444 	};
2445 
2446 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2447 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2448 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2449 		ret = ocfs2_remove_value_outside(inode, &vb, header,
2450 						 ref_ci, ref_root_bh);
2451 	} else
2452 		ret = ocfs2_iterate_xattr_index_block(inode,
2453 						blk_bh,
2454 						ocfs2_rm_xattr_cluster,
2455 						&args);
2456 
2457 	return ret;
2458 }
2459 
2460 static int ocfs2_xattr_free_block(struct inode *inode,
2461 				  u64 block,
2462 				  struct ocfs2_caching_info *ref_ci,
2463 				  struct buffer_head *ref_root_bh)
2464 {
2465 	struct inode *xb_alloc_inode;
2466 	struct buffer_head *xb_alloc_bh = NULL;
2467 	struct buffer_head *blk_bh = NULL;
2468 	struct ocfs2_xattr_block *xb;
2469 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2470 	handle_t *handle;
2471 	int ret = 0;
2472 	u64 blk, bg_blkno;
2473 	u16 bit;
2474 
2475 	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2476 	if (ret < 0) {
2477 		mlog_errno(ret);
2478 		goto out;
2479 	}
2480 
2481 	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2482 	if (ret < 0) {
2483 		mlog_errno(ret);
2484 		goto out;
2485 	}
2486 
2487 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2488 	blk = le64_to_cpu(xb->xb_blkno);
2489 	bit = le16_to_cpu(xb->xb_suballoc_bit);
2490 	if (xb->xb_suballoc_loc)
2491 		bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2492 	else
2493 		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2494 
2495 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2496 				EXTENT_ALLOC_SYSTEM_INODE,
2497 				le16_to_cpu(xb->xb_suballoc_slot));
2498 	if (!xb_alloc_inode) {
2499 		ret = -ENOMEM;
2500 		mlog_errno(ret);
2501 		goto out;
2502 	}
2503 	mutex_lock(&xb_alloc_inode->i_mutex);
2504 
2505 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2506 	if (ret < 0) {
2507 		mlog_errno(ret);
2508 		goto out_mutex;
2509 	}
2510 
2511 	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2512 	if (IS_ERR(handle)) {
2513 		ret = PTR_ERR(handle);
2514 		mlog_errno(ret);
2515 		goto out_unlock;
2516 	}
2517 
2518 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2519 				       bit, bg_blkno, 1);
2520 	if (ret < 0)
2521 		mlog_errno(ret);
2522 
2523 	ocfs2_commit_trans(osb, handle);
2524 out_unlock:
2525 	ocfs2_inode_unlock(xb_alloc_inode, 1);
2526 	brelse(xb_alloc_bh);
2527 out_mutex:
2528 	mutex_unlock(&xb_alloc_inode->i_mutex);
2529 	iput(xb_alloc_inode);
2530 out:
2531 	brelse(blk_bh);
2532 	return ret;
2533 }
2534 
2535 /*
2536  * ocfs2_xattr_remove()
2537  *
2538  * Free extended attribute resources associated with this inode.
2539  */
2540 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2541 {
2542 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2543 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2544 	struct ocfs2_refcount_tree *ref_tree = NULL;
2545 	struct buffer_head *ref_root_bh = NULL;
2546 	struct ocfs2_caching_info *ref_ci = NULL;
2547 	handle_t *handle;
2548 	int ret;
2549 
2550 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2551 		return 0;
2552 
2553 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2554 		return 0;
2555 
2556 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2557 		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2558 					       le64_to_cpu(di->i_refcount_loc),
2559 					       1, &ref_tree, &ref_root_bh);
2560 		if (ret) {
2561 			mlog_errno(ret);
2562 			goto out;
2563 		}
2564 		ref_ci = &ref_tree->rf_ci;
2565 
2566 	}
2567 
2568 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2569 		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2570 					       ref_ci, ref_root_bh);
2571 		if (ret < 0) {
2572 			mlog_errno(ret);
2573 			goto out;
2574 		}
2575 	}
2576 
2577 	if (di->i_xattr_loc) {
2578 		ret = ocfs2_xattr_free_block(inode,
2579 					     le64_to_cpu(di->i_xattr_loc),
2580 					     ref_ci, ref_root_bh);
2581 		if (ret < 0) {
2582 			mlog_errno(ret);
2583 			goto out;
2584 		}
2585 	}
2586 
2587 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2588 				   OCFS2_INODE_UPDATE_CREDITS);
2589 	if (IS_ERR(handle)) {
2590 		ret = PTR_ERR(handle);
2591 		mlog_errno(ret);
2592 		goto out;
2593 	}
2594 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2595 				      OCFS2_JOURNAL_ACCESS_WRITE);
2596 	if (ret) {
2597 		mlog_errno(ret);
2598 		goto out_commit;
2599 	}
2600 
2601 	di->i_xattr_loc = 0;
2602 
2603 	spin_lock(&oi->ip_lock);
2604 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2605 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2606 	spin_unlock(&oi->ip_lock);
2607 
2608 	ocfs2_journal_dirty(handle, di_bh);
2609 out_commit:
2610 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2611 out:
2612 	if (ref_tree)
2613 		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2614 	brelse(ref_root_bh);
2615 	return ret;
2616 }
2617 
2618 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2619 					struct ocfs2_dinode *di)
2620 {
2621 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2622 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2623 	int free;
2624 
2625 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2626 		return 0;
2627 
2628 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2629 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2630 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2631 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
2632 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
2633 			le64_to_cpu(di->i_size);
2634 	} else {
2635 		struct ocfs2_extent_list *el = &di->id2.i_list;
2636 		free = (le16_to_cpu(el->l_count) -
2637 			le16_to_cpu(el->l_next_free_rec)) *
2638 			sizeof(struct ocfs2_extent_rec);
2639 	}
2640 	if (free >= xattrsize)
2641 		return 1;
2642 
2643 	return 0;
2644 }
2645 
2646 /*
2647  * ocfs2_xattr_ibody_find()
2648  *
2649  * Find extended attribute in inode block and
2650  * fill search info into struct ocfs2_xattr_search.
2651  */
2652 static int ocfs2_xattr_ibody_find(struct inode *inode,
2653 				  int name_index,
2654 				  const char *name,
2655 				  struct ocfs2_xattr_search *xs)
2656 {
2657 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2658 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2659 	int ret;
2660 	int has_space = 0;
2661 
2662 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2663 		return 0;
2664 
2665 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2666 		down_read(&oi->ip_alloc_sem);
2667 		has_space = ocfs2_xattr_has_space_inline(inode, di);
2668 		up_read(&oi->ip_alloc_sem);
2669 		if (!has_space)
2670 			return 0;
2671 	}
2672 
2673 	xs->xattr_bh = xs->inode_bh;
2674 	xs->end = (void *)di + inode->i_sb->s_blocksize;
2675 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2676 		xs->header = (struct ocfs2_xattr_header *)
2677 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
2678 	else
2679 		xs->header = (struct ocfs2_xattr_header *)
2680 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2681 	xs->base = (void *)xs->header;
2682 	xs->here = xs->header->xh_entries;
2683 
2684 	/* Find the named attribute. */
2685 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2686 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2687 		if (ret && ret != -ENODATA)
2688 			return ret;
2689 		xs->not_found = ret;
2690 	}
2691 
2692 	return 0;
2693 }
2694 
2695 static int ocfs2_xattr_ibody_init(struct inode *inode,
2696 				  struct buffer_head *di_bh,
2697 				  struct ocfs2_xattr_set_ctxt *ctxt)
2698 {
2699 	int ret;
2700 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2701 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2702 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2703 	unsigned int xattrsize = osb->s_xattr_inline_size;
2704 
2705 	if (!ocfs2_xattr_has_space_inline(inode, di)) {
2706 		ret = -ENOSPC;
2707 		goto out;
2708 	}
2709 
2710 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2711 				      OCFS2_JOURNAL_ACCESS_WRITE);
2712 	if (ret) {
2713 		mlog_errno(ret);
2714 		goto out;
2715 	}
2716 
2717 	/*
2718 	 * Adjust extent record count or inline data size
2719 	 * to reserve space for extended attribute.
2720 	 */
2721 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2722 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2723 		le16_add_cpu(&idata->id_count, -xattrsize);
2724 	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2725 		struct ocfs2_extent_list *el = &di->id2.i_list;
2726 		le16_add_cpu(&el->l_count, -(xattrsize /
2727 					     sizeof(struct ocfs2_extent_rec)));
2728 	}
2729 	di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2730 
2731 	spin_lock(&oi->ip_lock);
2732 	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2733 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2734 	spin_unlock(&oi->ip_lock);
2735 
2736 	ocfs2_journal_dirty(ctxt->handle, di_bh);
2737 
2738 out:
2739 	return ret;
2740 }
2741 
2742 /*
2743  * ocfs2_xattr_ibody_set()
2744  *
2745  * Set, replace or remove an extended attribute into inode block.
2746  *
2747  */
2748 static int ocfs2_xattr_ibody_set(struct inode *inode,
2749 				 struct ocfs2_xattr_info *xi,
2750 				 struct ocfs2_xattr_search *xs,
2751 				 struct ocfs2_xattr_set_ctxt *ctxt)
2752 {
2753 	int ret;
2754 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2755 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2756 	struct ocfs2_xa_loc loc;
2757 
2758 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2759 		return -ENOSPC;
2760 
2761 	down_write(&oi->ip_alloc_sem);
2762 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2763 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
2764 			ret = -ENOSPC;
2765 			goto out;
2766 		}
2767 	}
2768 
2769 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2770 		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2771 		if (ret) {
2772 			if (ret != -ENOSPC)
2773 				mlog_errno(ret);
2774 			goto out;
2775 		}
2776 	}
2777 
2778 	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2779 				 xs->not_found ? NULL : xs->here);
2780 	ret = ocfs2_xa_set(&loc, xi, ctxt);
2781 	if (ret) {
2782 		if (ret != -ENOSPC)
2783 			mlog_errno(ret);
2784 		goto out;
2785 	}
2786 	xs->here = loc.xl_entry;
2787 
2788 out:
2789 	up_write(&oi->ip_alloc_sem);
2790 
2791 	return ret;
2792 }
2793 
2794 /*
2795  * ocfs2_xattr_block_find()
2796  *
2797  * Find extended attribute in external block and
2798  * fill search info into struct ocfs2_xattr_search.
2799  */
2800 static int ocfs2_xattr_block_find(struct inode *inode,
2801 				  int name_index,
2802 				  const char *name,
2803 				  struct ocfs2_xattr_search *xs)
2804 {
2805 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2806 	struct buffer_head *blk_bh = NULL;
2807 	struct ocfs2_xattr_block *xb;
2808 	int ret = 0;
2809 
2810 	if (!di->i_xattr_loc)
2811 		return ret;
2812 
2813 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2814 				     &blk_bh);
2815 	if (ret < 0) {
2816 		mlog_errno(ret);
2817 		return ret;
2818 	}
2819 
2820 	xs->xattr_bh = blk_bh;
2821 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2822 
2823 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2824 		xs->header = &xb->xb_attrs.xb_header;
2825 		xs->base = (void *)xs->header;
2826 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2827 		xs->here = xs->header->xh_entries;
2828 
2829 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2830 	} else
2831 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2832 						   name_index,
2833 						   name, xs);
2834 
2835 	if (ret && ret != -ENODATA) {
2836 		xs->xattr_bh = NULL;
2837 		goto cleanup;
2838 	}
2839 	xs->not_found = ret;
2840 	return 0;
2841 cleanup:
2842 	brelse(blk_bh);
2843 
2844 	return ret;
2845 }
2846 
2847 static int ocfs2_create_xattr_block(struct inode *inode,
2848 				    struct buffer_head *inode_bh,
2849 				    struct ocfs2_xattr_set_ctxt *ctxt,
2850 				    int indexed,
2851 				    struct buffer_head **ret_bh)
2852 {
2853 	int ret;
2854 	u16 suballoc_bit_start;
2855 	u32 num_got;
2856 	u64 suballoc_loc, first_blkno;
2857 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2858 	struct buffer_head *new_bh = NULL;
2859 	struct ocfs2_xattr_block *xblk;
2860 
2861 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2862 				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2863 	if (ret < 0) {
2864 		mlog_errno(ret);
2865 		goto end;
2866 	}
2867 
2868 	ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2869 				   &suballoc_loc, &suballoc_bit_start,
2870 				   &num_got, &first_blkno);
2871 	if (ret < 0) {
2872 		mlog_errno(ret);
2873 		goto end;
2874 	}
2875 
2876 	new_bh = sb_getblk(inode->i_sb, first_blkno);
2877 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2878 
2879 	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2880 				      new_bh,
2881 				      OCFS2_JOURNAL_ACCESS_CREATE);
2882 	if (ret < 0) {
2883 		mlog_errno(ret);
2884 		goto end;
2885 	}
2886 
2887 	/* Initialize ocfs2_xattr_block */
2888 	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2889 	memset(xblk, 0, inode->i_sb->s_blocksize);
2890 	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2891 	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2892 	xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2893 	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2894 	xblk->xb_fs_generation =
2895 		cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2896 	xblk->xb_blkno = cpu_to_le64(first_blkno);
2897 	if (indexed) {
2898 		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2899 		xr->xt_clusters = cpu_to_le32(1);
2900 		xr->xt_last_eb_blk = 0;
2901 		xr->xt_list.l_tree_depth = 0;
2902 		xr->xt_list.l_count = cpu_to_le16(
2903 					ocfs2_xattr_recs_per_xb(inode->i_sb));
2904 		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2905 		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2906 	}
2907 	ocfs2_journal_dirty(ctxt->handle, new_bh);
2908 
2909 	/* Add it to the inode */
2910 	di->i_xattr_loc = cpu_to_le64(first_blkno);
2911 
2912 	spin_lock(&OCFS2_I(inode)->ip_lock);
2913 	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2914 	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2915 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2916 
2917 	ocfs2_journal_dirty(ctxt->handle, inode_bh);
2918 
2919 	*ret_bh = new_bh;
2920 	new_bh = NULL;
2921 
2922 end:
2923 	brelse(new_bh);
2924 	return ret;
2925 }
2926 
2927 /*
2928  * ocfs2_xattr_block_set()
2929  *
2930  * Set, replace or remove an extended attribute into external block.
2931  *
2932  */
2933 static int ocfs2_xattr_block_set(struct inode *inode,
2934 				 struct ocfs2_xattr_info *xi,
2935 				 struct ocfs2_xattr_search *xs,
2936 				 struct ocfs2_xattr_set_ctxt *ctxt)
2937 {
2938 	struct buffer_head *new_bh = NULL;
2939 	struct ocfs2_xattr_block *xblk = NULL;
2940 	int ret;
2941 	struct ocfs2_xa_loc loc;
2942 
2943 	if (!xs->xattr_bh) {
2944 		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2945 					       0, &new_bh);
2946 		if (ret) {
2947 			mlog_errno(ret);
2948 			goto end;
2949 		}
2950 
2951 		xs->xattr_bh = new_bh;
2952 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2953 		xs->header = &xblk->xb_attrs.xb_header;
2954 		xs->base = (void *)xs->header;
2955 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2956 		xs->here = xs->header->xh_entries;
2957 	} else
2958 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2959 
2960 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2961 		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2962 					      xs->not_found ? NULL : xs->here);
2963 
2964 		ret = ocfs2_xa_set(&loc, xi, ctxt);
2965 		if (!ret)
2966 			xs->here = loc.xl_entry;
2967 		else if ((ret != -ENOSPC) || ctxt->set_abort)
2968 			goto end;
2969 		else {
2970 			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2971 			if (ret)
2972 				goto end;
2973 		}
2974 	}
2975 
2976 	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2977 		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2978 
2979 end:
2980 	return ret;
2981 }
2982 
2983 /* Check whether the new xattr can be inserted into the inode. */
2984 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2985 				       struct ocfs2_xattr_info *xi,
2986 				       struct ocfs2_xattr_search *xs)
2987 {
2988 	struct ocfs2_xattr_entry *last;
2989 	int free, i;
2990 	size_t min_offs = xs->end - xs->base;
2991 
2992 	if (!xs->header)
2993 		return 0;
2994 
2995 	last = xs->header->xh_entries;
2996 
2997 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2998 		size_t offs = le16_to_cpu(last->xe_name_offset);
2999 		if (offs < min_offs)
3000 			min_offs = offs;
3001 		last += 1;
3002 	}
3003 
3004 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
3005 	if (free < 0)
3006 		return 0;
3007 
3008 	BUG_ON(!xs->not_found);
3009 
3010 	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3011 		return 1;
3012 
3013 	return 0;
3014 }
3015 
3016 static int ocfs2_calc_xattr_set_need(struct inode *inode,
3017 				     struct ocfs2_dinode *di,
3018 				     struct ocfs2_xattr_info *xi,
3019 				     struct ocfs2_xattr_search *xis,
3020 				     struct ocfs2_xattr_search *xbs,
3021 				     int *clusters_need,
3022 				     int *meta_need,
3023 				     int *credits_need)
3024 {
3025 	int ret = 0, old_in_xb = 0;
3026 	int clusters_add = 0, meta_add = 0, credits = 0;
3027 	struct buffer_head *bh = NULL;
3028 	struct ocfs2_xattr_block *xb = NULL;
3029 	struct ocfs2_xattr_entry *xe = NULL;
3030 	struct ocfs2_xattr_value_root *xv = NULL;
3031 	char *base = NULL;
3032 	int name_offset, name_len = 0;
3033 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3034 						    xi->xi_value_len);
3035 	u64 value_size;
3036 
3037 	/*
3038 	 * Calculate the clusters we need to write.
3039 	 * No matter whether we replace an old one or add a new one,
3040 	 * we need this for writing.
3041 	 */
3042 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3043 		credits += new_clusters *
3044 			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
3045 
3046 	if (xis->not_found && xbs->not_found) {
3047 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3048 
3049 		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3050 			clusters_add += new_clusters;
3051 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3052 							&def_xv.xv.xr_list,
3053 							new_clusters);
3054 		}
3055 
3056 		goto meta_guess;
3057 	}
3058 
3059 	if (!xis->not_found) {
3060 		xe = xis->here;
3061 		name_offset = le16_to_cpu(xe->xe_name_offset);
3062 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3063 		base = xis->base;
3064 		credits += OCFS2_INODE_UPDATE_CREDITS;
3065 	} else {
3066 		int i, block_off = 0;
3067 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3068 		xe = xbs->here;
3069 		name_offset = le16_to_cpu(xe->xe_name_offset);
3070 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3071 		i = xbs->here - xbs->header->xh_entries;
3072 		old_in_xb = 1;
3073 
3074 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3075 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3076 							bucket_xh(xbs->bucket),
3077 							i, &block_off,
3078 							&name_offset);
3079 			base = bucket_block(xbs->bucket, block_off);
3080 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3081 		} else {
3082 			base = xbs->base;
3083 			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3084 		}
3085 	}
3086 
3087 	/*
3088 	 * delete a xattr doesn't need metadata and cluster allocation.
3089 	 * so just calculate the credits and return.
3090 	 *
3091 	 * The credits for removing the value tree will be extended
3092 	 * by ocfs2_remove_extent itself.
3093 	 */
3094 	if (!xi->xi_value) {
3095 		if (!ocfs2_xattr_is_local(xe))
3096 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3097 
3098 		goto out;
3099 	}
3100 
3101 	/* do cluster allocation guess first. */
3102 	value_size = le64_to_cpu(xe->xe_value_size);
3103 
3104 	if (old_in_xb) {
3105 		/*
3106 		 * In xattr set, we always try to set the xe in inode first,
3107 		 * so if it can be inserted into inode successfully, the old
3108 		 * one will be removed from the xattr block, and this xattr
3109 		 * will be inserted into inode as a new xattr in inode.
3110 		 */
3111 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3112 			clusters_add += new_clusters;
3113 			credits += ocfs2_remove_extent_credits(inode->i_sb) +
3114 				    OCFS2_INODE_UPDATE_CREDITS;
3115 			if (!ocfs2_xattr_is_local(xe))
3116 				credits += ocfs2_calc_extend_credits(
3117 							inode->i_sb,
3118 							&def_xv.xv.xr_list,
3119 							new_clusters);
3120 			goto out;
3121 		}
3122 	}
3123 
3124 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3125 		/* the new values will be stored outside. */
3126 		u32 old_clusters = 0;
3127 
3128 		if (!ocfs2_xattr_is_local(xe)) {
3129 			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
3130 								 value_size);
3131 			xv = (struct ocfs2_xattr_value_root *)
3132 			     (base + name_offset + name_len);
3133 			value_size = OCFS2_XATTR_ROOT_SIZE;
3134 		} else
3135 			xv = &def_xv.xv;
3136 
3137 		if (old_clusters >= new_clusters) {
3138 			credits += ocfs2_remove_extent_credits(inode->i_sb);
3139 			goto out;
3140 		} else {
3141 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3142 			clusters_add += new_clusters - old_clusters;
3143 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3144 							     &xv->xr_list,
3145 							     new_clusters -
3146 							     old_clusters);
3147 			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3148 				goto out;
3149 		}
3150 	} else {
3151 		/*
3152 		 * Now the new value will be stored inside. So if the new
3153 		 * value is smaller than the size of value root or the old
3154 		 * value, we don't need any allocation, otherwise we have
3155 		 * to guess metadata allocation.
3156 		 */
3157 		if ((ocfs2_xattr_is_local(xe) &&
3158 		     (value_size >= xi->xi_value_len)) ||
3159 		    (!ocfs2_xattr_is_local(xe) &&
3160 		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3161 			goto out;
3162 	}
3163 
3164 meta_guess:
3165 	/* calculate metadata allocation. */
3166 	if (di->i_xattr_loc) {
3167 		if (!xbs->xattr_bh) {
3168 			ret = ocfs2_read_xattr_block(inode,
3169 						     le64_to_cpu(di->i_xattr_loc),
3170 						     &bh);
3171 			if (ret) {
3172 				mlog_errno(ret);
3173 				goto out;
3174 			}
3175 
3176 			xb = (struct ocfs2_xattr_block *)bh->b_data;
3177 		} else
3178 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3179 
3180 		/*
3181 		 * If there is already an xattr tree, good, we can calculate
3182 		 * like other b-trees. Otherwise we may have the chance of
3183 		 * create a tree, the credit calculation is borrowed from
3184 		 * ocfs2_calc_extend_credits with root_el = NULL. And the
3185 		 * new tree will be cluster based, so no meta is needed.
3186 		 */
3187 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3188 			struct ocfs2_extent_list *el =
3189 				 &xb->xb_attrs.xb_root.xt_list;
3190 			meta_add += ocfs2_extend_meta_needed(el);
3191 			credits += ocfs2_calc_extend_credits(inode->i_sb,
3192 							     el, 1);
3193 		} else
3194 			credits += OCFS2_SUBALLOC_ALLOC + 1;
3195 
3196 		/*
3197 		 * This cluster will be used either for new bucket or for
3198 		 * new xattr block.
3199 		 * If the cluster size is the same as the bucket size, one
3200 		 * more is needed since we may need to extend the bucket
3201 		 * also.
3202 		 */
3203 		clusters_add += 1;
3204 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3205 		if (OCFS2_XATTR_BUCKET_SIZE ==
3206 			OCFS2_SB(inode->i_sb)->s_clustersize) {
3207 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3208 			clusters_add += 1;
3209 		}
3210 	} else {
3211 		meta_add += 1;
3212 		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3213 	}
3214 out:
3215 	if (clusters_need)
3216 		*clusters_need = clusters_add;
3217 	if (meta_need)
3218 		*meta_need = meta_add;
3219 	if (credits_need)
3220 		*credits_need = credits;
3221 	brelse(bh);
3222 	return ret;
3223 }
3224 
3225 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3226 				     struct ocfs2_dinode *di,
3227 				     struct ocfs2_xattr_info *xi,
3228 				     struct ocfs2_xattr_search *xis,
3229 				     struct ocfs2_xattr_search *xbs,
3230 				     struct ocfs2_xattr_set_ctxt *ctxt,
3231 				     int extra_meta,
3232 				     int *credits)
3233 {
3234 	int clusters_add, meta_add, ret;
3235 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3236 
3237 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3238 
3239 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3240 
3241 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3242 					&clusters_add, &meta_add, credits);
3243 	if (ret) {
3244 		mlog_errno(ret);
3245 		return ret;
3246 	}
3247 
3248 	meta_add += extra_meta;
3249 	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3250 	     "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3251 
3252 	if (meta_add) {
3253 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3254 							&ctxt->meta_ac);
3255 		if (ret) {
3256 			mlog_errno(ret);
3257 			goto out;
3258 		}
3259 	}
3260 
3261 	if (clusters_add) {
3262 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3263 		if (ret)
3264 			mlog_errno(ret);
3265 	}
3266 out:
3267 	if (ret) {
3268 		if (ctxt->meta_ac) {
3269 			ocfs2_free_alloc_context(ctxt->meta_ac);
3270 			ctxt->meta_ac = NULL;
3271 		}
3272 
3273 		/*
3274 		 * We cannot have an error and a non null ctxt->data_ac.
3275 		 */
3276 	}
3277 
3278 	return ret;
3279 }
3280 
3281 static int __ocfs2_xattr_set_handle(struct inode *inode,
3282 				    struct ocfs2_dinode *di,
3283 				    struct ocfs2_xattr_info *xi,
3284 				    struct ocfs2_xattr_search *xis,
3285 				    struct ocfs2_xattr_search *xbs,
3286 				    struct ocfs2_xattr_set_ctxt *ctxt)
3287 {
3288 	int ret = 0, credits, old_found;
3289 
3290 	if (!xi->xi_value) {
3291 		/* Remove existing extended attribute */
3292 		if (!xis->not_found)
3293 			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3294 		else if (!xbs->not_found)
3295 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3296 	} else {
3297 		/* We always try to set extended attribute into inode first*/
3298 		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3299 		if (!ret && !xbs->not_found) {
3300 			/*
3301 			 * If succeed and that extended attribute existing in
3302 			 * external block, then we will remove it.
3303 			 */
3304 			xi->xi_value = NULL;
3305 			xi->xi_value_len = 0;
3306 
3307 			old_found = xis->not_found;
3308 			xis->not_found = -ENODATA;
3309 			ret = ocfs2_calc_xattr_set_need(inode,
3310 							di,
3311 							xi,
3312 							xis,
3313 							xbs,
3314 							NULL,
3315 							NULL,
3316 							&credits);
3317 			xis->not_found = old_found;
3318 			if (ret) {
3319 				mlog_errno(ret);
3320 				goto out;
3321 			}
3322 
3323 			ret = ocfs2_extend_trans(ctxt->handle, credits);
3324 			if (ret) {
3325 				mlog_errno(ret);
3326 				goto out;
3327 			}
3328 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3329 		} else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3330 			if (di->i_xattr_loc && !xbs->xattr_bh) {
3331 				ret = ocfs2_xattr_block_find(inode,
3332 							     xi->xi_name_index,
3333 							     xi->xi_name, xbs);
3334 				if (ret)
3335 					goto out;
3336 
3337 				old_found = xis->not_found;
3338 				xis->not_found = -ENODATA;
3339 				ret = ocfs2_calc_xattr_set_need(inode,
3340 								di,
3341 								xi,
3342 								xis,
3343 								xbs,
3344 								NULL,
3345 								NULL,
3346 								&credits);
3347 				xis->not_found = old_found;
3348 				if (ret) {
3349 					mlog_errno(ret);
3350 					goto out;
3351 				}
3352 
3353 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3354 				if (ret) {
3355 					mlog_errno(ret);
3356 					goto out;
3357 				}
3358 			}
3359 			/*
3360 			 * If no space in inode, we will set extended attribute
3361 			 * into external block.
3362 			 */
3363 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3364 			if (ret)
3365 				goto out;
3366 			if (!xis->not_found) {
3367 				/*
3368 				 * If succeed and that extended attribute
3369 				 * existing in inode, we will remove it.
3370 				 */
3371 				xi->xi_value = NULL;
3372 				xi->xi_value_len = 0;
3373 				xbs->not_found = -ENODATA;
3374 				ret = ocfs2_calc_xattr_set_need(inode,
3375 								di,
3376 								xi,
3377 								xis,
3378 								xbs,
3379 								NULL,
3380 								NULL,
3381 								&credits);
3382 				if (ret) {
3383 					mlog_errno(ret);
3384 					goto out;
3385 				}
3386 
3387 				ret = ocfs2_extend_trans(ctxt->handle, credits);
3388 				if (ret) {
3389 					mlog_errno(ret);
3390 					goto out;
3391 				}
3392 				ret = ocfs2_xattr_ibody_set(inode, xi,
3393 							    xis, ctxt);
3394 			}
3395 		}
3396 	}
3397 
3398 	if (!ret) {
3399 		/* Update inode ctime. */
3400 		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3401 					      xis->inode_bh,
3402 					      OCFS2_JOURNAL_ACCESS_WRITE);
3403 		if (ret) {
3404 			mlog_errno(ret);
3405 			goto out;
3406 		}
3407 
3408 		inode->i_ctime = CURRENT_TIME;
3409 		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3410 		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3411 		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3412 	}
3413 out:
3414 	return ret;
3415 }
3416 
3417 /*
3418  * This function only called duing creating inode
3419  * for init security/acl xattrs of the new inode.
3420  * All transanction credits have been reserved in mknod.
3421  */
3422 int ocfs2_xattr_set_handle(handle_t *handle,
3423 			   struct inode *inode,
3424 			   struct buffer_head *di_bh,
3425 			   int name_index,
3426 			   const char *name,
3427 			   const void *value,
3428 			   size_t value_len,
3429 			   int flags,
3430 			   struct ocfs2_alloc_context *meta_ac,
3431 			   struct ocfs2_alloc_context *data_ac)
3432 {
3433 	struct ocfs2_dinode *di;
3434 	int ret;
3435 
3436 	struct ocfs2_xattr_info xi = {
3437 		.xi_name_index = name_index,
3438 		.xi_name = name,
3439 		.xi_name_len = strlen(name),
3440 		.xi_value = value,
3441 		.xi_value_len = value_len,
3442 	};
3443 
3444 	struct ocfs2_xattr_search xis = {
3445 		.not_found = -ENODATA,
3446 	};
3447 
3448 	struct ocfs2_xattr_search xbs = {
3449 		.not_found = -ENODATA,
3450 	};
3451 
3452 	struct ocfs2_xattr_set_ctxt ctxt = {
3453 		.handle = handle,
3454 		.meta_ac = meta_ac,
3455 		.data_ac = data_ac,
3456 	};
3457 
3458 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3459 		return -EOPNOTSUPP;
3460 
3461 	/*
3462 	 * In extreme situation, may need xattr bucket when
3463 	 * block size is too small. And we have already reserved
3464 	 * the credits for bucket in mknod.
3465 	 */
3466 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3467 		xbs.bucket = ocfs2_xattr_bucket_new(inode);
3468 		if (!xbs.bucket) {
3469 			mlog_errno(-ENOMEM);
3470 			return -ENOMEM;
3471 		}
3472 	}
3473 
3474 	xis.inode_bh = xbs.inode_bh = di_bh;
3475 	di = (struct ocfs2_dinode *)di_bh->b_data;
3476 
3477 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3478 
3479 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3480 	if (ret)
3481 		goto cleanup;
3482 	if (xis.not_found) {
3483 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3484 		if (ret)
3485 			goto cleanup;
3486 	}
3487 
3488 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3489 
3490 cleanup:
3491 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3492 	brelse(xbs.xattr_bh);
3493 	ocfs2_xattr_bucket_free(xbs.bucket);
3494 
3495 	return ret;
3496 }
3497 
3498 /*
3499  * ocfs2_xattr_set()
3500  *
3501  * Set, replace or remove an extended attribute for this inode.
3502  * value is NULL to remove an existing extended attribute, else either
3503  * create or replace an extended attribute.
3504  */
3505 int ocfs2_xattr_set(struct inode *inode,
3506 		    int name_index,
3507 		    const char *name,
3508 		    const void *value,
3509 		    size_t value_len,
3510 		    int flags)
3511 {
3512 	struct buffer_head *di_bh = NULL;
3513 	struct ocfs2_dinode *di;
3514 	int ret, credits, ref_meta = 0, ref_credits = 0;
3515 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3516 	struct inode *tl_inode = osb->osb_tl_inode;
3517 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3518 	struct ocfs2_refcount_tree *ref_tree = NULL;
3519 
3520 	struct ocfs2_xattr_info xi = {
3521 		.xi_name_index = name_index,
3522 		.xi_name = name,
3523 		.xi_name_len = strlen(name),
3524 		.xi_value = value,
3525 		.xi_value_len = value_len,
3526 	};
3527 
3528 	struct ocfs2_xattr_search xis = {
3529 		.not_found = -ENODATA,
3530 	};
3531 
3532 	struct ocfs2_xattr_search xbs = {
3533 		.not_found = -ENODATA,
3534 	};
3535 
3536 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3537 		return -EOPNOTSUPP;
3538 
3539 	/*
3540 	 * Only xbs will be used on indexed trees.  xis doesn't need a
3541 	 * bucket.
3542 	 */
3543 	xbs.bucket = ocfs2_xattr_bucket_new(inode);
3544 	if (!xbs.bucket) {
3545 		mlog_errno(-ENOMEM);
3546 		return -ENOMEM;
3547 	}
3548 
3549 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
3550 	if (ret < 0) {
3551 		mlog_errno(ret);
3552 		goto cleanup_nolock;
3553 	}
3554 	xis.inode_bh = xbs.inode_bh = di_bh;
3555 	di = (struct ocfs2_dinode *)di_bh->b_data;
3556 
3557 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
3558 	/*
3559 	 * Scan inode and external block to find the same name
3560 	 * extended attribute and collect search infomation.
3561 	 */
3562 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3563 	if (ret)
3564 		goto cleanup;
3565 	if (xis.not_found) {
3566 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3567 		if (ret)
3568 			goto cleanup;
3569 	}
3570 
3571 	if (xis.not_found && xbs.not_found) {
3572 		ret = -ENODATA;
3573 		if (flags & XATTR_REPLACE)
3574 			goto cleanup;
3575 		ret = 0;
3576 		if (!value)
3577 			goto cleanup;
3578 	} else {
3579 		ret = -EEXIST;
3580 		if (flags & XATTR_CREATE)
3581 			goto cleanup;
3582 	}
3583 
3584 	/* Check whether the value is refcounted and do some prepartion. */
3585 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3586 	    (!xis.not_found || !xbs.not_found)) {
3587 		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3588 						   &xis, &xbs, &ref_tree,
3589 						   &ref_meta, &ref_credits);
3590 		if (ret) {
3591 			mlog_errno(ret);
3592 			goto cleanup;
3593 		}
3594 	}
3595 
3596 	mutex_lock(&tl_inode->i_mutex);
3597 
3598 	if (ocfs2_truncate_log_needs_flush(osb)) {
3599 		ret = __ocfs2_flush_truncate_log(osb);
3600 		if (ret < 0) {
3601 			mutex_unlock(&tl_inode->i_mutex);
3602 			mlog_errno(ret);
3603 			goto cleanup;
3604 		}
3605 	}
3606 	mutex_unlock(&tl_inode->i_mutex);
3607 
3608 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3609 					&xbs, &ctxt, ref_meta, &credits);
3610 	if (ret) {
3611 		mlog_errno(ret);
3612 		goto cleanup;
3613 	}
3614 
3615 	/* we need to update inode's ctime field, so add credit for it. */
3616 	credits += OCFS2_INODE_UPDATE_CREDITS;
3617 	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3618 	if (IS_ERR(ctxt.handle)) {
3619 		ret = PTR_ERR(ctxt.handle);
3620 		mlog_errno(ret);
3621 		goto cleanup;
3622 	}
3623 
3624 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3625 
3626 	ocfs2_commit_trans(osb, ctxt.handle);
3627 
3628 	if (ctxt.data_ac)
3629 		ocfs2_free_alloc_context(ctxt.data_ac);
3630 	if (ctxt.meta_ac)
3631 		ocfs2_free_alloc_context(ctxt.meta_ac);
3632 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3633 		ocfs2_schedule_truncate_log_flush(osb, 1);
3634 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
3635 
3636 cleanup:
3637 	if (ref_tree)
3638 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3639 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
3640 	if (!value && !ret) {
3641 		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3642 		if (ret)
3643 			mlog_errno(ret);
3644 	}
3645 	ocfs2_inode_unlock(inode, 1);
3646 cleanup_nolock:
3647 	brelse(di_bh);
3648 	brelse(xbs.xattr_bh);
3649 	ocfs2_xattr_bucket_free(xbs.bucket);
3650 
3651 	return ret;
3652 }
3653 
3654 /*
3655  * Find the xattr extent rec which may contains name_hash.
3656  * e_cpos will be the first name hash of the xattr rec.
3657  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3658  */
3659 static int ocfs2_xattr_get_rec(struct inode *inode,
3660 			       u32 name_hash,
3661 			       u64 *p_blkno,
3662 			       u32 *e_cpos,
3663 			       u32 *num_clusters,
3664 			       struct ocfs2_extent_list *el)
3665 {
3666 	int ret = 0, i;
3667 	struct buffer_head *eb_bh = NULL;
3668 	struct ocfs2_extent_block *eb;
3669 	struct ocfs2_extent_rec *rec = NULL;
3670 	u64 e_blkno = 0;
3671 
3672 	if (el->l_tree_depth) {
3673 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3674 				      &eb_bh);
3675 		if (ret) {
3676 			mlog_errno(ret);
3677 			goto out;
3678 		}
3679 
3680 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3681 		el = &eb->h_list;
3682 
3683 		if (el->l_tree_depth) {
3684 			ocfs2_error(inode->i_sb,
3685 				    "Inode %lu has non zero tree depth in "
3686 				    "xattr tree block %llu\n", inode->i_ino,
3687 				    (unsigned long long)eb_bh->b_blocknr);
3688 			ret = -EROFS;
3689 			goto out;
3690 		}
3691 	}
3692 
3693 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3694 		rec = &el->l_recs[i];
3695 
3696 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3697 			e_blkno = le64_to_cpu(rec->e_blkno);
3698 			break;
3699 		}
3700 	}
3701 
3702 	if (!e_blkno) {
3703 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3704 			    "record (%u, %u, 0) in xattr", inode->i_ino,
3705 			    le32_to_cpu(rec->e_cpos),
3706 			    ocfs2_rec_clusters(el, rec));
3707 		ret = -EROFS;
3708 		goto out;
3709 	}
3710 
3711 	*p_blkno = le64_to_cpu(rec->e_blkno);
3712 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3713 	if (e_cpos)
3714 		*e_cpos = le32_to_cpu(rec->e_cpos);
3715 out:
3716 	brelse(eb_bh);
3717 	return ret;
3718 }
3719 
3720 typedef int (xattr_bucket_func)(struct inode *inode,
3721 				struct ocfs2_xattr_bucket *bucket,
3722 				void *para);
3723 
3724 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3725 				   struct ocfs2_xattr_bucket *bucket,
3726 				   int name_index,
3727 				   const char *name,
3728 				   u32 name_hash,
3729 				   u16 *xe_index,
3730 				   int *found)
3731 {
3732 	int i, ret = 0, cmp = 1, block_off, new_offset;
3733 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3734 	size_t name_len = strlen(name);
3735 	struct ocfs2_xattr_entry *xe = NULL;
3736 	char *xe_name;
3737 
3738 	/*
3739 	 * We don't use binary search in the bucket because there
3740 	 * may be multiple entries with the same name hash.
3741 	 */
3742 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3743 		xe = &xh->xh_entries[i];
3744 
3745 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
3746 			continue;
3747 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3748 			break;
3749 
3750 		cmp = name_index - ocfs2_xattr_get_type(xe);
3751 		if (!cmp)
3752 			cmp = name_len - xe->xe_name_len;
3753 		if (cmp)
3754 			continue;
3755 
3756 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3757 							xh,
3758 							i,
3759 							&block_off,
3760 							&new_offset);
3761 		if (ret) {
3762 			mlog_errno(ret);
3763 			break;
3764 		}
3765 
3766 
3767 		xe_name = bucket_block(bucket, block_off) + new_offset;
3768 		if (!memcmp(name, xe_name, name_len)) {
3769 			*xe_index = i;
3770 			*found = 1;
3771 			ret = 0;
3772 			break;
3773 		}
3774 	}
3775 
3776 	return ret;
3777 }
3778 
3779 /*
3780  * Find the specified xattr entry in a series of buckets.
3781  * This series start from p_blkno and last for num_clusters.
3782  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3783  * the num of the valid buckets.
3784  *
3785  * Return the buffer_head this xattr should reside in. And if the xattr's
3786  * hash is in the gap of 2 buckets, return the lower bucket.
3787  */
3788 static int ocfs2_xattr_bucket_find(struct inode *inode,
3789 				   int name_index,
3790 				   const char *name,
3791 				   u32 name_hash,
3792 				   u64 p_blkno,
3793 				   u32 first_hash,
3794 				   u32 num_clusters,
3795 				   struct ocfs2_xattr_search *xs)
3796 {
3797 	int ret, found = 0;
3798 	struct ocfs2_xattr_header *xh = NULL;
3799 	struct ocfs2_xattr_entry *xe = NULL;
3800 	u16 index = 0;
3801 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3802 	int low_bucket = 0, bucket, high_bucket;
3803 	struct ocfs2_xattr_bucket *search;
3804 	u32 last_hash;
3805 	u64 blkno, lower_blkno = 0;
3806 
3807 	search = ocfs2_xattr_bucket_new(inode);
3808 	if (!search) {
3809 		ret = -ENOMEM;
3810 		mlog_errno(ret);
3811 		goto out;
3812 	}
3813 
3814 	ret = ocfs2_read_xattr_bucket(search, p_blkno);
3815 	if (ret) {
3816 		mlog_errno(ret);
3817 		goto out;
3818 	}
3819 
3820 	xh = bucket_xh(search);
3821 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3822 	while (low_bucket <= high_bucket) {
3823 		ocfs2_xattr_bucket_relse(search);
3824 
3825 		bucket = (low_bucket + high_bucket) / 2;
3826 		blkno = p_blkno + bucket * blk_per_bucket;
3827 		ret = ocfs2_read_xattr_bucket(search, blkno);
3828 		if (ret) {
3829 			mlog_errno(ret);
3830 			goto out;
3831 		}
3832 
3833 		xh = bucket_xh(search);
3834 		xe = &xh->xh_entries[0];
3835 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3836 			high_bucket = bucket - 1;
3837 			continue;
3838 		}
3839 
3840 		/*
3841 		 * Check whether the hash of the last entry in our
3842 		 * bucket is larger than the search one. for an empty
3843 		 * bucket, the last one is also the first one.
3844 		 */
3845 		if (xh->xh_count)
3846 			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3847 
3848 		last_hash = le32_to_cpu(xe->xe_name_hash);
3849 
3850 		/* record lower_blkno which may be the insert place. */
3851 		lower_blkno = blkno;
3852 
3853 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3854 			low_bucket = bucket + 1;
3855 			continue;
3856 		}
3857 
3858 		/* the searched xattr should reside in this bucket if exists. */
3859 		ret = ocfs2_find_xe_in_bucket(inode, search,
3860 					      name_index, name, name_hash,
3861 					      &index, &found);
3862 		if (ret) {
3863 			mlog_errno(ret);
3864 			goto out;
3865 		}
3866 		break;
3867 	}
3868 
3869 	/*
3870 	 * Record the bucket we have found.
3871 	 * When the xattr's hash value is in the gap of 2 buckets, we will
3872 	 * always set it to the previous bucket.
3873 	 */
3874 	if (!lower_blkno)
3875 		lower_blkno = p_blkno;
3876 
3877 	/* This should be in cache - we just read it during the search */
3878 	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3879 	if (ret) {
3880 		mlog_errno(ret);
3881 		goto out;
3882 	}
3883 
3884 	xs->header = bucket_xh(xs->bucket);
3885 	xs->base = bucket_block(xs->bucket, 0);
3886 	xs->end = xs->base + inode->i_sb->s_blocksize;
3887 
3888 	if (found) {
3889 		xs->here = &xs->header->xh_entries[index];
3890 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3891 		     (unsigned long long)bucket_blkno(xs->bucket), index);
3892 	} else
3893 		ret = -ENODATA;
3894 
3895 out:
3896 	ocfs2_xattr_bucket_free(search);
3897 	return ret;
3898 }
3899 
3900 static int ocfs2_xattr_index_block_find(struct inode *inode,
3901 					struct buffer_head *root_bh,
3902 					int name_index,
3903 					const char *name,
3904 					struct ocfs2_xattr_search *xs)
3905 {
3906 	int ret;
3907 	struct ocfs2_xattr_block *xb =
3908 			(struct ocfs2_xattr_block *)root_bh->b_data;
3909 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3910 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3911 	u64 p_blkno = 0;
3912 	u32 first_hash, num_clusters = 0;
3913 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3914 
3915 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3916 		return -ENODATA;
3917 
3918 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3919 	     name, name_hash, name_index);
3920 
3921 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3922 				  &num_clusters, el);
3923 	if (ret) {
3924 		mlog_errno(ret);
3925 		goto out;
3926 	}
3927 
3928 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3929 
3930 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3931 	     "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3932 	     first_hash);
3933 
3934 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3935 				      p_blkno, first_hash, num_clusters, xs);
3936 
3937 out:
3938 	return ret;
3939 }
3940 
3941 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3942 				       u64 blkno,
3943 				       u32 clusters,
3944 				       xattr_bucket_func *func,
3945 				       void *para)
3946 {
3947 	int i, ret = 0;
3948 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3949 	u32 num_buckets = clusters * bpc;
3950 	struct ocfs2_xattr_bucket *bucket;
3951 
3952 	bucket = ocfs2_xattr_bucket_new(inode);
3953 	if (!bucket) {
3954 		mlog_errno(-ENOMEM);
3955 		return -ENOMEM;
3956 	}
3957 
3958 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3959 	     clusters, (unsigned long long)blkno);
3960 
3961 	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3962 		ret = ocfs2_read_xattr_bucket(bucket, blkno);
3963 		if (ret) {
3964 			mlog_errno(ret);
3965 			break;
3966 		}
3967 
3968 		/*
3969 		 * The real bucket num in this series of blocks is stored
3970 		 * in the 1st bucket.
3971 		 */
3972 		if (i == 0)
3973 			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3974 
3975 		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3976 		     (unsigned long long)blkno,
3977 		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3978 		if (func) {
3979 			ret = func(inode, bucket, para);
3980 			if (ret && ret != -ERANGE)
3981 				mlog_errno(ret);
3982 			/* Fall through to bucket_relse() */
3983 		}
3984 
3985 		ocfs2_xattr_bucket_relse(bucket);
3986 		if (ret)
3987 			break;
3988 	}
3989 
3990 	ocfs2_xattr_bucket_free(bucket);
3991 	return ret;
3992 }
3993 
3994 struct ocfs2_xattr_tree_list {
3995 	char *buffer;
3996 	size_t buffer_size;
3997 	size_t result;
3998 };
3999 
4000 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
4001 					     struct ocfs2_xattr_header *xh,
4002 					     int index,
4003 					     int *block_off,
4004 					     int *new_offset)
4005 {
4006 	u16 name_offset;
4007 
4008 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4009 		return -EINVAL;
4010 
4011 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4012 
4013 	*block_off = name_offset >> sb->s_blocksize_bits;
4014 	*new_offset = name_offset % sb->s_blocksize;
4015 
4016 	return 0;
4017 }
4018 
4019 static int ocfs2_list_xattr_bucket(struct inode *inode,
4020 				   struct ocfs2_xattr_bucket *bucket,
4021 				   void *para)
4022 {
4023 	int ret = 0, type;
4024 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4025 	int i, block_off, new_offset;
4026 	const char *prefix, *name;
4027 
4028 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4029 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4030 		type = ocfs2_xattr_get_type(entry);
4031 		prefix = ocfs2_xattr_prefix(type);
4032 
4033 		if (prefix) {
4034 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4035 								bucket_xh(bucket),
4036 								i,
4037 								&block_off,
4038 								&new_offset);
4039 			if (ret)
4040 				break;
4041 
4042 			name = (const char *)bucket_block(bucket, block_off) +
4043 				new_offset;
4044 			ret = ocfs2_xattr_list_entry(xl->buffer,
4045 						     xl->buffer_size,
4046 						     &xl->result,
4047 						     prefix, name,
4048 						     entry->xe_name_len);
4049 			if (ret)
4050 				break;
4051 		}
4052 	}
4053 
4054 	return ret;
4055 }
4056 
4057 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4058 					   struct buffer_head *blk_bh,
4059 					   xattr_tree_rec_func *rec_func,
4060 					   void *para)
4061 {
4062 	struct ocfs2_xattr_block *xb =
4063 			(struct ocfs2_xattr_block *)blk_bh->b_data;
4064 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4065 	int ret = 0;
4066 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4067 	u64 p_blkno = 0;
4068 
4069 	if (!el->l_next_free_rec || !rec_func)
4070 		return 0;
4071 
4072 	while (name_hash > 0) {
4073 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4074 					  &e_cpos, &num_clusters, el);
4075 		if (ret) {
4076 			mlog_errno(ret);
4077 			break;
4078 		}
4079 
4080 		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4081 			       num_clusters, para);
4082 		if (ret) {
4083 			if (ret != -ERANGE)
4084 				mlog_errno(ret);
4085 			break;
4086 		}
4087 
4088 		if (e_cpos == 0)
4089 			break;
4090 
4091 		name_hash = e_cpos - 1;
4092 	}
4093 
4094 	return ret;
4095 
4096 }
4097 
4098 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4099 				     struct buffer_head *root_bh,
4100 				     u64 blkno, u32 cpos, u32 len, void *para)
4101 {
4102 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4103 					   ocfs2_list_xattr_bucket, para);
4104 }
4105 
4106 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4107 					     struct buffer_head *blk_bh,
4108 					     char *buffer,
4109 					     size_t buffer_size)
4110 {
4111 	int ret;
4112 	struct ocfs2_xattr_tree_list xl = {
4113 		.buffer = buffer,
4114 		.buffer_size = buffer_size,
4115 		.result = 0,
4116 	};
4117 
4118 	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4119 					      ocfs2_list_xattr_tree_rec, &xl);
4120 	if (ret) {
4121 		mlog_errno(ret);
4122 		goto out;
4123 	}
4124 
4125 	ret = xl.result;
4126 out:
4127 	return ret;
4128 }
4129 
4130 static int cmp_xe(const void *a, const void *b)
4131 {
4132 	const struct ocfs2_xattr_entry *l = a, *r = b;
4133 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
4134 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
4135 
4136 	if (l_hash > r_hash)
4137 		return 1;
4138 	if (l_hash < r_hash)
4139 		return -1;
4140 	return 0;
4141 }
4142 
4143 static void swap_xe(void *a, void *b, int size)
4144 {
4145 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4146 
4147 	tmp = *l;
4148 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4149 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4150 }
4151 
4152 /*
4153  * When the ocfs2_xattr_block is filled up, new bucket will be created
4154  * and all the xattr entries will be moved to the new bucket.
4155  * The header goes at the start of the bucket, and the names+values are
4156  * filled from the end.  This is why *target starts as the last buffer.
4157  * Note: we need to sort the entries since they are not saved in order
4158  * in the ocfs2_xattr_block.
4159  */
4160 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4161 					   struct buffer_head *xb_bh,
4162 					   struct ocfs2_xattr_bucket *bucket)
4163 {
4164 	int i, blocksize = inode->i_sb->s_blocksize;
4165 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4166 	u16 offset, size, off_change;
4167 	struct ocfs2_xattr_entry *xe;
4168 	struct ocfs2_xattr_block *xb =
4169 				(struct ocfs2_xattr_block *)xb_bh->b_data;
4170 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4171 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4172 	u16 count = le16_to_cpu(xb_xh->xh_count);
4173 	char *src = xb_bh->b_data;
4174 	char *target = bucket_block(bucket, blks - 1);
4175 
4176 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
4177 	     (unsigned long long)xb_bh->b_blocknr,
4178 	     (unsigned long long)bucket_blkno(bucket));
4179 
4180 	for (i = 0; i < blks; i++)
4181 		memset(bucket_block(bucket, i), 0, blocksize);
4182 
4183 	/*
4184 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
4185 	 * there is a offset change corresponding to the change of
4186 	 * ocfs2_xattr_header's position.
4187 	 */
4188 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4189 	xe = &xb_xh->xh_entries[count - 1];
4190 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4191 	size = blocksize - offset;
4192 
4193 	/* copy all the names and values. */
4194 	memcpy(target + offset, src + offset, size);
4195 
4196 	/* Init new header now. */
4197 	xh->xh_count = xb_xh->xh_count;
4198 	xh->xh_num_buckets = cpu_to_le16(1);
4199 	xh->xh_name_value_len = cpu_to_le16(size);
4200 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4201 
4202 	/* copy all the entries. */
4203 	target = bucket_block(bucket, 0);
4204 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4205 	size = count * sizeof(struct ocfs2_xattr_entry);
4206 	memcpy(target + offset, (char *)xb_xh + offset, size);
4207 
4208 	/* Change the xe offset for all the xe because of the move. */
4209 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4210 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4211 	for (i = 0; i < count; i++)
4212 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4213 
4214 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4215 	     offset, size, off_change);
4216 
4217 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4218 	     cmp_xe, swap_xe);
4219 }
4220 
4221 /*
4222  * After we move xattr from block to index btree, we have to
4223  * update ocfs2_xattr_search to the new xe and base.
4224  *
4225  * When the entry is in xattr block, xattr_bh indicates the storage place.
4226  * While if the entry is in index b-tree, "bucket" indicates the
4227  * real place of the xattr.
4228  */
4229 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4230 					    struct ocfs2_xattr_search *xs,
4231 					    struct buffer_head *old_bh)
4232 {
4233 	char *buf = old_bh->b_data;
4234 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4235 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4236 	int i;
4237 
4238 	xs->header = bucket_xh(xs->bucket);
4239 	xs->base = bucket_block(xs->bucket, 0);
4240 	xs->end = xs->base + inode->i_sb->s_blocksize;
4241 
4242 	if (xs->not_found)
4243 		return;
4244 
4245 	i = xs->here - old_xh->xh_entries;
4246 	xs->here = &xs->header->xh_entries[i];
4247 }
4248 
4249 static int ocfs2_xattr_create_index_block(struct inode *inode,
4250 					  struct ocfs2_xattr_search *xs,
4251 					  struct ocfs2_xattr_set_ctxt *ctxt)
4252 {
4253 	int ret;
4254 	u32 bit_off, len;
4255 	u64 blkno;
4256 	handle_t *handle = ctxt->handle;
4257 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
4258 	struct buffer_head *xb_bh = xs->xattr_bh;
4259 	struct ocfs2_xattr_block *xb =
4260 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4261 	struct ocfs2_xattr_tree_root *xr;
4262 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
4263 
4264 	mlog(0, "create xattr index block for %llu\n",
4265 	     (unsigned long long)xb_bh->b_blocknr);
4266 
4267 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4268 	BUG_ON(!xs->bucket);
4269 
4270 	/*
4271 	 * XXX:
4272 	 * We can use this lock for now, and maybe move to a dedicated mutex
4273 	 * if performance becomes a problem later.
4274 	 */
4275 	down_write(&oi->ip_alloc_sem);
4276 
4277 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4278 				      OCFS2_JOURNAL_ACCESS_WRITE);
4279 	if (ret) {
4280 		mlog_errno(ret);
4281 		goto out;
4282 	}
4283 
4284 	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4285 				     1, 1, &bit_off, &len);
4286 	if (ret) {
4287 		mlog_errno(ret);
4288 		goto out;
4289 	}
4290 
4291 	/*
4292 	 * The bucket may spread in many blocks, and
4293 	 * we will only touch the 1st block and the last block
4294 	 * in the whole bucket(one for entry and one for data).
4295 	 */
4296 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4297 
4298 	mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4299 	     (unsigned long long)blkno);
4300 
4301 	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4302 	if (ret) {
4303 		mlog_errno(ret);
4304 		goto out;
4305 	}
4306 
4307 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4308 						OCFS2_JOURNAL_ACCESS_CREATE);
4309 	if (ret) {
4310 		mlog_errno(ret);
4311 		goto out;
4312 	}
4313 
4314 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4315 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4316 
4317 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4318 
4319 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4320 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4321 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
4322 
4323 	xr = &xb->xb_attrs.xb_root;
4324 	xr->xt_clusters = cpu_to_le32(1);
4325 	xr->xt_last_eb_blk = 0;
4326 	xr->xt_list.l_tree_depth = 0;
4327 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4328 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4329 
4330 	xr->xt_list.l_recs[0].e_cpos = 0;
4331 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4332 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4333 
4334 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4335 
4336 	ocfs2_journal_dirty(handle, xb_bh);
4337 
4338 out:
4339 	up_write(&oi->ip_alloc_sem);
4340 
4341 	return ret;
4342 }
4343 
4344 static int cmp_xe_offset(const void *a, const void *b)
4345 {
4346 	const struct ocfs2_xattr_entry *l = a, *r = b;
4347 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4348 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4349 
4350 	if (l_name_offset < r_name_offset)
4351 		return 1;
4352 	if (l_name_offset > r_name_offset)
4353 		return -1;
4354 	return 0;
4355 }
4356 
4357 /*
4358  * defrag a xattr bucket if we find that the bucket has some
4359  * holes beteen name/value pairs.
4360  * We will move all the name/value pairs to the end of the bucket
4361  * so that we can spare some space for insertion.
4362  */
4363 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4364 				     handle_t *handle,
4365 				     struct ocfs2_xattr_bucket *bucket)
4366 {
4367 	int ret, i;
4368 	size_t end, offset, len;
4369 	struct ocfs2_xattr_header *xh;
4370 	char *entries, *buf, *bucket_buf = NULL;
4371 	u64 blkno = bucket_blkno(bucket);
4372 	u16 xh_free_start;
4373 	size_t blocksize = inode->i_sb->s_blocksize;
4374 	struct ocfs2_xattr_entry *xe;
4375 
4376 	/*
4377 	 * In order to make the operation more efficient and generic,
4378 	 * we copy all the blocks into a contiguous memory and do the
4379 	 * defragment there, so if anything is error, we will not touch
4380 	 * the real block.
4381 	 */
4382 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4383 	if (!bucket_buf) {
4384 		ret = -EIO;
4385 		goto out;
4386 	}
4387 
4388 	buf = bucket_buf;
4389 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4390 		memcpy(buf, bucket_block(bucket, i), blocksize);
4391 
4392 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4393 						OCFS2_JOURNAL_ACCESS_WRITE);
4394 	if (ret < 0) {
4395 		mlog_errno(ret);
4396 		goto out;
4397 	}
4398 
4399 	xh = (struct ocfs2_xattr_header *)bucket_buf;
4400 	entries = (char *)xh->xh_entries;
4401 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4402 
4403 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
4404 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
4405 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4406 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4407 
4408 	/*
4409 	 * sort all the entries by their offset.
4410 	 * the largest will be the first, so that we can
4411 	 * move them to the end one by one.
4412 	 */
4413 	sort(entries, le16_to_cpu(xh->xh_count),
4414 	     sizeof(struct ocfs2_xattr_entry),
4415 	     cmp_xe_offset, swap_xe);
4416 
4417 	/* Move all name/values to the end of the bucket. */
4418 	xe = xh->xh_entries;
4419 	end = OCFS2_XATTR_BUCKET_SIZE;
4420 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4421 		offset = le16_to_cpu(xe->xe_name_offset);
4422 		len = namevalue_size_xe(xe);
4423 
4424 		/*
4425 		 * We must make sure that the name/value pair
4426 		 * exist in the same block. So adjust end to
4427 		 * the previous block end if needed.
4428 		 */
4429 		if (((end - len) / blocksize !=
4430 			(end - 1) / blocksize))
4431 			end = end - end % blocksize;
4432 
4433 		if (end > offset + len) {
4434 			memmove(bucket_buf + end - len,
4435 				bucket_buf + offset, len);
4436 			xe->xe_name_offset = cpu_to_le16(end - len);
4437 		}
4438 
4439 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4440 				"bucket %llu\n", (unsigned long long)blkno);
4441 
4442 		end -= len;
4443 	}
4444 
4445 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4446 			"bucket %llu\n", (unsigned long long)blkno);
4447 
4448 	if (xh_free_start == end)
4449 		goto out;
4450 
4451 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4452 	xh->xh_free_start = cpu_to_le16(end);
4453 
4454 	/* sort the entries by their name_hash. */
4455 	sort(entries, le16_to_cpu(xh->xh_count),
4456 	     sizeof(struct ocfs2_xattr_entry),
4457 	     cmp_xe, swap_xe);
4458 
4459 	buf = bucket_buf;
4460 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4461 		memcpy(bucket_block(bucket, i), buf, blocksize);
4462 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4463 
4464 out:
4465 	kfree(bucket_buf);
4466 	return ret;
4467 }
4468 
4469 /*
4470  * prev_blkno points to the start of an existing extent.  new_blkno
4471  * points to a newly allocated extent.  Because we know each of our
4472  * clusters contains more than bucket, we can easily split one cluster
4473  * at a bucket boundary.  So we take the last cluster of the existing
4474  * extent and split it down the middle.  We move the last half of the
4475  * buckets in the last cluster of the existing extent over to the new
4476  * extent.
4477  *
4478  * first_bh is the buffer at prev_blkno so we can update the existing
4479  * extent's bucket count.  header_bh is the bucket were we were hoping
4480  * to insert our xattr.  If the bucket move places the target in the new
4481  * extent, we'll update first_bh and header_bh after modifying the old
4482  * extent.
4483  *
4484  * first_hash will be set as the 1st xe's name_hash in the new extent.
4485  */
4486 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4487 					       handle_t *handle,
4488 					       struct ocfs2_xattr_bucket *first,
4489 					       struct ocfs2_xattr_bucket *target,
4490 					       u64 new_blkno,
4491 					       u32 num_clusters,
4492 					       u32 *first_hash)
4493 {
4494 	int ret;
4495 	struct super_block *sb = inode->i_sb;
4496 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4497 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4498 	int to_move = num_buckets / 2;
4499 	u64 src_blkno;
4500 	u64 last_cluster_blkno = bucket_blkno(first) +
4501 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4502 
4503 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4504 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4505 
4506 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4507 	     (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4508 
4509 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4510 				     last_cluster_blkno, new_blkno,
4511 				     to_move, first_hash);
4512 	if (ret) {
4513 		mlog_errno(ret);
4514 		goto out;
4515 	}
4516 
4517 	/* This is the first bucket that got moved */
4518 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4519 
4520 	/*
4521 	 * If the target bucket was part of the moved buckets, we need to
4522 	 * update first and target.
4523 	 */
4524 	if (bucket_blkno(target) >= src_blkno) {
4525 		/* Find the block for the new target bucket */
4526 		src_blkno = new_blkno +
4527 			(bucket_blkno(target) - src_blkno);
4528 
4529 		ocfs2_xattr_bucket_relse(first);
4530 		ocfs2_xattr_bucket_relse(target);
4531 
4532 		/*
4533 		 * These shouldn't fail - the buffers are in the
4534 		 * journal from ocfs2_cp_xattr_bucket().
4535 		 */
4536 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
4537 		if (ret) {
4538 			mlog_errno(ret);
4539 			goto out;
4540 		}
4541 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
4542 		if (ret)
4543 			mlog_errno(ret);
4544 
4545 	}
4546 
4547 out:
4548 	return ret;
4549 }
4550 
4551 /*
4552  * Find the suitable pos when we divide a bucket into 2.
4553  * We have to make sure the xattrs with the same hash value exist
4554  * in the same bucket.
4555  *
4556  * If this ocfs2_xattr_header covers more than one hash value, find a
4557  * place where the hash value changes.  Try to find the most even split.
4558  * The most common case is that all entries have different hash values,
4559  * and the first check we make will find a place to split.
4560  */
4561 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4562 {
4563 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
4564 	int count = le16_to_cpu(xh->xh_count);
4565 	int delta, middle = count / 2;
4566 
4567 	/*
4568 	 * We start at the middle.  Each step gets farther away in both
4569 	 * directions.  We therefore hit the change in hash value
4570 	 * nearest to the middle.  Note that this loop does not execute for
4571 	 * count < 2.
4572 	 */
4573 	for (delta = 0; delta < middle; delta++) {
4574 		/* Let's check delta earlier than middle */
4575 		if (cmp_xe(&entries[middle - delta - 1],
4576 			   &entries[middle - delta]))
4577 			return middle - delta;
4578 
4579 		/* For even counts, don't walk off the end */
4580 		if ((middle + delta + 1) == count)
4581 			continue;
4582 
4583 		/* Now try delta past middle */
4584 		if (cmp_xe(&entries[middle + delta],
4585 			   &entries[middle + delta + 1]))
4586 			return middle + delta + 1;
4587 	}
4588 
4589 	/* Every entry had the same hash */
4590 	return count;
4591 }
4592 
4593 /*
4594  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4595  * first_hash will record the 1st hash of the new bucket.
4596  *
4597  * Normally half of the xattrs will be moved.  But we have to make
4598  * sure that the xattrs with the same hash value are stored in the
4599  * same bucket. If all the xattrs in this bucket have the same hash
4600  * value, the new bucket will be initialized as an empty one and the
4601  * first_hash will be initialized as (hash_value+1).
4602  */
4603 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4604 				    handle_t *handle,
4605 				    u64 blk,
4606 				    u64 new_blk,
4607 				    u32 *first_hash,
4608 				    int new_bucket_head)
4609 {
4610 	int ret, i;
4611 	int count, start, len, name_value_len = 0, name_offset = 0;
4612 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4613 	struct ocfs2_xattr_header *xh;
4614 	struct ocfs2_xattr_entry *xe;
4615 	int blocksize = inode->i_sb->s_blocksize;
4616 
4617 	mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4618 	     (unsigned long long)blk, (unsigned long long)new_blk);
4619 
4620 	s_bucket = ocfs2_xattr_bucket_new(inode);
4621 	t_bucket = ocfs2_xattr_bucket_new(inode);
4622 	if (!s_bucket || !t_bucket) {
4623 		ret = -ENOMEM;
4624 		mlog_errno(ret);
4625 		goto out;
4626 	}
4627 
4628 	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4629 	if (ret) {
4630 		mlog_errno(ret);
4631 		goto out;
4632 	}
4633 
4634 	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4635 						OCFS2_JOURNAL_ACCESS_WRITE);
4636 	if (ret) {
4637 		mlog_errno(ret);
4638 		goto out;
4639 	}
4640 
4641 	/*
4642 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4643 	 * there's no need to read it.
4644 	 */
4645 	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4646 	if (ret) {
4647 		mlog_errno(ret);
4648 		goto out;
4649 	}
4650 
4651 	/*
4652 	 * Hey, if we're overwriting t_bucket, what difference does
4653 	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4654 	 * same part of ocfs2_cp_xattr_bucket().
4655 	 */
4656 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4657 						new_bucket_head ?
4658 						OCFS2_JOURNAL_ACCESS_CREATE :
4659 						OCFS2_JOURNAL_ACCESS_WRITE);
4660 	if (ret) {
4661 		mlog_errno(ret);
4662 		goto out;
4663 	}
4664 
4665 	xh = bucket_xh(s_bucket);
4666 	count = le16_to_cpu(xh->xh_count);
4667 	start = ocfs2_xattr_find_divide_pos(xh);
4668 
4669 	if (start == count) {
4670 		xe = &xh->xh_entries[start-1];
4671 
4672 		/*
4673 		 * initialized a new empty bucket here.
4674 		 * The hash value is set as one larger than
4675 		 * that of the last entry in the previous bucket.
4676 		 */
4677 		for (i = 0; i < t_bucket->bu_blocks; i++)
4678 			memset(bucket_block(t_bucket, i), 0, blocksize);
4679 
4680 		xh = bucket_xh(t_bucket);
4681 		xh->xh_free_start = cpu_to_le16(blocksize);
4682 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4683 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4684 
4685 		goto set_num_buckets;
4686 	}
4687 
4688 	/* copy the whole bucket to the new first. */
4689 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4690 
4691 	/* update the new bucket. */
4692 	xh = bucket_xh(t_bucket);
4693 
4694 	/*
4695 	 * Calculate the total name/value len and xh_free_start for
4696 	 * the old bucket first.
4697 	 */
4698 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
4699 	name_value_len = 0;
4700 	for (i = 0; i < start; i++) {
4701 		xe = &xh->xh_entries[i];
4702 		name_value_len += namevalue_size_xe(xe);
4703 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4704 			name_offset = le16_to_cpu(xe->xe_name_offset);
4705 	}
4706 
4707 	/*
4708 	 * Now begin the modification to the new bucket.
4709 	 *
4710 	 * In the new bucket, We just move the xattr entry to the beginning
4711 	 * and don't touch the name/value. So there will be some holes in the
4712 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4713 	 * called.
4714 	 */
4715 	xe = &xh->xh_entries[start];
4716 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4717 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4718 	     (int)((char *)xe - (char *)xh),
4719 	     (int)((char *)xh->xh_entries - (char *)xh));
4720 	memmove((char *)xh->xh_entries, (char *)xe, len);
4721 	xe = &xh->xh_entries[count - start];
4722 	len = sizeof(struct ocfs2_xattr_entry) * start;
4723 	memset((char *)xe, 0, len);
4724 
4725 	le16_add_cpu(&xh->xh_count, -start);
4726 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4727 
4728 	/* Calculate xh_free_start for the new bucket. */
4729 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4730 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4731 		xe = &xh->xh_entries[i];
4732 		if (le16_to_cpu(xe->xe_name_offset) <
4733 		    le16_to_cpu(xh->xh_free_start))
4734 			xh->xh_free_start = xe->xe_name_offset;
4735 	}
4736 
4737 set_num_buckets:
4738 	/* set xh->xh_num_buckets for the new xh. */
4739 	if (new_bucket_head)
4740 		xh->xh_num_buckets = cpu_to_le16(1);
4741 	else
4742 		xh->xh_num_buckets = 0;
4743 
4744 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4745 
4746 	/* store the first_hash of the new bucket. */
4747 	if (first_hash)
4748 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4749 
4750 	/*
4751 	 * Now only update the 1st block of the old bucket.  If we
4752 	 * just added a new empty bucket, there is no need to modify
4753 	 * it.
4754 	 */
4755 	if (start == count)
4756 		goto out;
4757 
4758 	xh = bucket_xh(s_bucket);
4759 	memset(&xh->xh_entries[start], 0,
4760 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
4761 	xh->xh_count = cpu_to_le16(start);
4762 	xh->xh_free_start = cpu_to_le16(name_offset);
4763 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
4764 
4765 	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4766 
4767 out:
4768 	ocfs2_xattr_bucket_free(s_bucket);
4769 	ocfs2_xattr_bucket_free(t_bucket);
4770 
4771 	return ret;
4772 }
4773 
4774 /*
4775  * Copy xattr from one bucket to another bucket.
4776  *
4777  * The caller must make sure that the journal transaction
4778  * has enough space for journaling.
4779  */
4780 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4781 				 handle_t *handle,
4782 				 u64 s_blkno,
4783 				 u64 t_blkno,
4784 				 int t_is_new)
4785 {
4786 	int ret;
4787 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4788 
4789 	BUG_ON(s_blkno == t_blkno);
4790 
4791 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
4792 	     (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4793 	     t_is_new);
4794 
4795 	s_bucket = ocfs2_xattr_bucket_new(inode);
4796 	t_bucket = ocfs2_xattr_bucket_new(inode);
4797 	if (!s_bucket || !t_bucket) {
4798 		ret = -ENOMEM;
4799 		mlog_errno(ret);
4800 		goto out;
4801 	}
4802 
4803 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4804 	if (ret)
4805 		goto out;
4806 
4807 	/*
4808 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4809 	 * there's no need to read it.
4810 	 */
4811 	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4812 	if (ret)
4813 		goto out;
4814 
4815 	/*
4816 	 * Hey, if we're overwriting t_bucket, what difference does
4817 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4818 	 * cluster to fill, we came here from
4819 	 * ocfs2_mv_xattr_buckets(), and it is really new -
4820 	 * ACCESS_CREATE is required.  But we also might have moved data
4821 	 * out of t_bucket before extending back into it.
4822 	 * ocfs2_add_new_xattr_bucket() can do this - its call to
4823 	 * ocfs2_add_new_xattr_cluster() may have created a new extent
4824 	 * and copied out the end of the old extent.  Then it re-extends
4825 	 * the old extent back to create space for new xattrs.  That's
4826 	 * how we get here, and the bucket isn't really new.
4827 	 */
4828 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4829 						t_is_new ?
4830 						OCFS2_JOURNAL_ACCESS_CREATE :
4831 						OCFS2_JOURNAL_ACCESS_WRITE);
4832 	if (ret)
4833 		goto out;
4834 
4835 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4836 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4837 
4838 out:
4839 	ocfs2_xattr_bucket_free(t_bucket);
4840 	ocfs2_xattr_bucket_free(s_bucket);
4841 
4842 	return ret;
4843 }
4844 
4845 /*
4846  * src_blk points to the start of an existing extent.  last_blk points to
4847  * last cluster in that extent.  to_blk points to a newly allocated
4848  * extent.  We copy the buckets from the cluster at last_blk to the new
4849  * extent.  If start_bucket is non-zero, we skip that many buckets before
4850  * we start copying.  The new extent's xh_num_buckets gets set to the
4851  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4852  * by the same amount.
4853  */
4854 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4855 				  u64 src_blk, u64 last_blk, u64 to_blk,
4856 				  unsigned int start_bucket,
4857 				  u32 *first_hash)
4858 {
4859 	int i, ret, credits;
4860 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4861 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4862 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4863 	struct ocfs2_xattr_bucket *old_first, *new_first;
4864 
4865 	mlog(0, "mv xattrs from cluster %llu to %llu\n",
4866 	     (unsigned long long)last_blk, (unsigned long long)to_blk);
4867 
4868 	BUG_ON(start_bucket >= num_buckets);
4869 	if (start_bucket) {
4870 		num_buckets -= start_bucket;
4871 		last_blk += (start_bucket * blks_per_bucket);
4872 	}
4873 
4874 	/* The first bucket of the original extent */
4875 	old_first = ocfs2_xattr_bucket_new(inode);
4876 	/* The first bucket of the new extent */
4877 	new_first = ocfs2_xattr_bucket_new(inode);
4878 	if (!old_first || !new_first) {
4879 		ret = -ENOMEM;
4880 		mlog_errno(ret);
4881 		goto out;
4882 	}
4883 
4884 	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4885 	if (ret) {
4886 		mlog_errno(ret);
4887 		goto out;
4888 	}
4889 
4890 	/*
4891 	 * We need to update the first bucket of the old extent and all
4892 	 * the buckets going to the new extent.
4893 	 */
4894 	credits = ((num_buckets + 1) * blks_per_bucket);
4895 	ret = ocfs2_extend_trans(handle, credits);
4896 	if (ret) {
4897 		mlog_errno(ret);
4898 		goto out;
4899 	}
4900 
4901 	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4902 						OCFS2_JOURNAL_ACCESS_WRITE);
4903 	if (ret) {
4904 		mlog_errno(ret);
4905 		goto out;
4906 	}
4907 
4908 	for (i = 0; i < num_buckets; i++) {
4909 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4910 					    last_blk + (i * blks_per_bucket),
4911 					    to_blk + (i * blks_per_bucket),
4912 					    1);
4913 		if (ret) {
4914 			mlog_errno(ret);
4915 			goto out;
4916 		}
4917 	}
4918 
4919 	/*
4920 	 * Get the new bucket ready before we dirty anything
4921 	 * (This actually shouldn't fail, because we already dirtied
4922 	 * it once in ocfs2_cp_xattr_bucket()).
4923 	 */
4924 	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4925 	if (ret) {
4926 		mlog_errno(ret);
4927 		goto out;
4928 	}
4929 	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4930 						OCFS2_JOURNAL_ACCESS_WRITE);
4931 	if (ret) {
4932 		mlog_errno(ret);
4933 		goto out;
4934 	}
4935 
4936 	/* Now update the headers */
4937 	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4938 	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4939 
4940 	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4941 	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4942 
4943 	if (first_hash)
4944 		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4945 
4946 out:
4947 	ocfs2_xattr_bucket_free(new_first);
4948 	ocfs2_xattr_bucket_free(old_first);
4949 	return ret;
4950 }
4951 
4952 /*
4953  * Move some xattrs in this cluster to the new cluster.
4954  * This function should only be called when bucket size == cluster size.
4955  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4956  */
4957 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4958 				      handle_t *handle,
4959 				      u64 prev_blk,
4960 				      u64 new_blk,
4961 				      u32 *first_hash)
4962 {
4963 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4964 	int ret, credits = 2 * blk_per_bucket;
4965 
4966 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4967 
4968 	ret = ocfs2_extend_trans(handle, credits);
4969 	if (ret) {
4970 		mlog_errno(ret);
4971 		return ret;
4972 	}
4973 
4974 	/* Move half of the xattr in start_blk to the next bucket. */
4975 	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4976 					  new_blk, first_hash, 1);
4977 }
4978 
4979 /*
4980  * Move some xattrs from the old cluster to the new one since they are not
4981  * contiguous in ocfs2 xattr tree.
4982  *
4983  * new_blk starts a new separate cluster, and we will move some xattrs from
4984  * prev_blk to it. v_start will be set as the first name hash value in this
4985  * new cluster so that it can be used as e_cpos during tree insertion and
4986  * don't collide with our original b-tree operations. first_bh and header_bh
4987  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4988  * to extend the insert bucket.
4989  *
4990  * The problem is how much xattr should we move to the new one and when should
4991  * we update first_bh and header_bh?
4992  * 1. If cluster size > bucket size, that means the previous cluster has more
4993  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4994  *    update the first_bh and header_bh if the insert bucket has been moved
4995  *    to the new cluster.
4996  * 2. If cluster_size == bucket_size:
4997  *    a) If the previous extent rec has more than one cluster and the insert
4998  *       place isn't in the last cluster, copy the entire last cluster to the
4999  *       new one. This time, we don't need to upate the first_bh and header_bh
5000  *       since they will not be moved into the new cluster.
5001  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
5002  *       the new one. And we set the extend flag to zero if the insert place is
5003  *       moved into the new allocated cluster since no extend is needed.
5004  */
5005 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5006 					    handle_t *handle,
5007 					    struct ocfs2_xattr_bucket *first,
5008 					    struct ocfs2_xattr_bucket *target,
5009 					    u64 new_blk,
5010 					    u32 prev_clusters,
5011 					    u32 *v_start,
5012 					    int *extend)
5013 {
5014 	int ret;
5015 
5016 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
5017 	     (unsigned long long)bucket_blkno(first), prev_clusters,
5018 	     (unsigned long long)new_blk);
5019 
5020 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5021 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5022 							  handle,
5023 							  first, target,
5024 							  new_blk,
5025 							  prev_clusters,
5026 							  v_start);
5027 		if (ret)
5028 			mlog_errno(ret);
5029 	} else {
5030 		/* The start of the last cluster in the first extent */
5031 		u64 last_blk = bucket_blkno(first) +
5032 			((prev_clusters - 1) *
5033 			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
5034 
5035 		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5036 			ret = ocfs2_mv_xattr_buckets(inode, handle,
5037 						     bucket_blkno(first),
5038 						     last_blk, new_blk, 0,
5039 						     v_start);
5040 			if (ret)
5041 				mlog_errno(ret);
5042 		} else {
5043 			ret = ocfs2_divide_xattr_cluster(inode, handle,
5044 							 last_blk, new_blk,
5045 							 v_start);
5046 			if (ret)
5047 				mlog_errno(ret);
5048 
5049 			if ((bucket_blkno(target) == last_blk) && extend)
5050 				*extend = 0;
5051 		}
5052 	}
5053 
5054 	return ret;
5055 }
5056 
5057 /*
5058  * Add a new cluster for xattr storage.
5059  *
5060  * If the new cluster is contiguous with the previous one, it will be
5061  * appended to the same extent record, and num_clusters will be updated.
5062  * If not, we will insert a new extent for it and move some xattrs in
5063  * the last cluster into the new allocated one.
5064  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5065  * lose the benefits of hashing because we'll have to search large leaves.
5066  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5067  * if it's bigger).
5068  *
5069  * first_bh is the first block of the previous extent rec and header_bh
5070  * indicates the bucket we will insert the new xattrs. They will be updated
5071  * when the header_bh is moved into the new cluster.
5072  */
5073 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5074 				       struct buffer_head *root_bh,
5075 				       struct ocfs2_xattr_bucket *first,
5076 				       struct ocfs2_xattr_bucket *target,
5077 				       u32 *num_clusters,
5078 				       u32 prev_cpos,
5079 				       int *extend,
5080 				       struct ocfs2_xattr_set_ctxt *ctxt)
5081 {
5082 	int ret;
5083 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5084 	u32 prev_clusters = *num_clusters;
5085 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5086 	u64 block;
5087 	handle_t *handle = ctxt->handle;
5088 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5089 	struct ocfs2_extent_tree et;
5090 
5091 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5092 	     "previous xattr blkno = %llu\n",
5093 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
5094 	     prev_cpos, (unsigned long long)bucket_blkno(first));
5095 
5096 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5097 
5098 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5099 				      OCFS2_JOURNAL_ACCESS_WRITE);
5100 	if (ret < 0) {
5101 		mlog_errno(ret);
5102 		goto leave;
5103 	}
5104 
5105 	ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5106 				     clusters_to_add, &bit_off, &num_bits);
5107 	if (ret < 0) {
5108 		if (ret != -ENOSPC)
5109 			mlog_errno(ret);
5110 		goto leave;
5111 	}
5112 
5113 	BUG_ON(num_bits > clusters_to_add);
5114 
5115 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5116 	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5117 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5118 
5119 	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5120 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5121 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5122 		/*
5123 		 * If this cluster is contiguous with the old one and
5124 		 * adding this new cluster, we don't surpass the limit of
5125 		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5126 		 * initialized and used like other buckets in the previous
5127 		 * cluster.
5128 		 * So add it as a contiguous one. The caller will handle
5129 		 * its init process.
5130 		 */
5131 		v_start = prev_cpos + prev_clusters;
5132 		*num_clusters = prev_clusters + num_bits;
5133 		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5134 		     num_bits);
5135 	} else {
5136 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
5137 						       handle,
5138 						       first,
5139 						       target,
5140 						       block,
5141 						       prev_clusters,
5142 						       &v_start,
5143 						       extend);
5144 		if (ret) {
5145 			mlog_errno(ret);
5146 			goto leave;
5147 		}
5148 	}
5149 
5150 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5151 	     num_bits, (unsigned long long)block, v_start);
5152 	ret = ocfs2_insert_extent(handle, &et, v_start, block,
5153 				  num_bits, 0, ctxt->meta_ac);
5154 	if (ret < 0) {
5155 		mlog_errno(ret);
5156 		goto leave;
5157 	}
5158 
5159 	ocfs2_journal_dirty(handle, root_bh);
5160 
5161 leave:
5162 	return ret;
5163 }
5164 
5165 /*
5166  * We are given an extent.  'first' is the bucket at the very front of
5167  * the extent.  The extent has space for an additional bucket past
5168  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5169  * of the target bucket.  We wish to shift every bucket past the target
5170  * down one, filling in that additional space.  When we get back to the
5171  * target, we split the target between itself and the now-empty bucket
5172  * at target+1 (aka, target_blkno + blks_per_bucket).
5173  */
5174 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5175 				     handle_t *handle,
5176 				     struct ocfs2_xattr_bucket *first,
5177 				     u64 target_blk,
5178 				     u32 num_clusters)
5179 {
5180 	int ret, credits;
5181 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5182 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5183 	u64 end_blk;
5184 	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5185 
5186 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5187 	     "from %llu, len = %u\n", (unsigned long long)target_blk,
5188 	     (unsigned long long)bucket_blkno(first), num_clusters);
5189 
5190 	/* The extent must have room for an additional bucket */
5191 	BUG_ON(new_bucket >=
5192 	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5193 
5194 	/* end_blk points to the last existing bucket */
5195 	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5196 
5197 	/*
5198 	 * end_blk is the start of the last existing bucket.
5199 	 * Thus, (end_blk - target_blk) covers the target bucket and
5200 	 * every bucket after it up to, but not including, the last
5201 	 * existing bucket.  Then we add the last existing bucket, the
5202 	 * new bucket, and the first bucket (3 * blk_per_bucket).
5203 	 */
5204 	credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5205 	ret = ocfs2_extend_trans(handle, credits);
5206 	if (ret) {
5207 		mlog_errno(ret);
5208 		goto out;
5209 	}
5210 
5211 	ret = ocfs2_xattr_bucket_journal_access(handle, first,
5212 						OCFS2_JOURNAL_ACCESS_WRITE);
5213 	if (ret) {
5214 		mlog_errno(ret);
5215 		goto out;
5216 	}
5217 
5218 	while (end_blk != target_blk) {
5219 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5220 					    end_blk + blk_per_bucket, 0);
5221 		if (ret)
5222 			goto out;
5223 		end_blk -= blk_per_bucket;
5224 	}
5225 
5226 	/* Move half of the xattr in target_blkno to the next bucket. */
5227 	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5228 					target_blk + blk_per_bucket, NULL, 0);
5229 
5230 	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5231 	ocfs2_xattr_bucket_journal_dirty(handle, first);
5232 
5233 out:
5234 	return ret;
5235 }
5236 
5237 /*
5238  * Add new xattr bucket in an extent record and adjust the buckets
5239  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5240  * bucket we want to insert into.
5241  *
5242  * In the easy case, we will move all the buckets after target down by
5243  * one. Half of target's xattrs will be moved to the next bucket.
5244  *
5245  * If current cluster is full, we'll allocate a new one.  This may not
5246  * be contiguous.  The underlying calls will make sure that there is
5247  * space for the insert, shifting buckets around if necessary.
5248  * 'target' may be moved by those calls.
5249  */
5250 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5251 				      struct buffer_head *xb_bh,
5252 				      struct ocfs2_xattr_bucket *target,
5253 				      struct ocfs2_xattr_set_ctxt *ctxt)
5254 {
5255 	struct ocfs2_xattr_block *xb =
5256 			(struct ocfs2_xattr_block *)xb_bh->b_data;
5257 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5258 	struct ocfs2_extent_list *el = &xb_root->xt_list;
5259 	u32 name_hash =
5260 		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5261 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5262 	int ret, num_buckets, extend = 1;
5263 	u64 p_blkno;
5264 	u32 e_cpos, num_clusters;
5265 	/* The bucket at the front of the extent */
5266 	struct ocfs2_xattr_bucket *first;
5267 
5268 	mlog(0, "Add new xattr bucket starting from %llu\n",
5269 	     (unsigned long long)bucket_blkno(target));
5270 
5271 	/* The first bucket of the original extent */
5272 	first = ocfs2_xattr_bucket_new(inode);
5273 	if (!first) {
5274 		ret = -ENOMEM;
5275 		mlog_errno(ret);
5276 		goto out;
5277 	}
5278 
5279 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5280 				  &num_clusters, el);
5281 	if (ret) {
5282 		mlog_errno(ret);
5283 		goto out;
5284 	}
5285 
5286 	ret = ocfs2_read_xattr_bucket(first, p_blkno);
5287 	if (ret) {
5288 		mlog_errno(ret);
5289 		goto out;
5290 	}
5291 
5292 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5293 	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5294 		/*
5295 		 * This can move first+target if the target bucket moves
5296 		 * to the new extent.
5297 		 */
5298 		ret = ocfs2_add_new_xattr_cluster(inode,
5299 						  xb_bh,
5300 						  first,
5301 						  target,
5302 						  &num_clusters,
5303 						  e_cpos,
5304 						  &extend,
5305 						  ctxt);
5306 		if (ret) {
5307 			mlog_errno(ret);
5308 			goto out;
5309 		}
5310 	}
5311 
5312 	if (extend) {
5313 		ret = ocfs2_extend_xattr_bucket(inode,
5314 						ctxt->handle,
5315 						first,
5316 						bucket_blkno(target),
5317 						num_clusters);
5318 		if (ret)
5319 			mlog_errno(ret);
5320 	}
5321 
5322 out:
5323 	ocfs2_xattr_bucket_free(first);
5324 
5325 	return ret;
5326 }
5327 
5328 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5329 					struct ocfs2_xattr_bucket *bucket,
5330 					int offs)
5331 {
5332 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
5333 
5334 	offs = offs % inode->i_sb->s_blocksize;
5335 	return bucket_block(bucket, block_off) + offs;
5336 }
5337 
5338 /*
5339  * Truncate the specified xe_off entry in xattr bucket.
5340  * bucket is indicated by header_bh and len is the new length.
5341  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5342  *
5343  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5344  */
5345 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5346 					     struct ocfs2_xattr_bucket *bucket,
5347 					     int xe_off,
5348 					     int len,
5349 					     struct ocfs2_xattr_set_ctxt *ctxt)
5350 {
5351 	int ret, offset;
5352 	u64 value_blk;
5353 	struct ocfs2_xattr_entry *xe;
5354 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5355 	size_t blocksize = inode->i_sb->s_blocksize;
5356 	struct ocfs2_xattr_value_buf vb = {
5357 		.vb_access = ocfs2_journal_access,
5358 	};
5359 
5360 	xe = &xh->xh_entries[xe_off];
5361 
5362 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5363 
5364 	offset = le16_to_cpu(xe->xe_name_offset) +
5365 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
5366 
5367 	value_blk = offset / blocksize;
5368 
5369 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
5370 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5371 
5372 	vb.vb_bh = bucket->bu_bhs[value_blk];
5373 	BUG_ON(!vb.vb_bh);
5374 
5375 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5376 		(vb.vb_bh->b_data + offset % blocksize);
5377 
5378 	/*
5379 	 * From here on out we have to dirty the bucket.  The generic
5380 	 * value calls only modify one of the bucket's bhs, but we need
5381 	 * to send the bucket at once.  So if they error, they *could* have
5382 	 * modified something.  We have to assume they did, and dirty
5383 	 * the whole bucket.  This leaves us in a consistent state.
5384 	 */
5385 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5386 	     xe_off, (unsigned long long)bucket_blkno(bucket), len);
5387 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5388 	if (ret) {
5389 		mlog_errno(ret);
5390 		goto out;
5391 	}
5392 
5393 	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5394 						OCFS2_JOURNAL_ACCESS_WRITE);
5395 	if (ret) {
5396 		mlog_errno(ret);
5397 		goto out;
5398 	}
5399 
5400 	xe->xe_value_size = cpu_to_le64(len);
5401 
5402 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5403 
5404 out:
5405 	return ret;
5406 }
5407 
5408 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5409 				  struct buffer_head *root_bh,
5410 				  u64 blkno,
5411 				  u32 cpos,
5412 				  u32 len,
5413 				  void *para)
5414 {
5415 	int ret;
5416 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5417 	struct inode *tl_inode = osb->osb_tl_inode;
5418 	handle_t *handle;
5419 	struct ocfs2_xattr_block *xb =
5420 			(struct ocfs2_xattr_block *)root_bh->b_data;
5421 	struct ocfs2_alloc_context *meta_ac = NULL;
5422 	struct ocfs2_cached_dealloc_ctxt dealloc;
5423 	struct ocfs2_extent_tree et;
5424 
5425 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5426 					  ocfs2_delete_xattr_in_bucket, para);
5427 	if (ret) {
5428 		mlog_errno(ret);
5429 		return ret;
5430 	}
5431 
5432 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5433 
5434 	ocfs2_init_dealloc_ctxt(&dealloc);
5435 
5436 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5437 	     cpos, len, (unsigned long long)blkno);
5438 
5439 	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440 					       len);
5441 
5442 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5443 	if (ret) {
5444 		mlog_errno(ret);
5445 		return ret;
5446 	}
5447 
5448 	mutex_lock(&tl_inode->i_mutex);
5449 
5450 	if (ocfs2_truncate_log_needs_flush(osb)) {
5451 		ret = __ocfs2_flush_truncate_log(osb);
5452 		if (ret < 0) {
5453 			mlog_errno(ret);
5454 			goto out;
5455 		}
5456 	}
5457 
5458 	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5459 	if (IS_ERR(handle)) {
5460 		ret = -ENOMEM;
5461 		mlog_errno(ret);
5462 		goto out;
5463 	}
5464 
5465 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5466 				      OCFS2_JOURNAL_ACCESS_WRITE);
5467 	if (ret) {
5468 		mlog_errno(ret);
5469 		goto out_commit;
5470 	}
5471 
5472 	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5473 				  &dealloc);
5474 	if (ret) {
5475 		mlog_errno(ret);
5476 		goto out_commit;
5477 	}
5478 
5479 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 	ocfs2_journal_dirty(handle, root_bh);
5481 
5482 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5483 	if (ret)
5484 		mlog_errno(ret);
5485 
5486 out_commit:
5487 	ocfs2_commit_trans(osb, handle);
5488 out:
5489 	ocfs2_schedule_truncate_log_flush(osb, 1);
5490 
5491 	mutex_unlock(&tl_inode->i_mutex);
5492 
5493 	if (meta_ac)
5494 		ocfs2_free_alloc_context(meta_ac);
5495 
5496 	ocfs2_run_deallocs(osb, &dealloc);
5497 
5498 	return ret;
5499 }
5500 
5501 /*
5502  * check whether the xattr bucket is filled up with the same hash value.
5503  * If we want to insert the xattr with the same hash, return -ENOSPC.
5504  * If we want to insert a xattr with different hash value, go ahead
5505  * and ocfs2_divide_xattr_bucket will handle this.
5506  */
5507 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5508 					      struct ocfs2_xattr_bucket *bucket,
5509 					      const char *name)
5510 {
5511 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5512 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5513 
5514 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5515 		return 0;
5516 
5517 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5518 	    xh->xh_entries[0].xe_name_hash) {
5519 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5520 		     "hash = %u\n",
5521 		     (unsigned long long)bucket_blkno(bucket),
5522 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5523 		return -ENOSPC;
5524 	}
5525 
5526 	return 0;
5527 }
5528 
5529 /*
5530  * Try to set the entry in the current bucket.  If we fail, the caller
5531  * will handle getting us another bucket.
5532  */
5533 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5534 					struct ocfs2_xattr_info *xi,
5535 					struct ocfs2_xattr_search *xs,
5536 					struct ocfs2_xattr_set_ctxt *ctxt)
5537 {
5538 	int ret;
5539 	struct ocfs2_xa_loc loc;
5540 
5541 	mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5542 
5543 	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5544 				       xs->not_found ? NULL : xs->here);
5545 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5546 	if (!ret) {
5547 		xs->here = loc.xl_entry;
5548 		goto out;
5549 	}
5550 	if (ret != -ENOSPC) {
5551 		mlog_errno(ret);
5552 		goto out;
5553 	}
5554 
5555 	/* Ok, we need space.  Let's try defragmenting the bucket. */
5556 	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5557 					xs->bucket);
5558 	if (ret) {
5559 		mlog_errno(ret);
5560 		goto out;
5561 	}
5562 
5563 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5564 	if (!ret) {
5565 		xs->here = loc.xl_entry;
5566 		goto out;
5567 	}
5568 	if (ret != -ENOSPC)
5569 		mlog_errno(ret);
5570 
5571 
5572 out:
5573 	mlog_exit(ret);
5574 	return ret;
5575 }
5576 
5577 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5578 					     struct ocfs2_xattr_info *xi,
5579 					     struct ocfs2_xattr_search *xs,
5580 					     struct ocfs2_xattr_set_ctxt *ctxt)
5581 {
5582 	int ret;
5583 
5584 	mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5585 
5586 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5587 	if (!ret)
5588 		goto out;
5589 	if (ret != -ENOSPC) {
5590 		mlog_errno(ret);
5591 		goto out;
5592 	}
5593 
5594 	/* Ack, need more space.  Let's try to get another bucket! */
5595 
5596 	/*
5597 	 * We do not allow for overlapping ranges between buckets. And
5598 	 * the maximum number of collisions we will allow for then is
5599 	 * one bucket's worth, so check it here whether we need to
5600 	 * add a new bucket for the insert.
5601 	 */
5602 	ret = ocfs2_check_xattr_bucket_collision(inode,
5603 						 xs->bucket,
5604 						 xi->xi_name);
5605 	if (ret) {
5606 		mlog_errno(ret);
5607 		goto out;
5608 	}
5609 
5610 	ret = ocfs2_add_new_xattr_bucket(inode,
5611 					 xs->xattr_bh,
5612 					 xs->bucket,
5613 					 ctxt);
5614 	if (ret) {
5615 		mlog_errno(ret);
5616 		goto out;
5617 	}
5618 
5619 	/*
5620 	 * ocfs2_add_new_xattr_bucket() will have updated
5621 	 * xs->bucket if it moved, but it will not have updated
5622 	 * any of the other search fields.  Thus, we drop it and
5623 	 * re-search.  Everything should be cached, so it'll be
5624 	 * quick.
5625 	 */
5626 	ocfs2_xattr_bucket_relse(xs->bucket);
5627 	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5628 					   xi->xi_name_index,
5629 					   xi->xi_name, xs);
5630 	if (ret && ret != -ENODATA)
5631 		goto out;
5632 	xs->not_found = ret;
5633 
5634 	/* Ok, we have a new bucket, let's try again */
5635 	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5636 	if (ret && (ret != -ENOSPC))
5637 		mlog_errno(ret);
5638 
5639 out:
5640 	mlog_exit(ret);
5641 	return ret;
5642 }
5643 
5644 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5645 					struct ocfs2_xattr_bucket *bucket,
5646 					void *para)
5647 {
5648 	int ret = 0, ref_credits;
5649 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5650 	u16 i;
5651 	struct ocfs2_xattr_entry *xe;
5652 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5653 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5654 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5655 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5656 	struct ocfs2_xattr_value_root *xv;
5657 	struct ocfs2_rm_xattr_bucket_para *args =
5658 			(struct ocfs2_rm_xattr_bucket_para *)para;
5659 
5660 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5661 
5662 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5663 		xe = &xh->xh_entries[i];
5664 		if (ocfs2_xattr_is_local(xe))
5665 			continue;
5666 
5667 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5668 						      i, &xv, NULL);
5669 
5670 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5671 							 args->ref_ci,
5672 							 args->ref_root_bh,
5673 							 &ctxt.meta_ac,
5674 							 &ref_credits);
5675 
5676 		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5677 		if (IS_ERR(ctxt.handle)) {
5678 			ret = PTR_ERR(ctxt.handle);
5679 			mlog_errno(ret);
5680 			break;
5681 		}
5682 
5683 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5684 							i, 0, &ctxt);
5685 
5686 		ocfs2_commit_trans(osb, ctxt.handle);
5687 		if (ctxt.meta_ac) {
5688 			ocfs2_free_alloc_context(ctxt.meta_ac);
5689 			ctxt.meta_ac = NULL;
5690 		}
5691 		if (ret) {
5692 			mlog_errno(ret);
5693 			break;
5694 		}
5695 	}
5696 
5697 	if (ctxt.meta_ac)
5698 		ocfs2_free_alloc_context(ctxt.meta_ac);
5699 	ocfs2_schedule_truncate_log_flush(osb, 1);
5700 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5701 	return ret;
5702 }
5703 
5704 /*
5705  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5706  * or change the extent record flag), we need to recalculate
5707  * the metaecc for the whole bucket. So it is done here.
5708  *
5709  * Note:
5710  * We have to give the extra credits for the caller.
5711  */
5712 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5713 					    handle_t *handle,
5714 					    void *para)
5715 {
5716 	int ret;
5717 	struct ocfs2_xattr_bucket *bucket =
5718 			(struct ocfs2_xattr_bucket *)para;
5719 
5720 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5721 						OCFS2_JOURNAL_ACCESS_WRITE);
5722 	if (ret) {
5723 		mlog_errno(ret);
5724 		return ret;
5725 	}
5726 
5727 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5728 
5729 	return 0;
5730 }
5731 
5732 /*
5733  * Special action we need if the xattr value is refcounted.
5734  *
5735  * 1. If the xattr is refcounted, lock the tree.
5736  * 2. CoW the xattr if we are setting the new value and the value
5737  *    will be stored outside.
5738  * 3. In other case, decrease_refcount will work for us, so just
5739  *    lock the refcount tree, calculate the meta and credits is OK.
5740  *
5741  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5742  * currently CoW is a completed transaction, while this function
5743  * will also lock the allocators and let us deadlock. So we will
5744  * CoW the whole xattr value.
5745  */
5746 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5747 					struct ocfs2_dinode *di,
5748 					struct ocfs2_xattr_info *xi,
5749 					struct ocfs2_xattr_search *xis,
5750 					struct ocfs2_xattr_search *xbs,
5751 					struct ocfs2_refcount_tree **ref_tree,
5752 					int *meta_add,
5753 					int *credits)
5754 {
5755 	int ret = 0;
5756 	struct ocfs2_xattr_block *xb;
5757 	struct ocfs2_xattr_entry *xe;
5758 	char *base;
5759 	u32 p_cluster, num_clusters;
5760 	unsigned int ext_flags;
5761 	int name_offset, name_len;
5762 	struct ocfs2_xattr_value_buf vb;
5763 	struct ocfs2_xattr_bucket *bucket = NULL;
5764 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5765 	struct ocfs2_post_refcount refcount;
5766 	struct ocfs2_post_refcount *p = NULL;
5767 	struct buffer_head *ref_root_bh = NULL;
5768 
5769 	if (!xis->not_found) {
5770 		xe = xis->here;
5771 		name_offset = le16_to_cpu(xe->xe_name_offset);
5772 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5773 		base = xis->base;
5774 		vb.vb_bh = xis->inode_bh;
5775 		vb.vb_access = ocfs2_journal_access_di;
5776 	} else {
5777 		int i, block_off = 0;
5778 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5779 		xe = xbs->here;
5780 		name_offset = le16_to_cpu(xe->xe_name_offset);
5781 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5782 		i = xbs->here - xbs->header->xh_entries;
5783 
5784 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5785 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5786 							bucket_xh(xbs->bucket),
5787 							i, &block_off,
5788 							&name_offset);
5789 			if (ret) {
5790 				mlog_errno(ret);
5791 				goto out;
5792 			}
5793 			base = bucket_block(xbs->bucket, block_off);
5794 			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5795 			vb.vb_access = ocfs2_journal_access;
5796 
5797 			if (ocfs2_meta_ecc(osb)) {
5798 				/*create parameters for ocfs2_post_refcount. */
5799 				bucket = xbs->bucket;
5800 				refcount.credits = bucket->bu_blocks;
5801 				refcount.para = bucket;
5802 				refcount.func =
5803 					ocfs2_xattr_bucket_post_refcount;
5804 				p = &refcount;
5805 			}
5806 		} else {
5807 			base = xbs->base;
5808 			vb.vb_bh = xbs->xattr_bh;
5809 			vb.vb_access = ocfs2_journal_access_xb;
5810 		}
5811 	}
5812 
5813 	if (ocfs2_xattr_is_local(xe))
5814 		goto out;
5815 
5816 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5817 				(base + name_offset + name_len);
5818 
5819 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5820 				       &num_clusters, &vb.vb_xv->xr_list,
5821 				       &ext_flags);
5822 	if (ret) {
5823 		mlog_errno(ret);
5824 		goto out;
5825 	}
5826 
5827 	/*
5828 	 * We just need to check the 1st extent record, since we always
5829 	 * CoW the whole xattr. So there shouldn't be a xattr with
5830 	 * some REFCOUNT extent recs after the 1st one.
5831 	 */
5832 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5833 		goto out;
5834 
5835 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5836 				       1, ref_tree, &ref_root_bh);
5837 	if (ret) {
5838 		mlog_errno(ret);
5839 		goto out;
5840 	}
5841 
5842 	/*
5843 	 * If we are deleting the xattr or the new size will be stored inside,
5844 	 * cool, leave it there, the xattr truncate process will remove them
5845 	 * for us(it still needs the refcount tree lock and the meta, credits).
5846 	 * And the worse case is that every cluster truncate will split the
5847 	 * refcount tree, and make the original extent become 3. So we will need
5848 	 * 2 * cluster more extent recs at most.
5849 	 */
5850 	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5851 
5852 		ret = ocfs2_refcounted_xattr_delete_need(inode,
5853 							 &(*ref_tree)->rf_ci,
5854 							 ref_root_bh, vb.vb_xv,
5855 							 meta_add, credits);
5856 		if (ret)
5857 			mlog_errno(ret);
5858 		goto out;
5859 	}
5860 
5861 	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5862 				       *ref_tree, ref_root_bh, 0,
5863 				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5864 	if (ret)
5865 		mlog_errno(ret);
5866 
5867 out:
5868 	brelse(ref_root_bh);
5869 	return ret;
5870 }
5871 
5872 /*
5873  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5874  * The physical clusters will be added to refcount tree.
5875  */
5876 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5877 				struct ocfs2_xattr_value_root *xv,
5878 				struct ocfs2_extent_tree *value_et,
5879 				struct ocfs2_caching_info *ref_ci,
5880 				struct buffer_head *ref_root_bh,
5881 				struct ocfs2_cached_dealloc_ctxt *dealloc,
5882 				struct ocfs2_post_refcount *refcount)
5883 {
5884 	int ret = 0;
5885 	u32 clusters = le32_to_cpu(xv->xr_clusters);
5886 	u32 cpos, p_cluster, num_clusters;
5887 	struct ocfs2_extent_list *el = &xv->xr_list;
5888 	unsigned int ext_flags;
5889 
5890 	cpos = 0;
5891 	while (cpos < clusters) {
5892 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5893 					       &num_clusters, el, &ext_flags);
5894 
5895 		cpos += num_clusters;
5896 		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5897 			continue;
5898 
5899 		BUG_ON(!p_cluster);
5900 
5901 		ret = ocfs2_add_refcount_flag(inode, value_et,
5902 					      ref_ci, ref_root_bh,
5903 					      cpos - num_clusters,
5904 					      p_cluster, num_clusters,
5905 					      dealloc, refcount);
5906 		if (ret) {
5907 			mlog_errno(ret);
5908 			break;
5909 		}
5910 	}
5911 
5912 	return ret;
5913 }
5914 
5915 /*
5916  * Given a normal ocfs2_xattr_header, refcount all the entries which
5917  * have value stored outside.
5918  * Used for xattrs stored in inode and ocfs2_xattr_block.
5919  */
5920 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5921 				struct ocfs2_xattr_value_buf *vb,
5922 				struct ocfs2_xattr_header *header,
5923 				struct ocfs2_caching_info *ref_ci,
5924 				struct buffer_head *ref_root_bh,
5925 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5926 {
5927 
5928 	struct ocfs2_xattr_entry *xe;
5929 	struct ocfs2_xattr_value_root *xv;
5930 	struct ocfs2_extent_tree et;
5931 	int i, ret = 0;
5932 
5933 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5934 		xe = &header->xh_entries[i];
5935 
5936 		if (ocfs2_xattr_is_local(xe))
5937 			continue;
5938 
5939 		xv = (struct ocfs2_xattr_value_root *)((void *)header +
5940 			le16_to_cpu(xe->xe_name_offset) +
5941 			OCFS2_XATTR_SIZE(xe->xe_name_len));
5942 
5943 		vb->vb_xv = xv;
5944 		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5945 
5946 		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5947 							ref_ci, ref_root_bh,
5948 							dealloc, NULL);
5949 		if (ret) {
5950 			mlog_errno(ret);
5951 			break;
5952 		}
5953 	}
5954 
5955 	return ret;
5956 }
5957 
5958 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5959 				struct buffer_head *fe_bh,
5960 				struct ocfs2_caching_info *ref_ci,
5961 				struct buffer_head *ref_root_bh,
5962 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5963 {
5964 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5965 	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5966 				(fe_bh->b_data + inode->i_sb->s_blocksize -
5967 				le16_to_cpu(di->i_xattr_inline_size));
5968 	struct ocfs2_xattr_value_buf vb = {
5969 		.vb_bh = fe_bh,
5970 		.vb_access = ocfs2_journal_access_di,
5971 	};
5972 
5973 	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5974 						  ref_ci, ref_root_bh, dealloc);
5975 }
5976 
5977 struct ocfs2_xattr_tree_value_refcount_para {
5978 	struct ocfs2_caching_info *ref_ci;
5979 	struct buffer_head *ref_root_bh;
5980 	struct ocfs2_cached_dealloc_ctxt *dealloc;
5981 };
5982 
5983 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5984 					   struct ocfs2_xattr_bucket *bucket,
5985 					   int offset,
5986 					   struct ocfs2_xattr_value_root **xv,
5987 					   struct buffer_head **bh)
5988 {
5989 	int ret, block_off, name_offset;
5990 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5991 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5992 	void *base;
5993 
5994 	ret = ocfs2_xattr_bucket_get_name_value(sb,
5995 						bucket_xh(bucket),
5996 						offset,
5997 						&block_off,
5998 						&name_offset);
5999 	if (ret) {
6000 		mlog_errno(ret);
6001 		goto out;
6002 	}
6003 
6004 	base = bucket_block(bucket, block_off);
6005 
6006 	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6007 			 OCFS2_XATTR_SIZE(xe->xe_name_len));
6008 
6009 	if (bh)
6010 		*bh = bucket->bu_bhs[block_off];
6011 out:
6012 	return ret;
6013 }
6014 
6015 /*
6016  * For a given xattr bucket, refcount all the entries which
6017  * have value stored outside.
6018  */
6019 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6020 					     struct ocfs2_xattr_bucket *bucket,
6021 					     void *para)
6022 {
6023 	int i, ret = 0;
6024 	struct ocfs2_extent_tree et;
6025 	struct ocfs2_xattr_tree_value_refcount_para *ref =
6026 			(struct ocfs2_xattr_tree_value_refcount_para *)para;
6027 	struct ocfs2_xattr_header *xh =
6028 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6029 	struct ocfs2_xattr_entry *xe;
6030 	struct ocfs2_xattr_value_buf vb = {
6031 		.vb_access = ocfs2_journal_access,
6032 	};
6033 	struct ocfs2_post_refcount refcount = {
6034 		.credits = bucket->bu_blocks,
6035 		.para = bucket,
6036 		.func = ocfs2_xattr_bucket_post_refcount,
6037 	};
6038 	struct ocfs2_post_refcount *p = NULL;
6039 
6040 	/* We only need post_refcount if we support metaecc. */
6041 	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6042 		p = &refcount;
6043 
6044 	mlog(0, "refcount bucket %llu, count = %u\n",
6045 	     (unsigned long long)bucket_blkno(bucket),
6046 	     le16_to_cpu(xh->xh_count));
6047 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6048 		xe = &xh->xh_entries[i];
6049 
6050 		if (ocfs2_xattr_is_local(xe))
6051 			continue;
6052 
6053 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6054 						      &vb.vb_xv, &vb.vb_bh);
6055 		if (ret) {
6056 			mlog_errno(ret);
6057 			break;
6058 		}
6059 
6060 		ocfs2_init_xattr_value_extent_tree(&et,
6061 						   INODE_CACHE(inode), &vb);
6062 
6063 		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6064 							&et, ref->ref_ci,
6065 							ref->ref_root_bh,
6066 							ref->dealloc, p);
6067 		if (ret) {
6068 			mlog_errno(ret);
6069 			break;
6070 		}
6071 	}
6072 
6073 	return ret;
6074 
6075 }
6076 
6077 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6078 				     struct buffer_head *root_bh,
6079 				     u64 blkno, u32 cpos, u32 len, void *para)
6080 {
6081 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6082 					   ocfs2_xattr_bucket_value_refcount,
6083 					   para);
6084 }
6085 
6086 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6087 				struct buffer_head *blk_bh,
6088 				struct ocfs2_caching_info *ref_ci,
6089 				struct buffer_head *ref_root_bh,
6090 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6091 {
6092 	int ret = 0;
6093 	struct ocfs2_xattr_block *xb =
6094 				(struct ocfs2_xattr_block *)blk_bh->b_data;
6095 
6096 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6097 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6098 		struct ocfs2_xattr_value_buf vb = {
6099 			.vb_bh = blk_bh,
6100 			.vb_access = ocfs2_journal_access_xb,
6101 		};
6102 
6103 		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6104 							 ref_ci, ref_root_bh,
6105 							 dealloc);
6106 	} else {
6107 		struct ocfs2_xattr_tree_value_refcount_para para = {
6108 			.ref_ci = ref_ci,
6109 			.ref_root_bh = ref_root_bh,
6110 			.dealloc = dealloc,
6111 		};
6112 
6113 		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6114 						ocfs2_refcount_xattr_tree_rec,
6115 						&para);
6116 	}
6117 
6118 	return ret;
6119 }
6120 
6121 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6122 				     struct buffer_head *fe_bh,
6123 				     struct ocfs2_caching_info *ref_ci,
6124 				     struct buffer_head *ref_root_bh,
6125 				     struct ocfs2_cached_dealloc_ctxt *dealloc)
6126 {
6127 	int ret = 0;
6128 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6129 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6130 	struct buffer_head *blk_bh = NULL;
6131 
6132 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6133 		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6134 							 ref_ci, ref_root_bh,
6135 							 dealloc);
6136 		if (ret) {
6137 			mlog_errno(ret);
6138 			goto out;
6139 		}
6140 	}
6141 
6142 	if (!di->i_xattr_loc)
6143 		goto out;
6144 
6145 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6146 				     &blk_bh);
6147 	if (ret < 0) {
6148 		mlog_errno(ret);
6149 		goto out;
6150 	}
6151 
6152 	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6153 						ref_root_bh, dealloc);
6154 	if (ret)
6155 		mlog_errno(ret);
6156 
6157 	brelse(blk_bh);
6158 out:
6159 
6160 	return ret;
6161 }
6162 
6163 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
6164 /*
6165  * Store the information we need in xattr reflink.
6166  * old_bh and new_bh are inode bh for the old and new inode.
6167  */
6168 struct ocfs2_xattr_reflink {
6169 	struct inode *old_inode;
6170 	struct inode *new_inode;
6171 	struct buffer_head *old_bh;
6172 	struct buffer_head *new_bh;
6173 	struct ocfs2_caching_info *ref_ci;
6174 	struct buffer_head *ref_root_bh;
6175 	struct ocfs2_cached_dealloc_ctxt *dealloc;
6176 	should_xattr_reflinked *xattr_reflinked;
6177 };
6178 
6179 /*
6180  * Given a xattr header and xe offset,
6181  * return the proper xv and the corresponding bh.
6182  * xattr in inode, block and xattr tree have different implementaions.
6183  */
6184 typedef int (get_xattr_value_root)(struct super_block *sb,
6185 				   struct buffer_head *bh,
6186 				   struct ocfs2_xattr_header *xh,
6187 				   int offset,
6188 				   struct ocfs2_xattr_value_root **xv,
6189 				   struct buffer_head **ret_bh,
6190 				   void *para);
6191 
6192 /*
6193  * Calculate all the xattr value root metadata stored in this xattr header and
6194  * credits we need if we create them from the scratch.
6195  * We use get_xattr_value_root so that all types of xattr container can use it.
6196  */
6197 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6198 					     struct buffer_head *bh,
6199 					     struct ocfs2_xattr_header *xh,
6200 					     int *metas, int *credits,
6201 					     int *num_recs,
6202 					     get_xattr_value_root *func,
6203 					     void *para)
6204 {
6205 	int i, ret = 0;
6206 	struct ocfs2_xattr_value_root *xv;
6207 	struct ocfs2_xattr_entry *xe;
6208 
6209 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6210 		xe = &xh->xh_entries[i];
6211 		if (ocfs2_xattr_is_local(xe))
6212 			continue;
6213 
6214 		ret = func(sb, bh, xh, i, &xv, NULL, para);
6215 		if (ret) {
6216 			mlog_errno(ret);
6217 			break;
6218 		}
6219 
6220 		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6221 			  le16_to_cpu(xv->xr_list.l_next_free_rec);
6222 
6223 		*credits += ocfs2_calc_extend_credits(sb,
6224 						&def_xv.xv.xr_list,
6225 						le32_to_cpu(xv->xr_clusters));
6226 
6227 		/*
6228 		 * If the value is a tree with depth > 1, We don't go deep
6229 		 * to the extent block, so just calculate a maximum record num.
6230 		 */
6231 		if (!xv->xr_list.l_tree_depth)
6232 			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6233 		else
6234 			*num_recs += ocfs2_clusters_for_bytes(sb,
6235 							      XATTR_SIZE_MAX);
6236 	}
6237 
6238 	return ret;
6239 }
6240 
6241 /* Used by xattr inode and block to return the right xv and buffer_head. */
6242 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6243 				      struct buffer_head *bh,
6244 				      struct ocfs2_xattr_header *xh,
6245 				      int offset,
6246 				      struct ocfs2_xattr_value_root **xv,
6247 				      struct buffer_head **ret_bh,
6248 				      void *para)
6249 {
6250 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6251 
6252 	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6253 		le16_to_cpu(xe->xe_name_offset) +
6254 		OCFS2_XATTR_SIZE(xe->xe_name_len));
6255 
6256 	if (ret_bh)
6257 		*ret_bh = bh;
6258 
6259 	return 0;
6260 }
6261 
6262 /*
6263  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6264  * It is only used for inline xattr and xattr block.
6265  */
6266 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6267 					struct ocfs2_xattr_header *xh,
6268 					struct buffer_head *ref_root_bh,
6269 					int *credits,
6270 					struct ocfs2_alloc_context **meta_ac)
6271 {
6272 	int ret, meta_add = 0, num_recs = 0;
6273 	struct ocfs2_refcount_block *rb =
6274 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
6275 
6276 	*credits = 0;
6277 
6278 	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6279 						&meta_add, credits, &num_recs,
6280 						ocfs2_get_xattr_value_root,
6281 						NULL);
6282 	if (ret) {
6283 		mlog_errno(ret);
6284 		goto out;
6285 	}
6286 
6287 	/*
6288 	 * We need to add/modify num_recs in refcount tree, so just calculate
6289 	 * an approximate number we need for refcount tree change.
6290 	 * Sometimes we need to split the tree, and after split,  half recs
6291 	 * will be moved to the new block, and a new block can only provide
6292 	 * half number of recs. So we multiple new blocks by 2.
6293 	 */
6294 	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6295 	meta_add += num_recs;
6296 	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6297 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6298 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6299 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6300 	else
6301 		*credits += 1;
6302 
6303 	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6304 	if (ret)
6305 		mlog_errno(ret);
6306 
6307 out:
6308 	return ret;
6309 }
6310 
6311 /*
6312  * Given a xattr header, reflink all the xattrs in this container.
6313  * It can be used for inode, block and bucket.
6314  *
6315  * NOTE:
6316  * Before we call this function, the caller has memcpy the xattr in
6317  * old_xh to the new_xh.
6318  *
6319  * If args.xattr_reflinked is set, call it to decide whether the xe should
6320  * be reflinked or not. If not, remove it from the new xattr header.
6321  */
6322 static int ocfs2_reflink_xattr_header(handle_t *handle,
6323 				      struct ocfs2_xattr_reflink *args,
6324 				      struct buffer_head *old_bh,
6325 				      struct ocfs2_xattr_header *xh,
6326 				      struct buffer_head *new_bh,
6327 				      struct ocfs2_xattr_header *new_xh,
6328 				      struct ocfs2_xattr_value_buf *vb,
6329 				      struct ocfs2_alloc_context *meta_ac,
6330 				      get_xattr_value_root *func,
6331 				      void *para)
6332 {
6333 	int ret = 0, i, j;
6334 	struct super_block *sb = args->old_inode->i_sb;
6335 	struct buffer_head *value_bh;
6336 	struct ocfs2_xattr_entry *xe, *last;
6337 	struct ocfs2_xattr_value_root *xv, *new_xv;
6338 	struct ocfs2_extent_tree data_et;
6339 	u32 clusters, cpos, p_cluster, num_clusters;
6340 	unsigned int ext_flags = 0;
6341 
6342 	mlog(0, "reflink xattr in container %llu, count = %u\n",
6343 	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6344 
6345 	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6346 	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6347 		xe = &xh->xh_entries[i];
6348 
6349 		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6350 			xe = &new_xh->xh_entries[j];
6351 
6352 			le16_add_cpu(&new_xh->xh_count, -1);
6353 			if (new_xh->xh_count) {
6354 				memmove(xe, xe + 1,
6355 					(void *)last - (void *)xe);
6356 				memset(last, 0,
6357 				       sizeof(struct ocfs2_xattr_entry));
6358 			}
6359 
6360 			/*
6361 			 * We don't want j to increase in the next round since
6362 			 * it is already moved ahead.
6363 			 */
6364 			j--;
6365 			continue;
6366 		}
6367 
6368 		if (ocfs2_xattr_is_local(xe))
6369 			continue;
6370 
6371 		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6372 		if (ret) {
6373 			mlog_errno(ret);
6374 			break;
6375 		}
6376 
6377 		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6378 		if (ret) {
6379 			mlog_errno(ret);
6380 			break;
6381 		}
6382 
6383 		/*
6384 		 * For the xattr which has l_tree_depth = 0, all the extent
6385 		 * recs have already be copied to the new xh with the
6386 		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6387 		 * increase the refount count int the refcount tree.
6388 		 *
6389 		 * For the xattr which has l_tree_depth > 0, we need
6390 		 * to initialize it to the empty default value root,
6391 		 * and then insert the extents one by one.
6392 		 */
6393 		if (xv->xr_list.l_tree_depth) {
6394 			memcpy(new_xv, &def_xv, sizeof(def_xv));
6395 			vb->vb_xv = new_xv;
6396 			vb->vb_bh = value_bh;
6397 			ocfs2_init_xattr_value_extent_tree(&data_et,
6398 					INODE_CACHE(args->new_inode), vb);
6399 		}
6400 
6401 		clusters = le32_to_cpu(xv->xr_clusters);
6402 		cpos = 0;
6403 		while (cpos < clusters) {
6404 			ret = ocfs2_xattr_get_clusters(args->old_inode,
6405 						       cpos,
6406 						       &p_cluster,
6407 						       &num_clusters,
6408 						       &xv->xr_list,
6409 						       &ext_flags);
6410 			if (ret) {
6411 				mlog_errno(ret);
6412 				goto out;
6413 			}
6414 
6415 			BUG_ON(!p_cluster);
6416 
6417 			if (xv->xr_list.l_tree_depth) {
6418 				ret = ocfs2_insert_extent(handle,
6419 						&data_et, cpos,
6420 						ocfs2_clusters_to_blocks(
6421 							args->old_inode->i_sb,
6422 							p_cluster),
6423 						num_clusters, ext_flags,
6424 						meta_ac);
6425 				if (ret) {
6426 					mlog_errno(ret);
6427 					goto out;
6428 				}
6429 			}
6430 
6431 			ret = ocfs2_increase_refcount(handle, args->ref_ci,
6432 						      args->ref_root_bh,
6433 						      p_cluster, num_clusters,
6434 						      meta_ac, args->dealloc);
6435 			if (ret) {
6436 				mlog_errno(ret);
6437 				goto out;
6438 			}
6439 
6440 			cpos += num_clusters;
6441 		}
6442 	}
6443 
6444 out:
6445 	return ret;
6446 }
6447 
6448 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6449 {
6450 	int ret = 0, credits = 0;
6451 	handle_t *handle;
6452 	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6453 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6454 	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6455 	int header_off = osb->sb->s_blocksize - inline_size;
6456 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6457 					(args->old_bh->b_data + header_off);
6458 	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6459 					(args->new_bh->b_data + header_off);
6460 	struct ocfs2_alloc_context *meta_ac = NULL;
6461 	struct ocfs2_inode_info *new_oi;
6462 	struct ocfs2_dinode *new_di;
6463 	struct ocfs2_xattr_value_buf vb = {
6464 		.vb_bh = args->new_bh,
6465 		.vb_access = ocfs2_journal_access_di,
6466 	};
6467 
6468 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6469 						  &credits, &meta_ac);
6470 	if (ret) {
6471 		mlog_errno(ret);
6472 		goto out;
6473 	}
6474 
6475 	handle = ocfs2_start_trans(osb, credits);
6476 	if (IS_ERR(handle)) {
6477 		ret = PTR_ERR(handle);
6478 		mlog_errno(ret);
6479 		goto out;
6480 	}
6481 
6482 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6483 				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6484 	if (ret) {
6485 		mlog_errno(ret);
6486 		goto out_commit;
6487 	}
6488 
6489 	memcpy(args->new_bh->b_data + header_off,
6490 	       args->old_bh->b_data + header_off, inline_size);
6491 
6492 	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6493 	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6494 
6495 	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6496 					 args->new_bh, new_xh, &vb, meta_ac,
6497 					 ocfs2_get_xattr_value_root, NULL);
6498 	if (ret) {
6499 		mlog_errno(ret);
6500 		goto out_commit;
6501 	}
6502 
6503 	new_oi = OCFS2_I(args->new_inode);
6504 	spin_lock(&new_oi->ip_lock);
6505 	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6506 	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6507 	spin_unlock(&new_oi->ip_lock);
6508 
6509 	ocfs2_journal_dirty(handle, args->new_bh);
6510 
6511 out_commit:
6512 	ocfs2_commit_trans(osb, handle);
6513 
6514 out:
6515 	if (meta_ac)
6516 		ocfs2_free_alloc_context(meta_ac);
6517 	return ret;
6518 }
6519 
6520 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6521 					  struct buffer_head *fe_bh,
6522 					  struct buffer_head **ret_bh,
6523 					  int indexed)
6524 {
6525 	int ret;
6526 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6527 	struct ocfs2_xattr_set_ctxt ctxt;
6528 
6529 	memset(&ctxt, 0, sizeof(ctxt));
6530 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6531 	if (ret < 0) {
6532 		mlog_errno(ret);
6533 		return ret;
6534 	}
6535 
6536 	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6537 	if (IS_ERR(ctxt.handle)) {
6538 		ret = PTR_ERR(ctxt.handle);
6539 		mlog_errno(ret);
6540 		goto out;
6541 	}
6542 
6543 	mlog(0, "create new xattr block for inode %llu, index = %d\n",
6544 	     (unsigned long long)fe_bh->b_blocknr, indexed);
6545 	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6546 				       ret_bh);
6547 	if (ret)
6548 		mlog_errno(ret);
6549 
6550 	ocfs2_commit_trans(osb, ctxt.handle);
6551 out:
6552 	ocfs2_free_alloc_context(ctxt.meta_ac);
6553 	return ret;
6554 }
6555 
6556 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6557 				     struct buffer_head *blk_bh,
6558 				     struct buffer_head *new_blk_bh)
6559 {
6560 	int ret = 0, credits = 0;
6561 	handle_t *handle;
6562 	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6563 	struct ocfs2_dinode *new_di;
6564 	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6565 	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6566 	struct ocfs2_xattr_block *xb =
6567 			(struct ocfs2_xattr_block *)blk_bh->b_data;
6568 	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6569 	struct ocfs2_xattr_block *new_xb =
6570 			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
6571 	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6572 	struct ocfs2_alloc_context *meta_ac;
6573 	struct ocfs2_xattr_value_buf vb = {
6574 		.vb_bh = new_blk_bh,
6575 		.vb_access = ocfs2_journal_access_xb,
6576 	};
6577 
6578 	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6579 						  &credits, &meta_ac);
6580 	if (ret) {
6581 		mlog_errno(ret);
6582 		return ret;
6583 	}
6584 
6585 	/* One more credits in case we need to add xattr flags in new inode. */
6586 	handle = ocfs2_start_trans(osb, credits + 1);
6587 	if (IS_ERR(handle)) {
6588 		ret = PTR_ERR(handle);
6589 		mlog_errno(ret);
6590 		goto out;
6591 	}
6592 
6593 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6594 		ret = ocfs2_journal_access_di(handle,
6595 					      INODE_CACHE(args->new_inode),
6596 					      args->new_bh,
6597 					      OCFS2_JOURNAL_ACCESS_WRITE);
6598 		if (ret) {
6599 			mlog_errno(ret);
6600 			goto out_commit;
6601 		}
6602 	}
6603 
6604 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6605 				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6606 	if (ret) {
6607 		mlog_errno(ret);
6608 		goto out_commit;
6609 	}
6610 
6611 	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6612 	       osb->sb->s_blocksize - header_off);
6613 
6614 	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6615 					 new_blk_bh, new_xh, &vb, meta_ac,
6616 					 ocfs2_get_xattr_value_root, NULL);
6617 	if (ret) {
6618 		mlog_errno(ret);
6619 		goto out_commit;
6620 	}
6621 
6622 	ocfs2_journal_dirty(handle, new_blk_bh);
6623 
6624 	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6625 		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6626 		spin_lock(&new_oi->ip_lock);
6627 		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6628 		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6629 		spin_unlock(&new_oi->ip_lock);
6630 
6631 		ocfs2_journal_dirty(handle, args->new_bh);
6632 	}
6633 
6634 out_commit:
6635 	ocfs2_commit_trans(osb, handle);
6636 
6637 out:
6638 	ocfs2_free_alloc_context(meta_ac);
6639 	return ret;
6640 }
6641 
6642 struct ocfs2_reflink_xattr_tree_args {
6643 	struct ocfs2_xattr_reflink *reflink;
6644 	struct buffer_head *old_blk_bh;
6645 	struct buffer_head *new_blk_bh;
6646 	struct ocfs2_xattr_bucket *old_bucket;
6647 	struct ocfs2_xattr_bucket *new_bucket;
6648 };
6649 
6650 /*
6651  * NOTE:
6652  * We have to handle the case that both old bucket and new bucket
6653  * will call this function to get the right ret_bh.
6654  * So The caller must give us the right bh.
6655  */
6656 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6657 					struct buffer_head *bh,
6658 					struct ocfs2_xattr_header *xh,
6659 					int offset,
6660 					struct ocfs2_xattr_value_root **xv,
6661 					struct buffer_head **ret_bh,
6662 					void *para)
6663 {
6664 	struct ocfs2_reflink_xattr_tree_args *args =
6665 			(struct ocfs2_reflink_xattr_tree_args *)para;
6666 	struct ocfs2_xattr_bucket *bucket;
6667 
6668 	if (bh == args->old_bucket->bu_bhs[0])
6669 		bucket = args->old_bucket;
6670 	else
6671 		bucket = args->new_bucket;
6672 
6673 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6674 					       xv, ret_bh);
6675 }
6676 
6677 struct ocfs2_value_tree_metas {
6678 	int num_metas;
6679 	int credits;
6680 	int num_recs;
6681 };
6682 
6683 static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
6684 					struct buffer_head *bh,
6685 					struct ocfs2_xattr_header *xh,
6686 					int offset,
6687 					struct ocfs2_xattr_value_root **xv,
6688 					struct buffer_head **ret_bh,
6689 					void *para)
6690 {
6691 	struct ocfs2_xattr_bucket *bucket =
6692 				(struct ocfs2_xattr_bucket *)para;
6693 
6694 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6695 					       xv, ret_bh);
6696 }
6697 
6698 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6699 				      struct ocfs2_xattr_bucket *bucket,
6700 				      void *para)
6701 {
6702 	struct ocfs2_value_tree_metas *metas =
6703 			(struct ocfs2_value_tree_metas *)para;
6704 	struct ocfs2_xattr_header *xh =
6705 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6706 
6707 	/* Add the credits for this bucket first. */
6708 	metas->credits += bucket->bu_blocks;
6709 	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6710 					xh, &metas->num_metas,
6711 					&metas->credits, &metas->num_recs,
6712 					ocfs2_value_tree_metas_in_bucket,
6713 					bucket);
6714 }
6715 
6716 /*
6717  * Given a xattr extent rec starting from blkno and having len clusters,
6718  * iterate all the buckets calculate how much metadata we need for reflinking
6719  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6720  */
6721 static int ocfs2_lock_reflink_xattr_rec_allocators(
6722 				struct ocfs2_reflink_xattr_tree_args *args,
6723 				struct ocfs2_extent_tree *xt_et,
6724 				u64 blkno, u32 len, int *credits,
6725 				struct ocfs2_alloc_context **meta_ac,
6726 				struct ocfs2_alloc_context **data_ac)
6727 {
6728 	int ret, num_free_extents;
6729 	struct ocfs2_value_tree_metas metas;
6730 	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6731 	struct ocfs2_refcount_block *rb;
6732 
6733 	memset(&metas, 0, sizeof(metas));
6734 
6735 	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6736 					  ocfs2_calc_value_tree_metas, &metas);
6737 	if (ret) {
6738 		mlog_errno(ret);
6739 		goto out;
6740 	}
6741 
6742 	*credits = metas.credits;
6743 
6744 	/*
6745 	 * Calculate we need for refcount tree change.
6746 	 *
6747 	 * We need to add/modify num_recs in refcount tree, so just calculate
6748 	 * an approximate number we need for refcount tree change.
6749 	 * Sometimes we need to split the tree, and after split,  half recs
6750 	 * will be moved to the new block, and a new block can only provide
6751 	 * half number of recs. So we multiple new blocks by 2.
6752 	 * In the end, we have to add credits for modifying the already
6753 	 * existed refcount block.
6754 	 */
6755 	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6756 	metas.num_recs =
6757 		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6758 		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6759 	metas.num_metas += metas.num_recs;
6760 	*credits += metas.num_recs +
6761 		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6762 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6763 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6764 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6765 	else
6766 		*credits += 1;
6767 
6768 	/* count in the xattr tree change. */
6769 	num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6770 	if (num_free_extents < 0) {
6771 		ret = num_free_extents;
6772 		mlog_errno(ret);
6773 		goto out;
6774 	}
6775 
6776 	if (num_free_extents < len)
6777 		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6778 
6779 	*credits += ocfs2_calc_extend_credits(osb->sb,
6780 					      xt_et->et_root_el, len);
6781 
6782 	if (metas.num_metas) {
6783 		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6784 							meta_ac);
6785 		if (ret) {
6786 			mlog_errno(ret);
6787 			goto out;
6788 		}
6789 	}
6790 
6791 	if (len) {
6792 		ret = ocfs2_reserve_clusters(osb, len, data_ac);
6793 		if (ret)
6794 			mlog_errno(ret);
6795 	}
6796 out:
6797 	if (ret) {
6798 		if (*meta_ac) {
6799 			ocfs2_free_alloc_context(*meta_ac);
6800 			meta_ac = NULL;
6801 		}
6802 	}
6803 
6804 	return ret;
6805 }
6806 
6807 static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6808 				u64 blkno, u64 new_blkno, u32 clusters,
6809 				u32 *cpos, int num_buckets,
6810 				struct ocfs2_alloc_context *meta_ac,
6811 				struct ocfs2_alloc_context *data_ac,
6812 				struct ocfs2_reflink_xattr_tree_args *args)
6813 {
6814 	int i, j, ret = 0;
6815 	struct super_block *sb = args->reflink->old_inode->i_sb;
6816 	int bpb = args->old_bucket->bu_blocks;
6817 	struct ocfs2_xattr_value_buf vb = {
6818 		.vb_access = ocfs2_journal_access,
6819 	};
6820 
6821 	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6822 		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6823 		if (ret) {
6824 			mlog_errno(ret);
6825 			break;
6826 		}
6827 
6828 		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6829 		if (ret) {
6830 			mlog_errno(ret);
6831 			break;
6832 		}
6833 
6834 		ret = ocfs2_xattr_bucket_journal_access(handle,
6835 						args->new_bucket,
6836 						OCFS2_JOURNAL_ACCESS_CREATE);
6837 		if (ret) {
6838 			mlog_errno(ret);
6839 			break;
6840 		}
6841 
6842 		for (j = 0; j < bpb; j++)
6843 			memcpy(bucket_block(args->new_bucket, j),
6844 			       bucket_block(args->old_bucket, j),
6845 			       sb->s_blocksize);
6846 
6847 		/*
6848 		 * Record the start cpos so that we can use it to initialize
6849 		 * our xattr tree we also set the xh_num_bucket for the new
6850 		 * bucket.
6851 		 */
6852 		if (i == 0) {
6853 			*cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6854 					    xh_entries[0].xe_name_hash);
6855 			bucket_xh(args->new_bucket)->xh_num_buckets =
6856 				cpu_to_le16(num_buckets);
6857 		}
6858 
6859 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6860 
6861 		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6862 					args->old_bucket->bu_bhs[0],
6863 					bucket_xh(args->old_bucket),
6864 					args->new_bucket->bu_bhs[0],
6865 					bucket_xh(args->new_bucket),
6866 					&vb, meta_ac,
6867 					ocfs2_get_reflink_xattr_value_root,
6868 					args);
6869 		if (ret) {
6870 			mlog_errno(ret);
6871 			break;
6872 		}
6873 
6874 		/*
6875 		 * Re-access and dirty the bucket to calculate metaecc.
6876 		 * Because we may extend the transaction in reflink_xattr_header
6877 		 * which will let the already accessed block gone.
6878 		 */
6879 		ret = ocfs2_xattr_bucket_journal_access(handle,
6880 						args->new_bucket,
6881 						OCFS2_JOURNAL_ACCESS_WRITE);
6882 		if (ret) {
6883 			mlog_errno(ret);
6884 			break;
6885 		}
6886 
6887 		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6888 
6889 		ocfs2_xattr_bucket_relse(args->old_bucket);
6890 		ocfs2_xattr_bucket_relse(args->new_bucket);
6891 	}
6892 
6893 	ocfs2_xattr_bucket_relse(args->old_bucket);
6894 	ocfs2_xattr_bucket_relse(args->new_bucket);
6895 	return ret;
6896 }
6897 
6898 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6899 				struct inode *inode,
6900 				struct ocfs2_reflink_xattr_tree_args *args,
6901 				struct ocfs2_extent_tree *et,
6902 				struct ocfs2_alloc_context *meta_ac,
6903 				struct ocfs2_alloc_context *data_ac,
6904 				u64 blkno, u32 cpos, u32 len)
6905 {
6906 	int ret, first_inserted = 0;
6907 	u32 p_cluster, num_clusters, reflink_cpos = 0;
6908 	u64 new_blkno;
6909 	unsigned int num_buckets, reflink_buckets;
6910 	unsigned int bpc =
6911 		ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6912 
6913 	ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6914 	if (ret) {
6915 		mlog_errno(ret);
6916 		goto out;
6917 	}
6918 	num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6919 	ocfs2_xattr_bucket_relse(args->old_bucket);
6920 
6921 	while (len && num_buckets) {
6922 		ret = ocfs2_claim_clusters(handle, data_ac,
6923 					   1, &p_cluster, &num_clusters);
6924 		if (ret) {
6925 			mlog_errno(ret);
6926 			goto out;
6927 		}
6928 
6929 		new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6930 		reflink_buckets = min(num_buckets, bpc * num_clusters);
6931 
6932 		ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6933 						 new_blkno, num_clusters,
6934 						 &reflink_cpos, reflink_buckets,
6935 						 meta_ac, data_ac, args);
6936 		if (ret) {
6937 			mlog_errno(ret);
6938 			goto out;
6939 		}
6940 
6941 		/*
6942 		 * For the 1st allocated cluster, we make it use the same cpos
6943 		 * so that the xattr tree looks the same as the original one
6944 		 * in the most case.
6945 		 */
6946 		if (!first_inserted) {
6947 			reflink_cpos = cpos;
6948 			first_inserted = 1;
6949 		}
6950 		ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6951 					  num_clusters, 0, meta_ac);
6952 		if (ret)
6953 			mlog_errno(ret);
6954 
6955 		mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6956 		     (unsigned long long)new_blkno, num_clusters, reflink_cpos);
6957 
6958 		len -= num_clusters;
6959 		blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6960 		num_buckets -= reflink_buckets;
6961 	}
6962 out:
6963 	return ret;
6964 }
6965 
6966 /*
6967  * Create the same xattr extent record in the new inode's xattr tree.
6968  */
6969 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6970 				   struct buffer_head *root_bh,
6971 				   u64 blkno,
6972 				   u32 cpos,
6973 				   u32 len,
6974 				   void *para)
6975 {
6976 	int ret, credits = 0;
6977 	handle_t *handle;
6978 	struct ocfs2_reflink_xattr_tree_args *args =
6979 			(struct ocfs2_reflink_xattr_tree_args *)para;
6980 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6981 	struct ocfs2_alloc_context *meta_ac = NULL;
6982 	struct ocfs2_alloc_context *data_ac = NULL;
6983 	struct ocfs2_extent_tree et;
6984 
6985 	mlog(0, "reflink xattr buckets %llu len %u\n",
6986 	     (unsigned long long)blkno, len);
6987 
6988 	ocfs2_init_xattr_tree_extent_tree(&et,
6989 					  INODE_CACHE(args->reflink->new_inode),
6990 					  args->new_blk_bh);
6991 
6992 	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6993 						      len, &credits,
6994 						      &meta_ac, &data_ac);
6995 	if (ret) {
6996 		mlog_errno(ret);
6997 		goto out;
6998 	}
6999 
7000 	handle = ocfs2_start_trans(osb, credits);
7001 	if (IS_ERR(handle)) {
7002 		ret = PTR_ERR(handle);
7003 		mlog_errno(ret);
7004 		goto out;
7005 	}
7006 
7007 	ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
7008 					  meta_ac, data_ac,
7009 					  blkno, cpos, len);
7010 	if (ret)
7011 		mlog_errno(ret);
7012 
7013 	ocfs2_commit_trans(osb, handle);
7014 
7015 out:
7016 	if (meta_ac)
7017 		ocfs2_free_alloc_context(meta_ac);
7018 	if (data_ac)
7019 		ocfs2_free_alloc_context(data_ac);
7020 	return ret;
7021 }
7022 
7023 /*
7024  * Create reflinked xattr buckets.
7025  * We will add bucket one by one, and refcount all the xattrs in the bucket
7026  * if they are stored outside.
7027  */
7028 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7029 				    struct buffer_head *blk_bh,
7030 				    struct buffer_head *new_blk_bh)
7031 {
7032 	int ret;
7033 	struct ocfs2_reflink_xattr_tree_args para;
7034 
7035 	memset(&para, 0, sizeof(para));
7036 	para.reflink = args;
7037 	para.old_blk_bh = blk_bh;
7038 	para.new_blk_bh = new_blk_bh;
7039 
7040 	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7041 	if (!para.old_bucket) {
7042 		mlog_errno(-ENOMEM);
7043 		return -ENOMEM;
7044 	}
7045 
7046 	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7047 	if (!para.new_bucket) {
7048 		ret = -ENOMEM;
7049 		mlog_errno(ret);
7050 		goto out;
7051 	}
7052 
7053 	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7054 					      ocfs2_reflink_xattr_rec,
7055 					      &para);
7056 	if (ret)
7057 		mlog_errno(ret);
7058 
7059 out:
7060 	ocfs2_xattr_bucket_free(para.old_bucket);
7061 	ocfs2_xattr_bucket_free(para.new_bucket);
7062 	return ret;
7063 }
7064 
7065 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7066 					struct buffer_head *blk_bh)
7067 {
7068 	int ret, indexed = 0;
7069 	struct buffer_head *new_blk_bh = NULL;
7070 	struct ocfs2_xattr_block *xb =
7071 			(struct ocfs2_xattr_block *)blk_bh->b_data;
7072 
7073 
7074 	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7075 		indexed = 1;
7076 
7077 	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7078 					     &new_blk_bh, indexed);
7079 	if (ret) {
7080 		mlog_errno(ret);
7081 		goto out;
7082 	}
7083 
7084 	if (!indexed)
7085 		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7086 	else
7087 		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7088 	if (ret)
7089 		mlog_errno(ret);
7090 
7091 out:
7092 	brelse(new_blk_bh);
7093 	return ret;
7094 }
7095 
7096 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7097 {
7098 	int type = ocfs2_xattr_get_type(xe);
7099 
7100 	return type != OCFS2_XATTR_INDEX_SECURITY &&
7101 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7102 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7103 }
7104 
7105 int ocfs2_reflink_xattrs(struct inode *old_inode,
7106 			 struct buffer_head *old_bh,
7107 			 struct inode *new_inode,
7108 			 struct buffer_head *new_bh,
7109 			 bool preserve_security)
7110 {
7111 	int ret;
7112 	struct ocfs2_xattr_reflink args;
7113 	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7114 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7115 	struct buffer_head *blk_bh = NULL;
7116 	struct ocfs2_cached_dealloc_ctxt dealloc;
7117 	struct ocfs2_refcount_tree *ref_tree;
7118 	struct buffer_head *ref_root_bh = NULL;
7119 
7120 	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7121 				       le64_to_cpu(di->i_refcount_loc),
7122 				       1, &ref_tree, &ref_root_bh);
7123 	if (ret) {
7124 		mlog_errno(ret);
7125 		goto out;
7126 	}
7127 
7128 	ocfs2_init_dealloc_ctxt(&dealloc);
7129 
7130 	args.old_inode = old_inode;
7131 	args.new_inode = new_inode;
7132 	args.old_bh = old_bh;
7133 	args.new_bh = new_bh;
7134 	args.ref_ci = &ref_tree->rf_ci;
7135 	args.ref_root_bh = ref_root_bh;
7136 	args.dealloc = &dealloc;
7137 	if (preserve_security)
7138 		args.xattr_reflinked = NULL;
7139 	else
7140 		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7141 
7142 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7143 		ret = ocfs2_reflink_xattr_inline(&args);
7144 		if (ret) {
7145 			mlog_errno(ret);
7146 			goto out_unlock;
7147 		}
7148 	}
7149 
7150 	if (!di->i_xattr_loc)
7151 		goto out_unlock;
7152 
7153 	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7154 				     &blk_bh);
7155 	if (ret < 0) {
7156 		mlog_errno(ret);
7157 		goto out_unlock;
7158 	}
7159 
7160 	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7161 	if (ret)
7162 		mlog_errno(ret);
7163 
7164 	brelse(blk_bh);
7165 
7166 out_unlock:
7167 	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7168 				   ref_tree, 1);
7169 	brelse(ref_root_bh);
7170 
7171 	if (ocfs2_dealloc_has_cluster(&dealloc)) {
7172 		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7173 		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7174 	}
7175 
7176 out:
7177 	return ret;
7178 }
7179 
7180 /*
7181  * Initialize security and acl for a already created inode.
7182  * Used for reflink a non-preserve-security file.
7183  *
7184  * It uses common api like ocfs2_xattr_set, so the caller
7185  * must not hold any lock expect i_mutex.
7186  */
7187 int ocfs2_init_security_and_acl(struct inode *dir,
7188 				struct inode *inode)
7189 {
7190 	int ret = 0;
7191 	struct buffer_head *dir_bh = NULL;
7192 	struct ocfs2_security_xattr_info si = {
7193 		.enable = 1,
7194 	};
7195 
7196 	ret = ocfs2_init_security_get(inode, dir, &si);
7197 	if (!ret) {
7198 		ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7199 				      si.name, si.value, si.value_len,
7200 				      XATTR_CREATE);
7201 		if (ret) {
7202 			mlog_errno(ret);
7203 			goto leave;
7204 		}
7205 	} else if (ret != -EOPNOTSUPP) {
7206 		mlog_errno(ret);
7207 		goto leave;
7208 	}
7209 
7210 	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7211 	if (ret) {
7212 		mlog_errno(ret);
7213 		goto leave;
7214 	}
7215 
7216 	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7217 	if (ret)
7218 		mlog_errno(ret);
7219 
7220 	ocfs2_inode_unlock(dir, 0);
7221 	brelse(dir_bh);
7222 leave:
7223 	return ret;
7224 }
7225 /*
7226  * 'security' attributes support
7227  */
7228 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7229 					size_t list_size, const char *name,
7230 					size_t name_len, int type)
7231 {
7232 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7233 	const size_t total_len = prefix_len + name_len + 1;
7234 
7235 	if (list && total_len <= list_size) {
7236 		memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7237 		memcpy(list + prefix_len, name, name_len);
7238 		list[prefix_len + name_len] = '\0';
7239 	}
7240 	return total_len;
7241 }
7242 
7243 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7244 				    void *buffer, size_t size, int type)
7245 {
7246 	if (strcmp(name, "") == 0)
7247 		return -EINVAL;
7248 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7249 			       name, buffer, size);
7250 }
7251 
7252 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7253 		const void *value, size_t size, int flags, int type)
7254 {
7255 	if (strcmp(name, "") == 0)
7256 		return -EINVAL;
7257 
7258 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7259 			       name, value, size, flags);
7260 }
7261 
7262 int ocfs2_init_security_get(struct inode *inode,
7263 			    struct inode *dir,
7264 			    struct ocfs2_security_xattr_info *si)
7265 {
7266 	/* check whether ocfs2 support feature xattr */
7267 	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7268 		return -EOPNOTSUPP;
7269 	return security_inode_init_security(inode, dir, &si->name, &si->value,
7270 					    &si->value_len);
7271 }
7272 
7273 int ocfs2_init_security_set(handle_t *handle,
7274 			    struct inode *inode,
7275 			    struct buffer_head *di_bh,
7276 			    struct ocfs2_security_xattr_info *si,
7277 			    struct ocfs2_alloc_context *xattr_ac,
7278 			    struct ocfs2_alloc_context *data_ac)
7279 {
7280 	return ocfs2_xattr_set_handle(handle, inode, di_bh,
7281 				     OCFS2_XATTR_INDEX_SECURITY,
7282 				     si->name, si->value, si->value_len, 0,
7283 				     xattr_ac, data_ac);
7284 }
7285 
7286 const struct xattr_handler ocfs2_xattr_security_handler = {
7287 	.prefix	= XATTR_SECURITY_PREFIX,
7288 	.list	= ocfs2_xattr_security_list,
7289 	.get	= ocfs2_xattr_security_get,
7290 	.set	= ocfs2_xattr_security_set,
7291 };
7292 
7293 /*
7294  * 'trusted' attributes support
7295  */
7296 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7297 				       size_t list_size, const char *name,
7298 				       size_t name_len, int type)
7299 {
7300 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7301 	const size_t total_len = prefix_len + name_len + 1;
7302 
7303 	if (list && total_len <= list_size) {
7304 		memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7305 		memcpy(list + prefix_len, name, name_len);
7306 		list[prefix_len + name_len] = '\0';
7307 	}
7308 	return total_len;
7309 }
7310 
7311 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7312 		void *buffer, size_t size, int type)
7313 {
7314 	if (strcmp(name, "") == 0)
7315 		return -EINVAL;
7316 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7317 			       name, buffer, size);
7318 }
7319 
7320 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7321 		const void *value, size_t size, int flags, int type)
7322 {
7323 	if (strcmp(name, "") == 0)
7324 		return -EINVAL;
7325 
7326 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7327 			       name, value, size, flags);
7328 }
7329 
7330 const struct xattr_handler ocfs2_xattr_trusted_handler = {
7331 	.prefix	= XATTR_TRUSTED_PREFIX,
7332 	.list	= ocfs2_xattr_trusted_list,
7333 	.get	= ocfs2_xattr_trusted_get,
7334 	.set	= ocfs2_xattr_trusted_set,
7335 };
7336 
7337 /*
7338  * 'user' attributes support
7339  */
7340 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7341 				    size_t list_size, const char *name,
7342 				    size_t name_len, int type)
7343 {
7344 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7345 	const size_t total_len = prefix_len + name_len + 1;
7346 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7347 
7348 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7349 		return 0;
7350 
7351 	if (list && total_len <= list_size) {
7352 		memcpy(list, XATTR_USER_PREFIX, prefix_len);
7353 		memcpy(list + prefix_len, name, name_len);
7354 		list[prefix_len + name_len] = '\0';
7355 	}
7356 	return total_len;
7357 }
7358 
7359 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7360 		void *buffer, size_t size, int type)
7361 {
7362 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7363 
7364 	if (strcmp(name, "") == 0)
7365 		return -EINVAL;
7366 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7367 		return -EOPNOTSUPP;
7368 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7369 			       buffer, size);
7370 }
7371 
7372 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7373 		const void *value, size_t size, int flags, int type)
7374 {
7375 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7376 
7377 	if (strcmp(name, "") == 0)
7378 		return -EINVAL;
7379 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7380 		return -EOPNOTSUPP;
7381 
7382 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7383 			       name, value, size, flags);
7384 }
7385 
7386 const struct xattr_handler ocfs2_xattr_user_handler = {
7387 	.prefix	= XATTR_USER_PREFIX,
7388 	.list	= ocfs2_xattr_user_list,
7389 	.get	= ocfs2_xattr_user_get,
7390 	.set	= ocfs2_xattr_user_set,
7391 };
7392