xref: /openbmc/linux/fs/ocfs2/xattr.c (revision 4dc7ccf7)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21 
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39 
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42 
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60 
/*
 * On-disk layout for an xattr value stored outside the entry itself:
 * a value root immediately followed by the one extent record that its
 * l_count of 1 (see def_xv below) leaves room for.
 */
struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};
65 
/*
 * In-memory handle for one xattr bucket: the buffer_heads backing the
 * bucket's blocks plus the inode they belong to.
 */
struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/* The actual buffers that make up the bucket */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};
76 
/*
 * Allocation state threaded through an xattr set operation: the running
 * journal transaction, the reserved metadata and data allocators, and
 * the deferred-deallocation context for clusters freed along the way.
 */
struct ocfs2_xattr_set_ctxt {
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_alloc_context *data_ac;
	struct ocfs2_cached_dealloc_ctxt dealloc;
};
83 
/* Size of the on-disk root that replaces an inline value when the
 * value is stored in its own extent tree. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Largest value that is stored inline next to its name. */
#define OCFS2_XATTR_INLINE_SIZE	80
/* Bytes always kept free between the entry array and the
 * downward-growing name+value region. */
#define OCFS2_XATTR_HEADER_GAP	4
/* Usable xattr bytes inside the inode body. */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/* Usable xattr bytes inside a separate xattr block of inode (ptr). */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
94 
/* Template value root for a freshly created out-of-line value: an
 * otherwise-empty extent list with room for exactly one record. */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
98 
/* Namespace handlers exported to the VFS; NULL-terminated. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_acl_access_handler,
	&ocfs2_xattr_acl_default_handler,
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};
107 
/* Handler lookup by on-disk name index; used by ocfs2_xattr_prefix(). */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
					= &ocfs2_xattr_acl_access_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
					= &ocfs2_xattr_acl_default_handler,
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
};
117 
/*
 * Describes the xattr a set operation is working with: its namespace
 * index, name, and the candidate value.
 */
struct ocfs2_xattr_info {
	int		xi_name_index;
	const char	*xi_name;
	int		xi_name_len;
	const void	*xi_value;
	size_t		xi_value_len;
};
125 
/* State of a search for one xattr in the inode body or an xattr block. */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh point to the block buffer head which has extended attribute
	 * when extended attribute in inode, xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	/* Header of the entry list being searched. */
	struct ocfs2_xattr_header *header;
	/* Bucket holding the entry when the block is indexed. */
	struct ocfs2_xattr_bucket *bucket;
	/* Bounds of the storage region name offsets are relative to. */
	void *base;
	void *end;
	/* Entry the search stopped at; only a match when !not_found. */
	struct ocfs2_xattr_entry *here;
	int not_found;
};
140 
/* Operations on struct ocfs2_xa_entry */
struct ocfs2_xa_loc;
/*
 * Storage-specific operations for an xattr location.  Each backing
 * store (inode body, xattr block, indexed bucket) supplies its own
 * implementation; callers go through these instead of touching the
 * storage directly.
 */
struct ocfs2_xa_loc_operations {
	/*
	 * Journal functions
	 */
	int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
				  int type);
	void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

	/*
	 * Return a pointer to the appropriate buffer in loc->xl_storage
	 * at the given offset from loc->xl_header.
	 */
	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

	/* Can we reuse the existing entry for the new value? */
	int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
			     struct ocfs2_xattr_info *xi);

	/* How much space is needed for the new value? */
	int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
			       struct ocfs2_xattr_info *xi);

	/*
	 * Return the offset of the first name+value pair.  This is
	 * the start of our downward-filling free space.
	 */
	int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

	/*
	 * Remove the name+value at this location.  Do whatever is
	 * appropriate with the remaining name+value pairs.
	 */
	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

	/* Fill xl_entry with a new entry */
	void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

	/* Add name+value storage to an entry */
	void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

	/*
	 * Initialize the value buf's access and bh fields for this entry.
	 * ocfs2_xa_fill_value_buf() will handle the xv pointer.
	 */
	void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_value_buf *vb);
};
190 
/*
 * Describes an xattr entry location.  This is a memory structure
 * tracking the on-disk structure.
 */
struct ocfs2_xa_loc {
	/* This xattr belongs to this inode */
	struct inode *xl_inode;

	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
	struct ocfs2_xattr_header *xl_header;

	/* Bytes from xl_header to the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry this location describes.  If this is
	 * NULL, this location describes the on-disk structure where it
	 * would have been.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * Internal housekeeping
	 */

	/* Buffer(s) containing this entry */
	void *xl_storage;

	/* Operations on the storage backing this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
222 
223 /*
224  * Convenience functions to calculate how much space is needed for a
225  * given name+value pair
226  */
227 static int namevalue_size(int name_len, uint64_t value_len)
228 {
229 	if (value_len > OCFS2_XATTR_INLINE_SIZE)
230 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
231 	else
232 		return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
233 }
234 
/* Name+value storage needed for the xattr described by @xi. */
static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
{
	return namevalue_size(xi->xi_name_len, xi->xi_value_len);
}
239 
/* Name+value storage consumed by the on-disk entry @xe. */
static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
{
	u64 value_len = le64_to_cpu(xe->xe_value_size);

	/* A value larger than the inline limit must never be flagged
	 * as stored locally. */
	BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
	       ocfs2_xattr_is_local(xe));
	return namevalue_size(xe->xe_name_len, value_len);
}
248 
249 
250 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
251 					     struct ocfs2_xattr_header *xh,
252 					     int index,
253 					     int *block_off,
254 					     int *new_offset);
255 
256 static int ocfs2_xattr_block_find(struct inode *inode,
257 				  int name_index,
258 				  const char *name,
259 				  struct ocfs2_xattr_search *xs);
260 static int ocfs2_xattr_index_block_find(struct inode *inode,
261 					struct buffer_head *root_bh,
262 					int name_index,
263 					const char *name,
264 					struct ocfs2_xattr_search *xs);
265 
266 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
267 					struct buffer_head *blk_bh,
268 					char *buffer,
269 					size_t buffer_size);
270 
271 static int ocfs2_xattr_create_index_block(struct inode *inode,
272 					  struct ocfs2_xattr_search *xs,
273 					  struct ocfs2_xattr_set_ctxt *ctxt);
274 
275 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
276 					     struct ocfs2_xattr_info *xi,
277 					     struct ocfs2_xattr_search *xs,
278 					     struct ocfs2_xattr_set_ctxt *ctxt);
279 
280 typedef int (xattr_tree_rec_func)(struct inode *inode,
281 				  struct buffer_head *root_bh,
282 				  u64 blkno, u32 cpos, u32 len, void *para);
283 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
284 					   struct buffer_head *root_bh,
285 					   xattr_tree_rec_func *rec_func,
286 					   void *para);
287 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
288 					struct ocfs2_xattr_bucket *bucket,
289 					void *para);
290 static int ocfs2_rm_xattr_cluster(struct inode *inode,
291 				  struct buffer_head *root_bh,
292 				  u64 blkno,
293 				  u32 cpos,
294 				  u32 len,
295 				  void *para);
296 
297 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
298 				  u64 src_blk, u64 last_blk, u64 to_blk,
299 				  unsigned int start_bucket,
300 				  u32 *first_hash);
301 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
302 					struct ocfs2_dinode *di,
303 					struct ocfs2_xattr_info *xi,
304 					struct ocfs2_xattr_search *xis,
305 					struct ocfs2_xattr_search *xbs,
306 					struct ocfs2_refcount_tree **ref_tree,
307 					int *meta_need,
308 					int *credits);
309 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
310 					   struct ocfs2_xattr_bucket *bucket,
311 					   int offset,
312 					   struct ocfs2_xattr_value_root **xv,
313 					   struct buffer_head **bh);
314 
315 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
316 {
317 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
318 }
319 
320 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
321 {
322 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
323 }
324 
/* Accessors for a loaded bucket: its starting block number, the raw
 * data of block _n, and the xattr header at the start of the bucket. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
328 
329 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
330 {
331 	struct ocfs2_xattr_bucket *bucket;
332 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
333 
334 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
335 
336 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
337 	if (bucket) {
338 		bucket->bu_inode = inode;
339 		bucket->bu_blocks = blks;
340 	}
341 
342 	return bucket;
343 }
344 
345 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
346 {
347 	int i;
348 
349 	for (i = 0; i < bucket->bu_blocks; i++) {
350 		brelse(bucket->bu_bhs[i]);
351 		bucket->bu_bhs[i] = NULL;
352 	}
353 }
354 
355 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
356 {
357 	if (bucket) {
358 		ocfs2_xattr_bucket_relse(bucket);
359 		bucket->bu_inode = NULL;
360 		kfree(bucket);
361 	}
362 }
363 
/*
 * A bucket that has never been written to disk doesn't need to be
 * read.  We just need the buffer_heads.  Don't call this for
 * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
 * them fully.
 *
 * Returns 0 on success or -EIO if a buffer_head could not be obtained;
 * on failure all buffers acquired so far are released.
 */
static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
				   u64 xb_blkno)
{
	int i, rc = 0;

	for (i = 0; i < bucket->bu_blocks; i++) {
		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
					      xb_blkno + i);
		if (!bucket->bu_bhs[i]) {
			rc = -EIO;
			mlog_errno(rc);
			break;
		}

		/* Mark the brand-new buffer uptodate in the inode's
		 * metadata cache unless it is already cached. */
		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
					   bucket->bu_bhs[i]))
			ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
						      bucket->bu_bhs[i]);
	}

	if (rc)
		ocfs2_xattr_bucket_relse(bucket);
	return rc;
}
394 
/* Read the xattr bucket at xb_blkno */
static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
				   u64 xb_blkno)
{
	int rc;

	/* Read all blocks of the bucket through the inode's metadata
	 * cache in one call. */
	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
			       bucket->bu_blocks, bucket->bu_bhs, 0,
			       NULL);
	if (!rc) {
		/* The ecc covers the whole bucket, so it is verified
		 * here rather than per-block; osb_xattr_lock is held
		 * around the check. */
		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
						 bucket->bu_bhs,
						 bucket->bu_blocks,
						 &bucket_xh(bucket)->xh_check);
		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
		if (rc)
			mlog_errno(rc);
	}

	/* On any failure drop whatever buffers the read filled in. */
	if (rc)
		ocfs2_xattr_bucket_relse(bucket);
	return rc;
}
419 
420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
421 					     struct ocfs2_xattr_bucket *bucket,
422 					     int type)
423 {
424 	int i, rc = 0;
425 
426 	for (i = 0; i < bucket->bu_blocks; i++) {
427 		rc = ocfs2_journal_access(handle,
428 					  INODE_CACHE(bucket->bu_inode),
429 					  bucket->bu_bhs[i], type);
430 		if (rc) {
431 			mlog_errno(rc);
432 			break;
433 		}
434 	}
435 
436 	return rc;
437 }
438 
/*
 * Mark every block of the bucket dirty in the journal, recomputing the
 * bucket-wide ecc first (under osb_xattr_lock) so the checksum written
 * to disk matches the new contents.
 */
static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
					     struct ocfs2_xattr_bucket *bucket)
{
	int i;

	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
				   bucket->bu_bhs, bucket->bu_blocks,
				   &bucket_xh(bucket)->xh_check);
	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);

	for (i = 0; i < bucket->bu_blocks; i++)
		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
}
453 
454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
455 					 struct ocfs2_xattr_bucket *src)
456 {
457 	int i;
458 	int blocksize = src->bu_inode->i_sb->s_blocksize;
459 
460 	BUG_ON(dest->bu_blocks != src->bu_blocks);
461 	BUG_ON(dest->bu_inode != src->bu_inode);
462 
463 	for (i = 0; i < src->bu_blocks; i++) {
464 		memcpy(bucket_block(dest, i), bucket_block(src, i),
465 		       blocksize);
466 	}
467 }
468 
/*
 * Validation callback for xattr block reads: verify the block's ecc,
 * signature, self-referencing block number, and filesystem generation.
 * Returns 0 if the block is good, non-zero otherwise.
 */
static int ocfs2_validate_xattr_block(struct super_block *sb,
				      struct buffer_head *bh)
{
	int rc;
	struct ocfs2_xattr_block *xb =
		(struct ocfs2_xattr_block *)bh->b_data;

	mlog(0, "Validating xattr block %llu\n",
	     (unsigned long long)bh->b_blocknr);

	BUG_ON(!buffer_uptodate(bh));

	/*
	 * If the ecc fails, we return the error but otherwise
	 * leave the filesystem running.  We know any error is
	 * local to this block.
	 */
	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
	if (rc)
		return rc;

	/*
	 * Errors after here are fatal
	 */

	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
		ocfs2_error(sb,
			    "Extended attribute block #%llu has bad "
			    "signature %.*s",
			    (unsigned long long)bh->b_blocknr, 7,
			    xb->xb_signature);
		return -EINVAL;
	}

	/* The block must claim the location it was actually read from. */
	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
		ocfs2_error(sb,
			    "Extended attribute block #%llu has an "
			    "invalid xb_blkno of %llu",
			    (unsigned long long)bh->b_blocknr,
			    (unsigned long long)le64_to_cpu(xb->xb_blkno));
		return -EINVAL;
	}

	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
		ocfs2_error(sb,
			    "Extended attribute block #%llu has an invalid "
			    "xb_fs_generation of #%u",
			    (unsigned long long)bh->b_blocknr,
			    le32_to_cpu(xb->xb_fs_generation));
		return -EINVAL;
	}

	return 0;
}
523 
524 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
525 				  struct buffer_head **bh)
526 {
527 	int rc;
528 	struct buffer_head *tmp = *bh;
529 
530 	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
531 			      ocfs2_validate_xattr_block);
532 
533 	/* If ocfs2_read_block() got us a new bh, pass it up. */
534 	if (!rc && !*bh)
535 		*bh = tmp;
536 
537 	return rc;
538 }
539 
540 static inline const char *ocfs2_xattr_prefix(int name_index)
541 {
542 	struct xattr_handler *handler = NULL;
543 
544 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
545 		handler = ocfs2_xattr_handler_map[name_index];
546 
547 	return handler ? handler->prefix : NULL;
548 }
549 
/*
 * Hash an xattr name, seeding with the volume's uuid hash so the same
 * name hashes differently on different filesystems.  The rotate-and-xor
 * step must stay bit-exact: these hashes are stored on disk.
 */
static u32 ocfs2_xattr_name_hash(struct inode *inode,
				 const char *name,
				 int name_len)
{
	/* Get hash value of uuid from super block */
	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
	int i;

	/* hash extended attribute name */
	for (i = 0; i < name_len; i++) {
		hash = (hash << OCFS2_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
		       *name++;
	}

	return hash;
}
567 
/* Total space one xattr consumes: name+value storage plus its entry. */
static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
{
	return namevalue_size(name_len, value_len) +
		sizeof(struct ocfs2_xattr_entry);
}
573 
/* Total space the xattr described by @xi would consume. */
static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
{
	return namevalue_size_xi(xi) +
		sizeof(struct ocfs2_xattr_entry);
}
579 
/* Total space the existing on-disk entry @xe consumes. */
static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
{
	return namevalue_size_xe(xe) +
		sizeof(struct ocfs2_xattr_entry);
}
585 
/*
 * Estimate the allocator reservations needed to attach the security
 * xattr @si to a new inode under @dir: bumps *want_clusters and
 * *xattr_credits and, when an external xattr block will be needed,
 * reserves a metadata block in *xattr_ac.  Returns 0 or -errno.
 */
int ocfs2_calc_security_init(struct inode *dir,
			     struct ocfs2_security_xattr_info *si,
			     int *want_clusters,
			     int *xattr_credits,
			     struct ocfs2_alloc_context **xattr_ac)
{
	int ret = 0;
	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
						 si->value_len);

	/*
	 * The max space of security xattr taken inline is
	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
	 * So reserve one metadata block for it is ok.
	 */
	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}
		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}

	/* reserve clusters for xattr value which will be set in B tree*/
	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
							    si->value_len);

		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
							   new_clusters);
		*want_clusters += new_clusters;
	}
	return ret;
}
623 
624 int ocfs2_calc_xattr_init(struct inode *dir,
625 			  struct buffer_head *dir_bh,
626 			  int mode,
627 			  struct ocfs2_security_xattr_info *si,
628 			  int *want_clusters,
629 			  int *xattr_credits,
630 			  int *want_meta)
631 {
632 	int ret = 0;
633 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
634 	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
635 
636 	if (si->enable)
637 		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
638 						     si->value_len);
639 
640 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
641 		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
642 					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
643 					"", NULL, 0);
644 		if (acl_len > 0) {
645 			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
646 			if (S_ISDIR(mode))
647 				a_size <<= 1;
648 		} else if (acl_len != 0 && acl_len != -ENODATA) {
649 			mlog_errno(ret);
650 			return ret;
651 		}
652 	}
653 
654 	if (!(s_size + a_size))
655 		return ret;
656 
657 	/*
658 	 * The max space of security xattr taken inline is
659 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
660 	 * The max space of acl xattr taken inline is
661 	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
662 	 * when blocksize = 512, may reserve one more cluser for
663 	 * xattr bucket, otherwise reserve one metadata block
664 	 * for them is ok.
665 	 * If this is a new directory with inline data,
666 	 * we choose to reserve the entire inline area for
667 	 * directory contents and force an external xattr block.
668 	 */
669 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
670 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
671 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
672 		*want_meta = *want_meta + 1;
673 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
674 	}
675 
676 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
677 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
678 		*want_clusters += 1;
679 		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
680 	}
681 
682 	/*
683 	 * reserve credits and clusters for xattrs which has large value
684 	 * and have to be set outside
685 	 */
686 	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
687 		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
688 							si->value_len);
689 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
690 							   new_clusters);
691 		*want_clusters += new_clusters;
692 	}
693 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
694 	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
695 		/* for directory, it has DEFAULT and ACCESS two types of acls */
696 		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
697 				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
698 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
699 							   new_clusters);
700 		*want_clusters += new_clusters;
701 	}
702 
703 	return ret;
704 }
705 
/*
 * Append @clusters_to_add clusters to the external value tracked by
 * @vb, using the allocators and transaction already set up in @ctxt.
 * The caller must have reserved enough space: a restart request from
 * the btree code is treated as a bug.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct ocfs2_xattr_value_buf *vb,
					 struct ocfs2_xattr_set_ctxt *ctxt)
{
	int status = 0;
	handle_t *handle = ctxt->handle;
	enum ocfs2_alloc_restarted why;
	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
	struct ocfs2_extent_tree et;

	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	/* Declare the write before touching the value root. */
	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
	status = ocfs2_add_clusters_in_btree(handle,
					     &et,
					     &logical_start,
					     clusters_to_add,
					     0,
					     ctxt->data_ac,
					     ctxt->meta_ac,
					     &why);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, vb->vb_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;

	/*
	 * We should have already allocated enough space before the transaction,
	 * so no need to restart.
	 */
	BUG_ON(why != RESTART_NONE || clusters_to_add);

leave:

	return status;
}
760 
/*
 * Remove @len clusters starting at logical cluster @cpos (physical
 * cluster @phys_cpos) from the external value tracked by @vb.  The
 * extent is dropped from the value's btree, xr_clusters is decremented,
 * and the physical clusters are either refcount-decremented (when the
 * extent was refcounted/shared) or queued for deallocation.
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct ocfs2_xattr_value_buf *vb,
				      u32 cpos, u32 phys_cpos, u32 len,
				      unsigned int ext_flags,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	handle_t *handle = ctxt->handle;
	struct ocfs2_extent_tree et;

	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);

	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
			    OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
				  &ctxt->dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);

	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (ext_flags & OCFS2_EXT_REFCOUNTED)
		ret = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(inode->i_sb,
								 phys_blkno),
					len, ctxt->meta_ac, &ctxt->dealloc, 1);
	else
		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
						  phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}
810 
/*
 * Shrink the external value tracked by @vb from @old_clusters down to
 * @new_clusters, removing one contiguous extent per loop iteration and
 * evicting the freed blocks from the metadata cache.  A no-op when no
 * shrink is required.
 */
static int ocfs2_xattr_shrink_size(struct inode *inode,
				   u32 old_clusters,
				   u32 new_clusters,
				   struct ocfs2_xattr_value_buf *vb,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0;
	unsigned int ext_flags;
	u32 trunc_len, cpos, phys_cpos, alloc_size;
	u64 block;

	if (old_clusters <= new_clusters)
		return 0;

	cpos = new_clusters;
	trunc_len = old_clusters - new_clusters;
	while (trunc_len) {
		/* Find the extent covering cpos; it may be longer than
		 * what is left to truncate. */
		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
					       &alloc_size,
					       &vb->vb_xv->xr_list, &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		if (alloc_size > trunc_len)
			alloc_size = trunc_len;

		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
						 phys_cpos, alloc_size,
						 ext_flags, ctxt);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
						       block, alloc_size);
		cpos += alloc_size;
		trunc_len -= alloc_size;
	}

out:
	return ret;
}
857 
858 static int ocfs2_xattr_value_truncate(struct inode *inode,
859 				      struct ocfs2_xattr_value_buf *vb,
860 				      int len,
861 				      struct ocfs2_xattr_set_ctxt *ctxt)
862 {
863 	int ret;
864 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
865 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
866 
867 	if (new_clusters == old_clusters)
868 		return 0;
869 
870 	if (new_clusters > old_clusters)
871 		ret = ocfs2_xattr_extend_allocation(inode,
872 						    new_clusters - old_clusters,
873 						    vb, ctxt);
874 	else
875 		ret = ocfs2_xattr_shrink_size(inode,
876 					      old_clusters, new_clusters,
877 					      vb, ctxt);
878 
879 	return ret;
880 }
881 
/*
 * Append "<prefix><name>\0" to the list buffer at offset *result and
 * advance *result past it.  When @size is 0 nothing is copied and the
 * call only accumulates the space needed; when the buffer is too small
 * -ERANGE is returned (with *result already advanced).
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	char *out = buffer + *result;
	size_t prefix_len = strlen(prefix);
	size_t needed = prefix_len + name_len + 1;

	*result += needed;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	memcpy(out, prefix, prefix_len);
	memcpy(out + prefix_len, name, name_len);
	out[prefix_len + name_len] = '\0';

	return 0;
}
905 
/*
 * List every entry in @header into @buffer as NUL-terminated
 * "prefix.name" strings.  Entries whose name index has no registered
 * handler are skipped.  Returns the total bytes produced (or required,
 * when @buffer_size is 0), or -ERANGE if the buffer is too small.
 */
static int ocfs2_xattr_list_entries(struct inode *inode,
				    struct ocfs2_xattr_header *header,
				    char *buffer, size_t buffer_size)
{
	size_t result = 0;
	int i, type, ret;
	const char *prefix, *name;

	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
		type = ocfs2_xattr_get_type(entry);
		prefix = ocfs2_xattr_prefix(type);

		if (prefix) {
			/* xe_name_offset is relative to the header. */
			name = (const char *)header +
				le16_to_cpu(entry->xe_name_offset);

			ret = ocfs2_xattr_list_entry(buffer, buffer_size,
						     &result, prefix, name,
						     entry->xe_name_len);
			if (ret)
				return ret;
		}
	}

	return result;
}
933 
934 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
935 					 struct ocfs2_dinode *di)
936 {
937 	struct ocfs2_xattr_header *xh;
938 	int i;
939 
940 	xh = (struct ocfs2_xattr_header *)
941 		 ((void *)di + inode->i_sb->s_blocksize -
942 		 le16_to_cpu(di->i_xattr_inline_size));
943 
944 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
945 		if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
946 			return 1;
947 
948 	return 0;
949 }
950 
951 static int ocfs2_xattr_ibody_list(struct inode *inode,
952 				  struct ocfs2_dinode *di,
953 				  char *buffer,
954 				  size_t buffer_size)
955 {
956 	struct ocfs2_xattr_header *header = NULL;
957 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
958 	int ret = 0;
959 
960 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
961 		return ret;
962 
963 	header = (struct ocfs2_xattr_header *)
964 		 ((void *)di + inode->i_sb->s_blocksize -
965 		 le16_to_cpu(di->i_xattr_inline_size));
966 
967 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
968 
969 	return ret;
970 }
971 
/*
 * List the xattrs stored in the inode's external xattr block, if any.
 * An un-indexed block is walked directly; an indexed block is walked
 * through its bucket tree.  Returns bytes produced or a negative errno.
 */
static int ocfs2_xattr_block_list(struct inode *inode,
				  struct ocfs2_dinode *di,
				  char *buffer,
				  size_t buffer_size)
{
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block allocated. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
		ret = ocfs2_xattr_list_entries(inode, header,
					       buffer, buffer_size);
	} else
		ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
						   buffer, buffer_size);

	brelse(blk_bh);

	return ret;
}
1004 
/*
 * VFS listxattr entry point: concatenate the names stored in the inode
 * body and the external xattr block into @buffer.  Returns the number
 * of bytes written (or needed, when @buffer is NULL), or a negative
 * errno.  Takes the cluster inode lock shared, then ip_xattr_sem.
 */
ssize_t ocfs2_listxattr(struct dentry *dentry,
			char *buffer,
			size_t size)
{
	int ret = 0, i_ret = 0, b_ret = 0;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di = NULL;
	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);

	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
		return -EOPNOTSUPP;

	/* Fast path: inode has no xattrs at all. */
	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return ret;

	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_read(&oi->ip_xattr_sem);
	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
	if (i_ret < 0)
		b_ret = 0;
	else {
		/* Continue filling after the inline names. */
		if (buffer) {
			buffer += i_ret;
			size -= i_ret;
		}
		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
					       buffer, size);
		if (b_ret < 0)
			i_ret = 0;
	}
	up_read(&oi->ip_xattr_sem);
	ocfs2_inode_unlock(dentry->d_inode, 0);

	brelse(di_bh);

	/* On error, one of i_ret/b_ret is the errno and the other 0. */
	return i_ret + b_ret;
}
1049 
/*
 * Linear search of the entry array at xs->header for an xattr matching
 * @name_index and @name.  On return xs->here points at the match, or at
 * the slot where the scan stopped; returns 0 on a match, -ENODATA when
 * no entry matches, -EINVAL for a NULL name.
 */
static int ocfs2_xattr_find_entry(int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_entry *entry;
	size_t name_len;
	int i, cmp = 1;

	if (name == NULL)
		return -EINVAL;

	name_len = strlen(name);
	entry = xs->here;
	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
		/* Compare cheaply first: type, then length, then bytes. */
		cmp = name_index - ocfs2_xattr_get_type(entry);
		if (!cmp)
			cmp = name_len - entry->xe_name_len;
		if (!cmp)
			cmp = memcmp(name, (xs->base +
				     le16_to_cpu(entry->xe_name_offset)),
				     name_len);
		if (cmp == 0)
			break;
		entry += 1;
	}
	xs->here = entry;

	return cmp ? -ENODATA : 0;
}
1079 
/*
 * Read the value of a non-inline ("outside") xattr into @buffer.  The
 * value lives in its own extent tree rooted at @xv; walk it one
 * contiguous cluster run at a time and copy at most @len bytes, block
 * by block.
 */
static int ocfs2_xattr_get_value_outside(struct inode *inode,
					 struct ocfs2_xattr_value_root *xv,
					 void *buffer,
					 size_t len)
{
	u32 cpos, p_cluster, num_clusters, bpc, clusters;
	u64 blkno;
	int i, ret = 0;
	size_t cplen, blocksize;
	struct buffer_head *bh = NULL;
	struct ocfs2_extent_list *el;

	el = &xv->xr_list;
	clusters = le32_to_cpu(xv->xr_clusters);
	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	blocksize = inode->i_sb->s_blocksize;

	cpos = 0;
	while (cpos < clusters) {
		/* Map the next physically-contiguous run of clusters. */
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, el, NULL);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
		/* Copy ocfs2_xattr_value */
		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* The final block of the value may be partial. */
			cplen = len >= blocksize ? blocksize : len;
			memcpy(buffer, bh->b_data, cplen);
			len -= cplen;
			buffer += cplen;

			brelse(bh);
			bh = NULL;
			if (len == 0)
				break;
		}
		cpos += num_clusters;
	}
out:
	return ret;
}
1131 
/*
 * Look up one xattr in the inode body (inline storage at the tail of the
 * dinode block) and copy its value into @buffer.  Returns the value size
 * on success, -ENODATA if not stored inline, -ERANGE if @buffer is too
 * small, or another negative errno.  With a NULL @buffer only the size
 * is returned.
 */
static int ocfs2_xattr_ibody_get(struct inode *inode,
				 int name_index,
				 const char *name,
				 void *buffer,
				 size_t buffer_size,
				 struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct ocfs2_xattr_value_root *xv;
	size_t size;
	int ret = 0;

	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
		return -ENODATA;

	/* Inline xattrs occupy the last i_xattr_inline_size bytes of the
	 * dinode block. */
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	ret = ocfs2_xattr_find_entry(name_index, name, xs);
	if (ret)
		return ret;
	size = le64_to_cpu(xs->here->xe_value_size);
	if (buffer) {
		if (size > buffer_size)
			return -ERANGE;
		if (ocfs2_xattr_is_local(xs->here)) {
			/* Value follows the (padded) name in place. */
			memcpy(buffer, (void *)xs->base +
			       le16_to_cpu(xs->here->xe_name_offset) +
			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
		} else {
			/* Value lives in an external tree; its root follows
			 * the name. */
			xv = (struct ocfs2_xattr_value_root *)
				(xs->base + le16_to_cpu(
				 xs->here->xe_name_offset) +
				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
			ret = ocfs2_xattr_get_value_outside(inode, xv,
							    buffer, size);
			if (ret < 0) {
				mlog_errno(ret);
				return ret;
			}
		}
	}

	return size;
}
1181 
1182 static int ocfs2_xattr_block_get(struct inode *inode,
1183 				 int name_index,
1184 				 const char *name,
1185 				 void *buffer,
1186 				 size_t buffer_size,
1187 				 struct ocfs2_xattr_search *xs)
1188 {
1189 	struct ocfs2_xattr_block *xb;
1190 	struct ocfs2_xattr_value_root *xv;
1191 	size_t size;
1192 	int ret = -ENODATA, name_offset, name_len, i;
1193 	int uninitialized_var(block_off);
1194 
1195 	xs->bucket = ocfs2_xattr_bucket_new(inode);
1196 	if (!xs->bucket) {
1197 		ret = -ENOMEM;
1198 		mlog_errno(ret);
1199 		goto cleanup;
1200 	}
1201 
1202 	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1203 	if (ret) {
1204 		mlog_errno(ret);
1205 		goto cleanup;
1206 	}
1207 
1208 	if (xs->not_found) {
1209 		ret = -ENODATA;
1210 		goto cleanup;
1211 	}
1212 
1213 	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1214 	size = le64_to_cpu(xs->here->xe_value_size);
1215 	if (buffer) {
1216 		ret = -ERANGE;
1217 		if (size > buffer_size)
1218 			goto cleanup;
1219 
1220 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
1221 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1222 		i = xs->here - xs->header->xh_entries;
1223 
1224 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1225 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1226 								bucket_xh(xs->bucket),
1227 								i,
1228 								&block_off,
1229 								&name_offset);
1230 			xs->base = bucket_block(xs->bucket, block_off);
1231 		}
1232 		if (ocfs2_xattr_is_local(xs->here)) {
1233 			memcpy(buffer, (void *)xs->base +
1234 			       name_offset + name_len, size);
1235 		} else {
1236 			xv = (struct ocfs2_xattr_value_root *)
1237 				(xs->base + name_offset + name_len);
1238 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1239 							    buffer, size);
1240 			if (ret < 0) {
1241 				mlog_errno(ret);
1242 				goto cleanup;
1243 			}
1244 		}
1245 	}
1246 	ret = size;
1247 cleanup:
1248 	ocfs2_xattr_bucket_free(xs->bucket);
1249 
1250 	brelse(xs->xattr_bh);
1251 	xs->xattr_bh = NULL;
1252 	return ret;
1253 }
1254 
1255 int ocfs2_xattr_get_nolock(struct inode *inode,
1256 			   struct buffer_head *di_bh,
1257 			   int name_index,
1258 			   const char *name,
1259 			   void *buffer,
1260 			   size_t buffer_size)
1261 {
1262 	int ret;
1263 	struct ocfs2_dinode *di = NULL;
1264 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1265 	struct ocfs2_xattr_search xis = {
1266 		.not_found = -ENODATA,
1267 	};
1268 	struct ocfs2_xattr_search xbs = {
1269 		.not_found = -ENODATA,
1270 	};
1271 
1272 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1273 		return -EOPNOTSUPP;
1274 
1275 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1276 		ret = -ENODATA;
1277 
1278 	xis.inode_bh = xbs.inode_bh = di_bh;
1279 	di = (struct ocfs2_dinode *)di_bh->b_data;
1280 
1281 	down_read(&oi->ip_xattr_sem);
1282 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1283 				    buffer_size, &xis);
1284 	if (ret == -ENODATA && di->i_xattr_loc)
1285 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1286 					    buffer_size, &xbs);
1287 	up_read(&oi->ip_xattr_sem);
1288 
1289 	return ret;
1290 }
1291 
1292 /* ocfs2_xattr_get()
1293  *
1294  * Copy an extended attribute into the buffer provided.
1295  * Buffer is NULL to compute the size of buffer required.
1296  */
1297 static int ocfs2_xattr_get(struct inode *inode,
1298 			   int name_index,
1299 			   const char *name,
1300 			   void *buffer,
1301 			   size_t buffer_size)
1302 {
1303 	int ret;
1304 	struct buffer_head *di_bh = NULL;
1305 
1306 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
1307 	if (ret < 0) {
1308 		mlog_errno(ret);
1309 		return ret;
1310 	}
1311 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1312 				     name, buffer, buffer_size);
1313 
1314 	ocfs2_inode_unlock(inode, 0);
1315 
1316 	brelse(di_bh);
1317 
1318 	return ret;
1319 }
1320 
/*
 * Write @value into the already-allocated extent tree of an outside
 * xattr value.  Each block is journaled, filled (zero-padding the tail
 * of the last block), and dirtied.  The tree must already be big enough
 * to hold @value_len bytes.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   handle_t *handle,
					   struct ocfs2_xattr_value_buf *vb,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	unsigned int ext_flags;
	struct ocfs2_xattr_value_root *xv = vb->vb_xv;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list,
					       &ext_flags);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/* Refcounted (shared) extents must have been CoWed before
		 * we overwrite them. */
		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
					       &bh, NULL);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* Journal the block before modifying it. */
			ret = ocfs2_journal_access(handle,
						   INODE_CACHE(inode),
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero the remainder of a partially-filled block. */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ret = ocfs2_journal_dirty(handle, bh);
			if (ret < 0) {
				mlog_errno(ret);
				goto out;
			}
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out:
	brelse(bh);

	return ret;
}
1399 
1400 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1401 				       int num_entries)
1402 {
1403 	int free_space;
1404 
1405 	if (!needed_space)
1406 		return 0;
1407 
1408 	free_space = free_start -
1409 		sizeof(struct ocfs2_xattr_header) -
1410 		(num_entries * sizeof(struct ocfs2_xattr_entry)) -
1411 		OCFS2_XATTR_HEADER_GAP;
1412 	if (free_space < 0)
1413 		return -EIO;
1414 	if (free_space < needed_space)
1415 		return -ENOSPC;
1416 
1417 	return 0;
1418 }
1419 
/* Dispatch to the storage-specific journal_access for this location. */
static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
				   int type)
{
	return loc->xl_ops->xlo_journal_access(handle, loc, type);
}
1425 
/* Dispatch to the storage-specific journal_dirty for this location. */
static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
{
	loc->xl_ops->xlo_journal_dirty(handle, loc);
}
1430 
/* Give a pointer into the storage for the given offset.  Offsets are
 * relative to the start of the xattr header and must lie inside the
 * storage (xl_size). */
static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
{
	BUG_ON(offset >= loc->xl_size);
	return loc->xl_ops->xlo_offset_pointer(loc, offset);
}
1437 
1438 /*
1439  * Wipe the name+value pair and allow the storage to reclaim it.  This
1440  * must be followed by either removal of the entry or a call to
1441  * ocfs2_xa_add_namevalue().
1442  */
1443 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1444 {
1445 	loc->xl_ops->xlo_wipe_namevalue(loc);
1446 }
1447 
1448 /*
1449  * Find lowest offset to a name+value pair.  This is the start of our
1450  * downward-growing free space.
1451  */
1452 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1453 {
1454 	return loc->xl_ops->xlo_get_free_start(loc);
1455 }
1456 
/* Can we reuse loc->xl_entry's name+value storage for xi? */
static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_can_reuse(loc, xi);
}
1463 
/* How much free space is needed to set the new value.  Returns 0 if it
 * fits, or a negative errno (e.g. -ENOSPC) if not. */
static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi)
{
	return loc->xl_ops->xlo_check_space(loc, xi);
}
1470 
/* Insert a fresh, zeroed entry for @name_hash at the storage-specific
 * position and point loc->xl_entry at it. */
static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	loc->xl_ops->xlo_add_entry(loc, name_hash);
	loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
	/*
	 * We can't leave the new entry's xe_name_offset at zero or
	 * add_namevalue() will go nuts.  We set it to the size of our
	 * storage so that it can never be less than any other entry.
	 */
	loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
}
1482 
/* Reserve a name+value slot for @xi, fill in loc->xl_entry's metadata,
 * zero the slot, and copy in the name.  The value itself is written by
 * the caller afterwards. */
static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
				   struct ocfs2_xattr_info *xi)
{
	int size = namevalue_size_xi(xi);
	int nameval_offset;
	char *nameval_buf;

	loc->xl_ops->xlo_add_namevalue(loc, size);
	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	loc->xl_entry->xe_name_len = xi->xi_name_len;
	ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
	/* Values up to OCFS2_XATTR_INLINE_SIZE live inline ("local"). */
	ocfs2_xattr_set_local(loc->xl_entry,
			      xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);

	nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
	memset(nameval_buf, 0, size);
	memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
}
1502 
/* Populate @vb so it describes the value tree of loc->xl_entry:
 * the backing buffer_head/journal access function (storage-specific)
 * and a pointer to the value root that follows the padded name. */
static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_value_buf *vb)
{
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);

	/* Value bufs are for value trees */
	BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
	BUG_ON(namevalue_size_xe(loc->xl_entry) !=
	       (name_size + OCFS2_XATTR_ROOT_SIZE));

	loc->xl_ops->xlo_fill_value_buf(loc, vb);
	vb->vb_xv =
		(struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
							nameval_offset +
							name_size);
}
1520 
/* Journal-access for block storage.  The size comparison tells a
 * stand-alone xattr block apart from inline-in-dinode storage, so the
 * right metadata-check variant is used. */
static int ocfs2_xa_block_journal_access(handle_t *handle,
					 struct ocfs2_xa_loc *loc, int type)
{
	struct buffer_head *bh = loc->xl_storage;
	ocfs2_journal_access_func access;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		access = ocfs2_journal_access_xb;
	else
		access = ocfs2_journal_access_di;
	return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
}
1535 
/* Journal-dirty for block storage: a single buffer_head backs it all. */
static void ocfs2_xa_block_journal_dirty(handle_t *handle,
					 struct ocfs2_xa_loc *loc)
{
	struct buffer_head *bh = loc->xl_storage;

	ocfs2_journal_dirty(handle, bh);
}
1543 
/* Block storage is contiguous, so an offset is plain pointer math from
 * the header. */
static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
					   int offset)
{
	return (char *)loc->xl_header + offset;
}
1549 
static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
				    struct ocfs2_xattr_info *xi)
{
	/*
	 * Block storage is strict.  If the sizes aren't exact, we will
	 * remove the old one and reinsert the new.
	 */
	return namevalue_size_xe(loc->xl_entry) ==
		namevalue_size_xi(xi);
}
1560 
1561 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1562 {
1563 	struct ocfs2_xattr_header *xh = loc->xl_header;
1564 	int i, count = le16_to_cpu(xh->xh_count);
1565 	int offset, free_start = loc->xl_size;
1566 
1567 	for (i = 0; i < count; i++) {
1568 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1569 		if (offset < free_start)
1570 			free_start = offset;
1571 	}
1572 
1573 	return free_start;
1574 }
1575 
/* Check whether block storage has room for @xi (possibly after
 * reclaiming the existing entry's space). */
static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
				      struct ocfs2_xattr_info *xi)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);

	/*
	 * Block storage will reclaim the original entry before inserting
	 * the new value, so we only need the difference.  If the new
	 * entry is smaller than the old one, we don't need anything.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
	}
	if (needed_space < 0)
		needed_space = 0;
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}
1599 
1600 /*
1601  * Block storage for xattrs keeps the name+value pairs compacted.  When
1602  * we remove one, we have to shift any that preceded it towards the end.
1603  */
1604 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1605 {
1606 	int i, offset;
1607 	int namevalue_offset, first_namevalue_offset, namevalue_size;
1608 	struct ocfs2_xattr_entry *entry = loc->xl_entry;
1609 	struct ocfs2_xattr_header *xh = loc->xl_header;
1610 	int count = le16_to_cpu(xh->xh_count);
1611 
1612 	namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1613 	namevalue_size = namevalue_size_xe(entry);
1614 	first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1615 
1616 	/* Shift the name+value pairs */
1617 	memmove((char *)xh + first_namevalue_offset + namevalue_size,
1618 		(char *)xh + first_namevalue_offset,
1619 		namevalue_offset - first_namevalue_offset);
1620 	memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1621 
1622 	/* Now tell xh->xh_entries about it */
1623 	for (i = 0; i < count; i++) {
1624 		offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1625 		if (offset <= namevalue_offset)
1626 			le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1627 				     namevalue_size);
1628 	}
1629 
1630 	/*
1631 	 * Note that we don't update xh_free_start or xh_name_value_len
1632 	 * because they're not used in block-stored xattrs.
1633 	 */
1634 }
1635 
/* Append a zeroed entry at the end of the array; block storage does not
 * keep entries hash-sorted (unlike buckets). */
static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	int count = le16_to_cpu(loc->xl_header->xh_count);
	loc->xl_entry = &(loc->xl_header->xh_entries[count]);
	le16_add_cpu(&loc->xl_header->xh_count, 1);
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}
1643 
/* Carve @size bytes off the bottom of the downward-growing free space
 * for the new name+value pair. */
static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);

	loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
}
1650 
/* Fill @vb for block storage.  Same size test as
 * ocfs2_xa_block_journal_access(): a stand-alone xattr block gets the
 * xb access function, inline-in-dinode storage gets the di one. */
static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
					  struct ocfs2_xattr_value_buf *vb)
{
	struct buffer_head *bh = loc->xl_storage;

	if (loc->xl_size == (bh->b_size -
			     offsetof(struct ocfs2_xattr_block,
				      xb_attrs.xb_header)))
		vb->vb_access = ocfs2_journal_access_xb;
	else
		vb->vb_access = ocfs2_journal_access_di;
	vb->vb_bh = bh;
}
1664 
1665 /*
1666  * Operations for xattrs stored in blocks.  This includes inline inode
1667  * storage and unindexed ocfs2_xattr_blocks.
1668  */
1669 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1670 	.xlo_journal_access	= ocfs2_xa_block_journal_access,
1671 	.xlo_journal_dirty	= ocfs2_xa_block_journal_dirty,
1672 	.xlo_offset_pointer	= ocfs2_xa_block_offset_pointer,
1673 	.xlo_check_space	= ocfs2_xa_block_check_space,
1674 	.xlo_can_reuse		= ocfs2_xa_block_can_reuse,
1675 	.xlo_get_free_start	= ocfs2_xa_block_get_free_start,
1676 	.xlo_wipe_namevalue	= ocfs2_xa_block_wipe_namevalue,
1677 	.xlo_add_entry		= ocfs2_xa_block_add_entry,
1678 	.xlo_add_namevalue	= ocfs2_xa_block_add_namevalue,
1679 	.xlo_fill_value_buf	= ocfs2_xa_block_fill_value_buf,
1680 };
1681 
/* Journal-access for bucket storage: delegate to the bucket helper,
 * which handles all of the bucket's blocks. */
static int ocfs2_xa_bucket_journal_access(handle_t *handle,
					  struct ocfs2_xa_loc *loc, int type)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
}
1689 
/* Journal-dirty for bucket storage: dirties the whole bucket. */
static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
					  struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;

	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
}
1697 
1698 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1699 					    int offset)
1700 {
1701 	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1702 	int block, block_offset;
1703 
1704 	/* The header is at the front of the bucket */
1705 	block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1706 	block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1707 
1708 	return bucket_block(bucket, block) + block_offset;
1709 }
1710 
/* Buckets can reuse any slot at least as large as the new pair (the
 * slack becomes internal padding), unlike the exact-match rule for
 * block storage. */
static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
				     struct ocfs2_xattr_info *xi)
{
	return namevalue_size_xe(loc->xl_entry) >=
		namevalue_size_xi(xi);
}
1717 
/* Buckets track the free-space boundary on disk in xh_free_start; no
 * scan needed. */
static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
}
1723 
/* If placing @size bytes just below @free_start would cross a block
 * boundary, pull free_start down to the start of its block so the
 * name+value pair lands wholly inside one block. */
static int ocfs2_bucket_align_free_start(struct super_block *sb,
					 int free_start, int size)
{
	/*
	 * We need to make sure that the name+value pair fits within
	 * one block.
	 */
	if (((free_start - size) >> sb->s_blocksize_bits) !=
	    ((free_start - 1) >> sb->s_blocksize_bits))
		free_start -= free_start % sb->s_blocksize;

	return free_start;
}
1737 
/* Check whether bucket storage has room for @xi, accounting for
 * block-alignment of the name+value pair. */
static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
				       struct ocfs2_xattr_info *xi)
{
	int rc;
	int count = le16_to_cpu(loc->xl_header->xh_count);
	int free_start = ocfs2_xa_get_free_start(loc);
	int needed_space = ocfs2_xi_entry_usage(xi);
	int size = namevalue_size_xi(xi);
	struct super_block *sb = loc->xl_inode->i_sb;

	/*
	 * Bucket storage does not reclaim name+value pairs it cannot
	 * reuse.  They live as holes until the bucket fills, and then
	 * the bucket is defragmented.  However, the bucket can reclaim
	 * the ocfs2_xattr_entry.
	 */
	if (loc->xl_entry) {
		/* Don't need space if we're reusing! */
		if (ocfs2_xa_can_reuse_entry(loc, xi))
			needed_space = 0;
		else
			needed_space -= sizeof(struct ocfs2_xattr_entry);
	}
	BUG_ON(needed_space < 0);

	if (free_start < size) {
		/* The pair itself can't fit; only OK if we need nothing. */
		if (needed_space)
			return -ENOSPC;
	} else {
		/*
		 * First we check if it would fit in the first place.
		 * Below, we align the free start to a block.  This may
		 * slide us below the minimum gap.  By checking unaligned
		 * first, we avoid that error.
		 */
		rc = ocfs2_xa_check_space_helper(needed_space, free_start,
						 count);
		if (rc)
			return rc;
		free_start = ocfs2_bucket_align_free_start(sb, free_start,
							   size);
	}
	return ocfs2_xa_check_space_helper(needed_space, free_start, count);
}
1782 
/* Buckets don't compact; just subtract the pair from the accounted
 * name+value bytes, leaving a hole until defragmentation. */
static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
{
	le16_add_cpu(&loc->xl_header->xh_name_value_len,
		     -namevalue_size_xe(loc->xl_entry));
}
1788 
/* Insert a zeroed entry for @name_hash into the hash-sorted entry
 * array, shifting later entries up to make room. */
static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
{
	struct ocfs2_xattr_header *xh = loc->xl_header;
	int count = le16_to_cpu(xh->xh_count);
	int low = 0, high = count - 1, tmp;
	struct ocfs2_xattr_entry *tmp_xe;

	/*
	 * We keep buckets sorted by name_hash, so we need to find
	 * our insert place.
	 */
	while (low <= high && count) {
		tmp = (low + high) / 2;
		tmp_xe = &xh->xh_entries[tmp];

		if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
			low = tmp + 1;
		else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
			high = tmp - 1;
		else {
			/* Equal hashes: insert right here. */
			low = tmp;
			break;
		}
	}

	/* Shift the tail of the array up one slot to open entry 'low'. */
	if (low != count)
		memmove(&xh->xh_entries[low + 1],
			&xh->xh_entries[low],
			((count - low) * sizeof(struct ocfs2_xattr_entry)));

	le16_add_cpu(&xh->xh_count, 1);
	loc->xl_entry = &xh->xh_entries[low];
	memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
}
1823 
/* Reserve @size bytes below the (block-aligned) free start and update
 * the bucket's on-disk accounting (xh_free_start, xh_name_value_len). */
static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
{
	int free_start = ocfs2_xa_get_free_start(loc);
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset;

	free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
	nameval_offset = free_start - size;
	loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
	xh->xh_free_start = cpu_to_le16(nameval_offset);
	le16_add_cpu(&xh->xh_name_value_len, size);

}
1838 
/* Fill @vb for bucket storage: the value lives entirely within one of
 * the bucket's blocks, so hand out that block's buffer_head. */
static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
					   struct ocfs2_xattr_value_buf *vb)
{
	struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
	struct super_block *sb = loc->xl_inode->i_sb;
	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
	int size = namevalue_size_xe(loc->xl_entry);
	int block_offset = nameval_offset >> sb->s_blocksize_bits;

	/* Values are not allowed to straddle block boundaries */
	BUG_ON(block_offset !=
	       ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
	/* We expect the bucket to be filled in */
	BUG_ON(!bucket->bu_bhs[block_offset]);

	vb->vb_access = ocfs2_journal_access;
	vb->vb_bh = bucket->bu_bhs[block_offset];
}
1857 
/* Operations for xattrs stored in buckets.  The xl_storage member is a
 * struct ocfs2_xattr_bucket spanning multiple blocks. */
static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
	.xlo_journal_access	= ocfs2_xa_bucket_journal_access,
	.xlo_journal_dirty	= ocfs2_xa_bucket_journal_dirty,
	.xlo_offset_pointer	= ocfs2_xa_bucket_offset_pointer,
	.xlo_check_space	= ocfs2_xa_bucket_check_space,
	.xlo_can_reuse		= ocfs2_xa_bucket_can_reuse,
	.xlo_get_free_start	= ocfs2_xa_bucket_get_free_start,
	.xlo_wipe_namevalue	= ocfs2_xa_bucket_wipe_namevalue,
	.xlo_add_entry		= ocfs2_xa_bucket_add_entry,
	.xlo_add_namevalue	= ocfs2_xa_bucket_add_namevalue,
	.xlo_fill_value_buf	= ocfs2_xa_bucket_fill_value_buf,
};
1871 
/* Number of clusters backing the entry's outside value tree; zero when
 * the value is stored locally. */
static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
{
	struct ocfs2_xattr_value_buf vb;

	if (ocfs2_xattr_is_local(loc->xl_entry))
		return 0;

	ocfs2_xa_fill_value_buf(loc, &vb);
	return le32_to_cpu(vb.vb_xv->xr_clusters);
}
1882 
/* Resize the entry's outside value tree to @bytes, then re-establish
 * journal access on the whole location (see comment below for why). */
static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
				   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int trunc_rc, access_rc;
	struct ocfs2_xattr_value_buf vb;

	ocfs2_xa_fill_value_buf(loc, &vb);
	trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
					      ctxt);

	/*
	 * The caller of ocfs2_xa_value_truncate() has already called
	 * ocfs2_xa_journal_access on the loc.  However, The truncate code
	 * calls ocfs2_extend_trans().  This may commit the previous
	 * transaction and open a new one.  If this is a bucket, truncate
	 * could leave only vb->vb_bh set up for journaling.  Meanwhile,
	 * the caller is expecting to dirty the entire bucket.  So we must
	 * reset the journal work.  We do this even if truncate has failed,
	 * as it could have failed after committing the extend.
	 */
	access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
					    OCFS2_JOURNAL_ACCESS_WRITE);

	/* Errors in truncate take precedence */
	return trunc_rc ? trunc_rc : access_rc;
}
1909 
/* Drop loc->xl_entry entirely: reclaim its name+value storage, shrink
 * the entry count, and close the gap in the entry array. */
static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
{
	int index, count;
	struct ocfs2_xattr_header *xh = loc->xl_header;
	struct ocfs2_xattr_entry *entry = loc->xl_entry;

	ocfs2_xa_wipe_namevalue(loc);
	loc->xl_entry = NULL;

	le16_add_cpu(&xh->xh_count, -1);
	count = le16_to_cpu(xh->xh_count);

	/*
	 * Only zero out the entry if there are more remaining.  This is
	 * important for an empty bucket, as it keeps track of the
	 * bucket's hash value.  It doesn't hurt empty block storage.
	 */
	if (count) {
		index = ((char *)entry - (char *)&xh->xh_entries) /
			sizeof(struct ocfs2_xattr_entry);
		memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
			(count - index) * sizeof(struct ocfs2_xattr_entry));
		memset(&xh->xh_entries[count], 0,
		       sizeof(struct ocfs2_xattr_entry));
	}
}
1936 
1937 /*
1938  * If we have a problem adjusting the size of an external value during
1939  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1940  * in an intermediate state.  For example, the value may be partially
1941  * truncated.
1942  *
1943  * If the value tree hasn't changed, the extend/truncate went nowhere.
1944  * We have nothing to do.  The caller can treat it as a straight error.
1945  *
1946  * If the value tree got partially truncated, we now have a corrupted
1947  * extended attribute.  We're going to wipe its entry and leak the
1948  * clusters.  Better to leak some storage than leave a corrupt entry.
1949  *
1950  * If the value tree grew, it obviously didn't grow enough for the
1951  * new entry.  We're not going to try and reclaim those clusters either.
1952  * If there was already an external value there (orig_clusters != 0),
1953  * the new clusters are attached safely and we can just leave the old
1954  * value in place.  If there was no external value there, we remove
1955  * the entry.
1956  *
1957  * This way, the xattr block we store in the journal will be consistent.
1958  * If the size change broke because of the journal, no changes will hit
1959  * disk anyway.
1960  */
static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
					    const char *what,
					    unsigned int orig_clusters)
{
	unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
	/* Only used to log the xattr name in the messages below. */
	char *nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));

	if (new_clusters < orig_clusters) {
		/* Partial truncate: entry is corrupt, wipe it and leak. */
		mlog(ML_ERROR,
		     "Partial truncate while %s xattr %.*s.  Leaking "
		     "%u clusters and removing the entry\n",
		     what, loc->xl_entry->xe_name_len, nameval_buf,
		     orig_clusters - new_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (!orig_clusters) {
		/* Partial grow of a fresh value: nothing to preserve. */
		mlog(ML_ERROR,
		     "Unable to allocate an external value for xattr "
		     "%.*s safely.  Leaking %u clusters and removing the "
		     "entry\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
		ocfs2_xa_remove_entry(loc);
	} else if (new_clusters > orig_clusters)
		/* Partial grow of an existing value: keep the old value. */
		mlog(ML_ERROR,
		     "Unable to grow xattr %.*s safely.  %u new clusters "
		     "have been added, but the value will not be "
		     "modified\n",
		     loc->xl_entry->xe_name_len, nameval_buf,
		     new_clusters - orig_clusters);
}
1992 
/* Remove the xattr at loc: truncate any outside value to zero, then
 * delete the entry itself. */
static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
			   struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;

	if (!ocfs2_xattr_is_local(loc->xl_entry)) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
		if (rc) {
			mlog_errno(rc);
			/*
			 * Since this is remove, we can return 0 if
			 * ocfs2_xa_cleanup_value_truncate() is going to
			 * wipe the entry anyway.  So we check the
			 * cluster count as well.
			 */
			if (orig_clusters != ocfs2_xa_value_clusters(loc))
				rc = 0;
			ocfs2_xa_cleanup_value_truncate(loc, "removing",
							orig_clusters);
			if (rc)
				goto out;
		}
	}

	ocfs2_xa_remove_entry(loc);

out:
	return rc;
}
2024 
2025 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2026 {
2027 	int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2028 	char *nameval_buf;
2029 
2030 	nameval_buf = ocfs2_xa_offset_pointer(loc,
2031 				le16_to_cpu(loc->xl_entry->xe_name_offset));
2032 	memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2033 }
2034 
2035 /*
2036  * Take an existing entry and make it ready for the new value.  This
2037  * won't allocate space, but it may free space.  It should be ready for
2038  * ocfs2_xa_prepare_entry() to finish the work.
2039  */
static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
				struct ocfs2_xattr_info *xi,
				struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
	unsigned int orig_clusters;
	char *nameval_buf;
	/* Is the existing value stored inline (local)? */
	int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
	/* Will the new value be stored inline? */
	int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;

	/* Reuse requires the old and new names to occupy the same space. */
	BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
	       name_size);

	nameval_buf = ocfs2_xa_offset_pointer(loc,
				le16_to_cpu(loc->xl_entry->xe_name_offset));
	if (xe_local) {
		/* Old value was inline; just zero the value area.  If the
		 * new value goes external, install a fresh tree root. */
		memset(nameval_buf + name_size, 0,
		       namevalue_size_xe(loc->xl_entry) - name_size);
		if (!xi_local)
			ocfs2_xa_install_value_root(loc);
	} else {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		if (xi_local) {
			/* External -> inline: free all external clusters,
			 * then zero the now-inline value area. */
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc < 0)
				mlog_errno(rc);
			else
				memset(nameval_buf + name_size, 0,
				       namevalue_size_xe(loc->xl_entry) -
				       name_size);
		} else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
			   xi->xi_value_len) {
			/* External -> smaller external: shrink in place. */
			rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
						     ctxt);
			if (rc < 0)
				mlog_errno(rc);
		}

		if (rc) {
			ocfs2_xa_cleanup_value_truncate(loc, "reusing",
							orig_clusters);
			goto out;
		}
	}

	loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
	ocfs2_xattr_set_local(loc->xl_entry, xi_local);

out:
	return rc;
}
2092 
2093 /*
2094  * Prepares loc->xl_entry to receive the new xattr.  This includes
2095  * properly setting up the name+value pair region.  If loc->xl_entry
2096  * already exists, it will take care of modifying it appropriately.
2097  *
2098  * Note that this modifies the data.  You did journal_access already,
2099  * right?
2100  */
static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
				  struct ocfs2_xattr_info *xi,
				  u32 name_hash,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int rc = 0;
	unsigned int orig_clusters;
	__le64 orig_value_size = 0;

	rc = ocfs2_xa_check_space(loc, xi);
	if (rc)
		goto out;

	if (loc->xl_entry) {
		/* Same-sized name: recycle the existing entry in place. */
		if (ocfs2_xa_can_reuse_entry(loc, xi)) {
			orig_value_size = loc->xl_entry->xe_value_size;
			rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
			if (rc)
				goto out;
			goto alloc_value;
		}

		/* Overwrite: free any external value before wiping the
		 * old name+value region. */
		if (!ocfs2_xattr_is_local(loc->xl_entry)) {
			orig_clusters = ocfs2_xa_value_clusters(loc);
			rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
			if (rc) {
				mlog_errno(rc);
				ocfs2_xa_cleanup_value_truncate(loc,
								"overwriting",
								orig_clusters);
				goto out;
			}
		}
		ocfs2_xa_wipe_namevalue(loc);
	} else
		ocfs2_xa_add_entry(loc, name_hash);

	/*
	 * If we get here, we have a blank entry.  Fill it.  We grow our
	 * name+value pair back from the end.
	 */
	ocfs2_xa_add_namevalue(loc, xi);
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xa_install_value_root(loc);

alloc_value:
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		orig_clusters = ocfs2_xa_value_clusters(loc);
		rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
		if (rc < 0) {
			/*
			 * If we tried to grow an existing external value,
			 * ocfs2_xa_cleanup_value_truncate() is going to
			 * let it stand.  We have to restore its original
			 * value size.
			 */
			loc->xl_entry->xe_value_size = orig_value_size;
			ocfs2_xa_cleanup_value_truncate(loc, "growing",
							orig_clusters);
			mlog_errno(rc);
		}
	}

out:
	return rc;
}
2167 
2168 /*
2169  * Store the value portion of the name+value pair.  This will skip
2170  * values that are stored externally.  Their tree roots were set up
2171  * by ocfs2_xa_prepare_entry().
2172  */
2173 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2174 				struct ocfs2_xattr_info *xi,
2175 				struct ocfs2_xattr_set_ctxt *ctxt)
2176 {
2177 	int rc = 0;
2178 	int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2179 	int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2180 	char *nameval_buf;
2181 	struct ocfs2_xattr_value_buf vb;
2182 
2183 	nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2184 	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2185 		ocfs2_xa_fill_value_buf(loc, &vb);
2186 		rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2187 						     ctxt->handle, &vb,
2188 						     xi->xi_value,
2189 						     xi->xi_value_len);
2190 	} else
2191 		memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2192 
2193 	return rc;
2194 }
2195 
/*
 * Set, replace, or remove the xattr described by xi at loc.  Takes
 * journal access for the storage behind loc, dispatches to the remove
 * or prepare+store paths, and dirties the buffer on the way out.
 */
static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
			struct ocfs2_xattr_info *xi,
			struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
					      xi->xi_name_len);

	ret = ocfs2_xa_journal_access(ctxt->handle, loc,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * From here on out, everything is going to modify the buffer a
	 * little.  Errors are going to leave the xattr header in a
	 * sane state.  Thus, even with errors we dirty the sucker.
	 */

	/* Don't worry, we are never called with !xi_value and !xl_entry */
	if (!xi->xi_value) {
		/* A NULL value means remove. */
		ret = ocfs2_xa_remove(loc, ctxt);
		goto out_dirty;
	}

	ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out_dirty;
	}

	ret = ocfs2_xa_store_value(loc, xi, ctxt);
	if (ret)
		mlog_errno(ret);

out_dirty:
	ocfs2_xa_journal_dirty(ctxt->handle, loc);

out:
	return ret;
}
2240 
2241 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2242 				     struct inode *inode,
2243 				     struct buffer_head *bh,
2244 				     struct ocfs2_xattr_entry *entry)
2245 {
2246 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2247 
2248 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2249 
2250 	loc->xl_inode = inode;
2251 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2252 	loc->xl_storage = bh;
2253 	loc->xl_entry = entry;
2254 	loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2255 	loc->xl_header =
2256 		(struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2257 					      loc->xl_size);
2258 }
2259 
2260 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2261 					  struct inode *inode,
2262 					  struct buffer_head *bh,
2263 					  struct ocfs2_xattr_entry *entry)
2264 {
2265 	struct ocfs2_xattr_block *xb =
2266 		(struct ocfs2_xattr_block *)bh->b_data;
2267 
2268 	BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2269 
2270 	loc->xl_inode = inode;
2271 	loc->xl_ops = &ocfs2_xa_block_loc_ops;
2272 	loc->xl_storage = bh;
2273 	loc->xl_header = &(xb->xb_attrs.xb_header);
2274 	loc->xl_entry = entry;
2275 	loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2276 					     xb_attrs.xb_header);
2277 }
2278 
2279 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2280 					   struct ocfs2_xattr_bucket *bucket,
2281 					   struct ocfs2_xattr_entry *entry)
2282 {
2283 	loc->xl_inode = bucket->bu_inode;
2284 	loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2285 	loc->xl_storage = bucket;
2286 	loc->xl_header = bucket_xh(bucket);
2287 	loc->xl_entry = entry;
2288 	loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2289 }
2290 
2291 /*
2292  * In xattr remove, if it is stored outside and refcounted, we may have
2293  * the chance to split the refcount tree. So need the allocators.
2294  */
static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
					struct ocfs2_xattr_value_root *xv,
					struct ocfs2_caching_info *ref_ci,
					struct buffer_head *ref_root_bh,
					struct ocfs2_alloc_context **meta_ac,
					int *ref_credits)
{
	int ret, meta_add = 0;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;

	*ref_credits = 0;
	/* Look at the value's first extent to see if it is refcounted. */
	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
				       &num_clusters,
				       &xv->xr_list,
				       &ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Non-refcounted values need no extra allocators for removal. */
	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	/* Ask how much metadata and how many journal credits a delete
	 * may need; a refcount split can allocate new tree blocks. */
	ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
						 ref_root_bh, xv,
						 &meta_add, ref_credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
						meta_add, meta_ac);
	if (ret)
		mlog_errno(ret);

out:
	return ret;
}
2335 
2336 static int ocfs2_remove_value_outside(struct inode*inode,
2337 				      struct ocfs2_xattr_value_buf *vb,
2338 				      struct ocfs2_xattr_header *header,
2339 				      struct ocfs2_caching_info *ref_ci,
2340 				      struct buffer_head *ref_root_bh)
2341 {
2342 	int ret = 0, i, ref_credits;
2343 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2344 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2345 	void *val;
2346 
2347 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2348 
2349 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2350 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2351 
2352 		if (ocfs2_xattr_is_local(entry))
2353 			continue;
2354 
2355 		val = (void *)header +
2356 			le16_to_cpu(entry->xe_name_offset);
2357 		vb->vb_xv = (struct ocfs2_xattr_value_root *)
2358 			(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2359 
2360 		ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2361 							 ref_ci, ref_root_bh,
2362 							 &ctxt.meta_ac,
2363 							 &ref_credits);
2364 
2365 		ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2366 					ocfs2_remove_extent_credits(osb->sb));
2367 		if (IS_ERR(ctxt.handle)) {
2368 			ret = PTR_ERR(ctxt.handle);
2369 			mlog_errno(ret);
2370 			break;
2371 		}
2372 
2373 		ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2374 		if (ret < 0) {
2375 			mlog_errno(ret);
2376 			break;
2377 		}
2378 
2379 		ocfs2_commit_trans(osb, ctxt.handle);
2380 		if (ctxt.meta_ac) {
2381 			ocfs2_free_alloc_context(ctxt.meta_ac);
2382 			ctxt.meta_ac = NULL;
2383 		}
2384 	}
2385 
2386 	if (ctxt.meta_ac)
2387 		ocfs2_free_alloc_context(ctxt.meta_ac);
2388 	ocfs2_schedule_truncate_log_flush(osb, 1);
2389 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2390 	return ret;
2391 }
2392 
2393 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2394 				    struct buffer_head *di_bh,
2395 				    struct ocfs2_caching_info *ref_ci,
2396 				    struct buffer_head *ref_root_bh)
2397 {
2398 
2399 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2400 	struct ocfs2_xattr_header *header;
2401 	int ret;
2402 	struct ocfs2_xattr_value_buf vb = {
2403 		.vb_bh = di_bh,
2404 		.vb_access = ocfs2_journal_access_di,
2405 	};
2406 
2407 	header = (struct ocfs2_xattr_header *)
2408 		 ((void *)di + inode->i_sb->s_blocksize -
2409 		 le16_to_cpu(di->i_xattr_inline_size));
2410 
2411 	ret = ocfs2_remove_value_outside(inode, &vb, header,
2412 					 ref_ci, ref_root_bh);
2413 
2414 	return ret;
2415 }
2416 
/* Arguments threaded through ocfs2_iterate_xattr_index_block() to
 * ocfs2_rm_xattr_cluster() when tearing down an indexed xattr block. */
struct ocfs2_rm_xattr_bucket_para {
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
};
2421 
2422 static int ocfs2_xattr_block_remove(struct inode *inode,
2423 				    struct buffer_head *blk_bh,
2424 				    struct ocfs2_caching_info *ref_ci,
2425 				    struct buffer_head *ref_root_bh)
2426 {
2427 	struct ocfs2_xattr_block *xb;
2428 	int ret = 0;
2429 	struct ocfs2_xattr_value_buf vb = {
2430 		.vb_bh = blk_bh,
2431 		.vb_access = ocfs2_journal_access_xb,
2432 	};
2433 	struct ocfs2_rm_xattr_bucket_para args = {
2434 		.ref_ci = ref_ci,
2435 		.ref_root_bh = ref_root_bh,
2436 	};
2437 
2438 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2439 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2440 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2441 		ret = ocfs2_remove_value_outside(inode, &vb, header,
2442 						 ref_ci, ref_root_bh);
2443 	} else
2444 		ret = ocfs2_iterate_xattr_index_block(inode,
2445 						blk_bh,
2446 						ocfs2_rm_xattr_cluster,
2447 						&args);
2448 
2449 	return ret;
2450 }
2451 
/*
 * Tear down and free one external xattr block: remove all values it
 * holds, then return the block itself to its suballocator.
 */
static int ocfs2_xattr_free_block(struct inode *inode,
				  u64 block,
				  struct ocfs2_caching_info *ref_ci,
				  struct buffer_head *ref_root_bh)
{
	struct inode *xb_alloc_inode;
	struct buffer_head *xb_alloc_bh = NULL;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	handle_t *handle;
	int ret = 0;
	u64 blk, bg_blkno;
	u16 bit;

	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Free any externally-stored values first. */
	ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Locate the suballocator group the block was allocated from. */
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
	blk = le64_to_cpu(xb->xb_blkno);
	bit = le16_to_cpu(xb->xb_suballoc_bit);
	bg_blkno = ocfs2_which_suballoc_group(blk, bit);

	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
				EXTENT_ALLOC_SYSTEM_INODE,
				le16_to_cpu(xb->xb_suballoc_slot));
	if (!xb_alloc_inode) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}
	mutex_lock(&xb_alloc_inode->i_mutex);

	/* Cluster lock the allocator inode before freeing the bit. */
	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
				       bit, bg_blkno, 1);
	if (ret < 0)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, handle);
out_unlock:
	ocfs2_inode_unlock(xb_alloc_inode, 1);
	brelse(xb_alloc_bh);
out_mutex:
	mutex_unlock(&xb_alloc_inode->i_mutex);
	iput(xb_alloc_inode);
out:
	brelse(blk_bh);
	return ret;
}
2523 
2524 /*
2525  * ocfs2_xattr_remove()
2526  *
2527  * Free extended attribute resources associated with this inode.
2528  */
int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	struct buffer_head *ref_root_bh = NULL;
	struct ocfs2_caching_info *ref_ci = NULL;
	handle_t *handle;
	int ret;

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return 0;

	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
		return 0;

	/* Refcounted inodes need the refcount tree locked while their
	 * external xattr values are torn down. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
		ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
					       le64_to_cpu(di->i_refcount_loc),
					       1, &ref_tree, &ref_root_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		ref_ci = &ref_tree->rf_ci;

	}

	/* Remove inline xattrs stored in the inode block itself. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_ibody_remove(inode, di_bh,
					       ref_ci, ref_root_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* Remove and free the external xattr block, if any. */
	if (di->i_xattr_loc) {
		ret = ocfs2_xattr_free_block(inode,
					     le64_to_cpu(di->i_xattr_loc),
					     ref_ci, ref_root_bh);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/* Finally, clear the xattr location and feature flags on disk. */
	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	di->i_xattr_loc = 0;

	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
	brelse(ref_root_bh);
	return ret;
}
2608 
2609 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2610 					struct ocfs2_dinode *di)
2611 {
2612 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2613 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2614 	int free;
2615 
2616 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2617 		return 0;
2618 
2619 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2620 		struct ocfs2_inline_data *idata = &di->id2.i_data;
2621 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2622 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
2623 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
2624 			le64_to_cpu(di->i_size);
2625 	} else {
2626 		struct ocfs2_extent_list *el = &di->id2.i_list;
2627 		free = (le16_to_cpu(el->l_count) -
2628 			le16_to_cpu(el->l_next_free_rec)) *
2629 			sizeof(struct ocfs2_extent_rec);
2630 	}
2631 	if (free >= xattrsize)
2632 		return 1;
2633 
2634 	return 0;
2635 }
2636 
2637 /*
2638  * ocfs2_xattr_ibody_find()
2639  *
2640  * Find extended attribute in inode block and
2641  * fill search info into struct ocfs2_xattr_search.
2642  */
static int ocfs2_xattr_ibody_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	int ret;
	int has_space = 0;

	/* The minimum block size leaves no room for inline xattrs. */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
		return 0;

	/* No inline area yet: only worth setting up the search if one
	 * could be created. */
	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
		down_read(&oi->ip_alloc_sem);
		has_space = ocfs2_xattr_has_space_inline(inode, di);
		up_read(&oi->ip_alloc_sem);
		if (!has_space)
			return 0;
	}

	/* Point the search at the (actual or would-be) inline area in
	 * the tail of the inode block. */
	xs->xattr_bh = xs->inode_bh;
	xs->end = (void *)di + inode->i_sb->s_blocksize;
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
	else
		xs->header = (struct ocfs2_xattr_header *)
			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
	xs->base = (void *)xs->header;
	xs->here = xs->header->xh_entries;

	/* Find the named attribute. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_xattr_find_entry(name_index, name, xs);
		if (ret && ret != -ENODATA)
			return ret;
		xs->not_found = ret;
	}

	return 0;
}
2685 
/*
 * Create the inline xattr area in the inode block, shrinking the data
 * portion (inline data or extent list) to make room.
 */
static int ocfs2_xattr_ibody_init(struct inode *inode,
				  struct buffer_head *di_bh,
				  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int xattrsize = osb->s_xattr_inline_size;

	if (!ocfs2_xattr_has_space_inline(inode, di)) {
		ret = -ENOSPC;
		goto out;
	}

	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Adjust extent record count or inline data size
	 * to reserve space for extended attribute.
	 */
	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		struct ocfs2_inline_data *idata = &di->id2.i_data;
		le16_add_cpu(&idata->id_count, -xattrsize);
	} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
		struct ocfs2_extent_list *el = &di->id2.i_list;
		le16_add_cpu(&el->l_count, -(xattrsize /
					     sizeof(struct ocfs2_extent_rec)));
	}
	di->i_xattr_inline_size = cpu_to_le16(xattrsize);

	/* Flag the inode as carrying inline xattrs. */
	spin_lock(&oi->ip_lock);
	oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
	spin_unlock(&oi->ip_lock);

	ret = ocfs2_journal_dirty(ctxt->handle, di_bh);
	if (ret < 0)
		mlog_errno(ret);

out:
	return ret;
}
2734 
2735 /*
2736  * ocfs2_xattr_ibody_set()
2737  *
2738  * Set, replace or remove an extended attribute into inode block.
2739  *
2740  */
2741 static int ocfs2_xattr_ibody_set(struct inode *inode,
2742 				 struct ocfs2_xattr_info *xi,
2743 				 struct ocfs2_xattr_search *xs,
2744 				 struct ocfs2_xattr_set_ctxt *ctxt)
2745 {
2746 	int ret;
2747 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2748 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2749 	struct ocfs2_xa_loc loc;
2750 
2751 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2752 		return -ENOSPC;
2753 
2754 	down_write(&oi->ip_alloc_sem);
2755 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2756 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
2757 			ret = -ENOSPC;
2758 			goto out;
2759 		}
2760 	}
2761 
2762 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2763 		ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2764 		if (ret) {
2765 			if (ret != -ENOSPC)
2766 				mlog_errno(ret);
2767 			goto out;
2768 		}
2769 	}
2770 
2771 	ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2772 				 xs->not_found ? NULL : xs->here);
2773 	ret = ocfs2_xa_set(&loc, xi, ctxt);
2774 	if (ret) {
2775 		if (ret != -ENOSPC)
2776 			mlog_errno(ret);
2777 		goto out;
2778 	}
2779 	xs->here = loc.xl_entry;
2780 
2781 out:
2782 	up_write(&oi->ip_alloc_sem);
2783 
2784 	return ret;
2785 }
2786 
2787 /*
2788  * ocfs2_xattr_block_find()
2789  *
2790  * Find extended attribute in external block and
2791  * fill search info into struct ocfs2_xattr_search.
2792  */
static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_xattr_block *xb;
	int ret = 0;

	/* No external xattr block attached to this inode. */
	if (!di->i_xattr_loc)
		return ret;

	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	xs->xattr_bh = blk_bh;
	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;

	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
		/* Flat block: search the header in place. */
		xs->header = &xb->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
		xs->here = xs->header->xh_entries;

		ret = ocfs2_xattr_find_entry(name_index, name, xs);
	} else
		/* Indexed block: descend the bucket tree. */
		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
						   name_index,
						   name, xs);

	/* -ENODATA just means "not found"; record it in xs->not_found
	 * and keep blk_bh for the caller.  Real errors release it. */
	if (ret && ret != -ENODATA) {
		xs->xattr_bh = NULL;
		goto cleanup;
	}
	xs->not_found = ret;
	return 0;
cleanup:
	brelse(blk_bh);

	return ret;
}
2839 
2840 static int ocfs2_create_xattr_block(struct inode *inode,
2841 				    struct buffer_head *inode_bh,
2842 				    struct ocfs2_xattr_set_ctxt *ctxt,
2843 				    int indexed,
2844 				    struct buffer_head **ret_bh)
2845 {
2846 	int ret;
2847 	u16 suballoc_bit_start;
2848 	u32 num_got;
2849 	u64 first_blkno;
2850 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2851 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2852 	struct buffer_head *new_bh = NULL;
2853 	struct ocfs2_xattr_block *xblk;
2854 
2855 	ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2856 				      inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2857 	if (ret < 0) {
2858 		mlog_errno(ret);
2859 		goto end;
2860 	}
2861 
2862 	ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1,
2863 				   &suballoc_bit_start, &num_got,
2864 				   &first_blkno);
2865 	if (ret < 0) {
2866 		mlog_errno(ret);
2867 		goto end;
2868 	}
2869 
2870 	new_bh = sb_getblk(inode->i_sb, first_blkno);
2871 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2872 
2873 	ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2874 				      new_bh,
2875 				      OCFS2_JOURNAL_ACCESS_CREATE);
2876 	if (ret < 0) {
2877 		mlog_errno(ret);
2878 		goto end;
2879 	}
2880 
2881 	/* Initialize ocfs2_xattr_block */
2882 	xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2883 	memset(xblk, 0, inode->i_sb->s_blocksize);
2884 	strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2885 	xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2886 	xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2887 	xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2888 	xblk->xb_blkno = cpu_to_le64(first_blkno);
2889 	if (indexed) {
2890 		struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2891 		xr->xt_clusters = cpu_to_le32(1);
2892 		xr->xt_last_eb_blk = 0;
2893 		xr->xt_list.l_tree_depth = 0;
2894 		xr->xt_list.l_count = cpu_to_le16(
2895 					ocfs2_xattr_recs_per_xb(inode->i_sb));
2896 		xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2897 		xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2898 	}
2899 	ocfs2_journal_dirty(ctxt->handle, new_bh);
2900 
2901 	/* Add it to the inode */
2902 	di->i_xattr_loc = cpu_to_le64(first_blkno);
2903 
2904 	spin_lock(&OCFS2_I(inode)->ip_lock);
2905 	OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2906 	di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2907 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2908 
2909 	ocfs2_journal_dirty(ctxt->handle, inode_bh);
2910 
2911 	*ret_bh = new_bh;
2912 	new_bh = NULL;
2913 
2914 end:
2915 	brelse(new_bh);
2916 	return ret;
2917 }
2918 
2919 /*
2920  * ocfs2_xattr_block_set()
2921  *
2922  * Set, replace or remove an extended attribute into external block.
2923  *
2924  */
static int ocfs2_xattr_block_set(struct inode *inode,
				 struct ocfs2_xattr_info *xi,
				 struct ocfs2_xattr_search *xs,
				 struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct buffer_head *new_bh = NULL;
	struct ocfs2_xattr_block *xblk = NULL;
	int ret;
	struct ocfs2_xa_loc loc;

	/* No external block yet: create a flat (non-indexed) one and
	 * point the search at it. */
	if (!xs->xattr_bh) {
		ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
					       0, &new_bh);
		if (ret) {
			mlog_errno(ret);
			goto end;
		}

		xs->xattr_bh = new_bh;
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
		xs->header = &xblk->xb_attrs.xb_header;
		xs->base = (void *)xs->header;
		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
		xs->here = xs->header->xh_entries;
	} else
		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;

	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
		ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
					      xs->not_found ? NULL : xs->here);

		ret = ocfs2_xa_set(&loc, xi, ctxt);
		if (!ret)
			xs->here = loc.xl_entry;
		else if (ret != -ENOSPC)
			goto end;
		else {
			/* Flat block is full: convert it to an indexed
			 * tree, then retry via the bucket path below. */
			ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
			if (ret)
				goto end;
		}
	}

	/* Indexed (either originally or just converted): set the xattr
	 * through the bucket code. */
	if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
		ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);

end:
	return ret;
}
2974 
2975 /* Check whether the new xattr can be inserted into the inode. */
2976 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2977 				       struct ocfs2_xattr_info *xi,
2978 				       struct ocfs2_xattr_search *xs)
2979 {
2980 	struct ocfs2_xattr_entry *last;
2981 	int free, i;
2982 	size_t min_offs = xs->end - xs->base;
2983 
2984 	if (!xs->header)
2985 		return 0;
2986 
2987 	last = xs->header->xh_entries;
2988 
2989 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2990 		size_t offs = le16_to_cpu(last->xe_name_offset);
2991 		if (offs < min_offs)
2992 			min_offs = offs;
2993 		last += 1;
2994 	}
2995 
2996 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2997 	if (free < 0)
2998 		return 0;
2999 
3000 	BUG_ON(!xs->not_found);
3001 
3002 	if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3003 		return 1;
3004 
3005 	return 0;
3006 }
3007 
/*
 * ocfs2_calc_xattr_set_need()
 *
 * Estimate the resources a pending xattr set will consume: data
 * clusters (*clusters_need), metadata blocks (*meta_need) and journal
 * credits (*credits_need).  Each out-pointer may be NULL when the
 * caller is not interested in that figure.
 *
 * @xis and @xbs hold the results of the preceding inode-body and
 * xattr-block searches; their ->not_found flags tell us whether we are
 * adding a new attribute, replacing (xi->xi_value != NULL) or removing
 * (xi->xi_value == NULL) an existing one.
 *
 * Returns 0 on success, or a negative error if the existing xattr
 * block could not be read.
 */
static int ocfs2_calc_xattr_set_need(struct inode *inode,
				     struct ocfs2_dinode *di,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xis,
				     struct ocfs2_xattr_search *xbs,
				     int *clusters_need,
				     int *meta_need,
				     int *credits_need)
{
	int ret = 0, old_in_xb = 0;
	int clusters_add = 0, meta_add = 0, credits = 0;
	struct buffer_head *bh = NULL;
	struct ocfs2_xattr_block *xb = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	struct ocfs2_xattr_value_root *xv = NULL;
	char *base = NULL;
	int name_offset, name_len = 0;
	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
						    xi->xi_value_len);
	u64 value_size;

	/*
	 * Calculate the clusters we need to write.
	 * No matter whether we replace an old one or add a new one,
	 * we need this for writing.
	 */
	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
		credits += new_clusters *
			   ocfs2_clusters_to_blocks(inode->i_sb, 1);

	if (xis->not_found && xbs->not_found) {
		/* Brand-new xattr: no old entry to account for. */
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);

		if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
			clusters_add += new_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							&def_xv.xv.xr_list,
							new_clusters);
		}

		goto meta_guess;
	}

	/* Locate the old entry: inode body, xattr block, or bucket. */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		credits += OCFS2_INODE_UPDATE_CREDITS;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;
		old_in_xb = 1;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/*
			 * NOTE(review): the return value of
			 * ocfs2_xattr_bucket_get_name_value() is stored
			 * in ret but not checked; with an in-range
			 * index derived from a found entry it cannot
			 * fail — confirm before relying on this.
			 */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			base = bucket_block(xbs->bucket, block_off);
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		} else {
			base = xbs->base;
			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
		}
	}

	/*
	 * delete a xattr doesn't need metadata and cluster allocation.
	 * so just calculate the credits and return.
	 *
	 * The credits for removing the value tree will be extended
	 * by ocfs2_remove_extent itself.
	 */
	if (!xi->xi_value) {
		if (!ocfs2_xattr_is_local(xe))
			credits += ocfs2_remove_extent_credits(inode->i_sb);

		goto out;
	}

	/* do cluster allocation guess first. */
	value_size = le64_to_cpu(xe->xe_value_size);

	if (old_in_xb) {
		/*
		 * In xattr set, we always try to set the xe in inode first,
		 * so if it can be inserted into inode successfully, the old
		 * one will be removed from the xattr block, and this xattr
		 * will be inserted into inode as a new xattr in inode.
		 */
		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
			clusters_add += new_clusters;
			credits += ocfs2_remove_extent_credits(inode->i_sb) +
				    OCFS2_INODE_UPDATE_CREDITS;
			if (!ocfs2_xattr_is_local(xe))
				credits += ocfs2_calc_extend_credits(
							inode->i_sb,
							&def_xv.xv.xr_list,
							new_clusters);
			goto out;
		}
	}

	if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* the new values will be stored outside. */
		u32 old_clusters = 0;

		if (!ocfs2_xattr_is_local(xe)) {
			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
								 value_size);
			xv = (struct ocfs2_xattr_value_root *)
			     (base + name_offset + name_len);
			value_size = OCFS2_XATTR_ROOT_SIZE;
		} else
			xv = &def_xv.xv;

		if (old_clusters >= new_clusters) {
			/* Shrinking only needs credits to free extents. */
			credits += ocfs2_remove_extent_credits(inode->i_sb);
			goto out;
		} else {
			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
			clusters_add += new_clusters - old_clusters;
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     &xv->xr_list,
							     new_clusters -
							     old_clusters);
			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
				goto out;
		}
	} else {
		/*
		 * Now the new value will be stored inside. So if the new
		 * value is smaller than the size of value root or the old
		 * value, we don't need any allocation, otherwise we have
		 * to guess metadata allocation.
		 */
		if ((ocfs2_xattr_is_local(xe) &&
		     (value_size >= xi->xi_value_len)) ||
		    (!ocfs2_xattr_is_local(xe) &&
		     OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
			goto out;
	}

meta_guess:
	/* calculate metadata allocation. */
	if (di->i_xattr_loc) {
		/* An external xattr block (or indexed tree) exists. */
		if (!xbs->xattr_bh) {
			ret = ocfs2_read_xattr_block(inode,
						     le64_to_cpu(di->i_xattr_loc),
						     &bh);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			xb = (struct ocfs2_xattr_block *)bh->b_data;
		} else
			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;

		/*
		 * If there is already an xattr tree, good, we can calculate
		 * like other b-trees. Otherwise we may have the chance of
		 * create a tree, the credit calculation is borrowed from
		 * ocfs2_calc_extend_credits with root_el = NULL. And the
		 * new tree will be cluster based, so no meta is needed.
		 */
		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			struct ocfs2_extent_list *el =
				 &xb->xb_attrs.xb_root.xt_list;
			meta_add += ocfs2_extend_meta_needed(el);
			credits += ocfs2_calc_extend_credits(inode->i_sb,
							     el, 1);
		} else
			credits += OCFS2_SUBALLOC_ALLOC + 1;

		/*
		 * This cluster will be used either for new bucket or for
		 * new xattr block.
		 * If the cluster size is the same as the bucket size, one
		 * more is needed since we may need to extend the bucket
		 * also.
		 */
		clusters_add += 1;
		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
		if (OCFS2_XATTR_BUCKET_SIZE ==
			OCFS2_SB(inode->i_sb)->s_clustersize) {
			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
			clusters_add += 1;
		}
	} else {
		/* No xattr block yet: one metadata block to create it. */
		meta_add += 1;
		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
	}
out:
	if (clusters_need)
		*clusters_need = clusters_add;
	if (meta_need)
		*meta_need = meta_add;
	if (credits_need)
		*credits_need = credits;
	brelse(bh);
	return ret;
}
3216 
3217 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3218 				     struct ocfs2_dinode *di,
3219 				     struct ocfs2_xattr_info *xi,
3220 				     struct ocfs2_xattr_search *xis,
3221 				     struct ocfs2_xattr_search *xbs,
3222 				     struct ocfs2_xattr_set_ctxt *ctxt,
3223 				     int extra_meta,
3224 				     int *credits)
3225 {
3226 	int clusters_add, meta_add, ret;
3227 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3228 
3229 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3230 
3231 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3232 
3233 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3234 					&clusters_add, &meta_add, credits);
3235 	if (ret) {
3236 		mlog_errno(ret);
3237 		return ret;
3238 	}
3239 
3240 	meta_add += extra_meta;
3241 	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3242 	     "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3243 
3244 	if (meta_add) {
3245 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3246 							&ctxt->meta_ac);
3247 		if (ret) {
3248 			mlog_errno(ret);
3249 			goto out;
3250 		}
3251 	}
3252 
3253 	if (clusters_add) {
3254 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3255 		if (ret)
3256 			mlog_errno(ret);
3257 	}
3258 out:
3259 	if (ret) {
3260 		if (ctxt->meta_ac) {
3261 			ocfs2_free_alloc_context(ctxt->meta_ac);
3262 			ctxt->meta_ac = NULL;
3263 		}
3264 
3265 		/*
3266 		 * We cannot have an error and a non null ctxt->data_ac.
3267 		 */
3268 	}
3269 
3270 	return ret;
3271 }
3272 
/*
 * __ocfs2_xattr_set_handle()
 *
 * Perform the actual set/replace/remove inside an already-running
 * transaction (ctxt->handle).  Strategy:
 *
 *   - removal (xi->xi_value == NULL): clear the entry wherever the
 *     preceding search found it (inode body or external block);
 *   - set: try the inode body first.  On success, remove any stale
 *     copy from the external block; on -ENOSPC, fall back to the
 *     external block and then remove any stale copy from the inode.
 *
 * Each cleanup step recalculates the credits it needs — with the
 * search state temporarily forced to "not found" so the estimate
 * matches the upcoming removal — and extends the transaction before
 * proceeding.  Finally the inode ctime is updated under the same
 * handle.
 */
static int __ocfs2_xattr_set_handle(struct inode *inode,
				    struct ocfs2_dinode *di,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xis,
				    struct ocfs2_xattr_search *xbs,
				    struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret = 0, credits, old_found;

	if (!xi->xi_value) {
		/* Remove existing extended attribute */
		if (!xis->not_found)
			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		else if (!xbs->not_found)
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
	} else {
		/* We always try to set extended attribute into inode first*/
		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
		if (!ret && !xbs->not_found) {
			/*
			 * If succeed and that extended attribute existing in
			 * external block, then we will remove it.
			 */
			xi->xi_value = NULL;
			xi->xi_value_len = 0;

			/*
			 * Pretend the in-inode copy is absent so the
			 * credit estimate covers only the block removal.
			 */
			old_found = xis->not_found;
			xis->not_found = -ENODATA;
			ret = ocfs2_calc_xattr_set_need(inode,
							di,
							xi,
							xis,
							xbs,
							NULL,
							NULL,
							&credits);
			xis->not_found = old_found;
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			ret = ocfs2_extend_trans(ctxt->handle, credits +
					ctxt->handle->h_buffer_credits);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
		} else if (ret == -ENOSPC) {
			if (di->i_xattr_loc && !xbs->xattr_bh) {
				/*
				 * The external block was never searched
				 * (the inode-body hit short-circuited it);
				 * do that now before writing there.
				 */
				ret = ocfs2_xattr_block_find(inode,
							     xi->xi_name_index,
							     xi->xi_name, xbs);
				if (ret)
					goto out;

				old_found = xis->not_found;
				xis->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				xis->not_found = old_found;
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits +
					ctxt->handle->h_buffer_credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
			}
			/*
			 * If no space in inode, we will set extended attribute
			 * into external block.
			 */
			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
			if (ret)
				goto out;
			if (!xis->not_found) {
				/*
				 * If succeed and that extended attribute
				 * existing in inode, we will remove it.
				 */
				xi->xi_value = NULL;
				xi->xi_value_len = 0;
				xbs->not_found = -ENODATA;
				ret = ocfs2_calc_xattr_set_need(inode,
								di,
								xi,
								xis,
								xbs,
								NULL,
								NULL,
								&credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}

				ret = ocfs2_extend_trans(ctxt->handle, credits +
						ctxt->handle->h_buffer_credits);
				if (ret) {
					mlog_errno(ret);
					goto out;
				}
				ret = ocfs2_xattr_ibody_set(inode, xi,
							    xis, ctxt);
			}
		}
	}

	if (!ret) {
		/* Update inode ctime. */
		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
					      xis->inode_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		inode->i_ctime = CURRENT_TIME;
		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
	}
out:
	return ret;
}
3411 
/*
 * ocfs2_xattr_set_handle()
 *
 * This function is only called during inode creation, to initialize
 * the security/ACL xattrs of the new inode.  All transaction credits
 * (and the @meta_ac/@data_ac reservations) have already been set up
 * by mknod, so the caller's @handle is reused rather than starting a
 * new transaction here.
 */
int ocfs2_xattr_set_handle(handle_t *handle,
			   struct inode *inode,
			   struct buffer_head *di_bh,
			   int name_index,
			   const char *name,
			   const void *value,
			   size_t value_len,
			   int flags,
			   struct ocfs2_alloc_context *meta_ac,
			   struct ocfs2_alloc_context *data_ac)
{
	struct ocfs2_dinode *di;
	int ret;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	/* Both searches start out "not found" until proven otherwise. */
	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	/* Wrap the caller's handle and reservations for the helpers. */
	struct ocfs2_xattr_set_ctxt ctxt = {
		.handle = handle,
		.meta_ac = meta_ac,
		.data_ac = data_ac,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * In extreme situation, may need xattr bucket when
	 * block size is too small. And we have already reserved
	 * the credits for bucket in mknod.
	 */
	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
		xbs.bucket = ocfs2_xattr_bucket_new(inode);
		if (!xbs.bucket) {
			mlog_errno(-ENOMEM);
			return -ENOMEM;
		}
	}

	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);

	/* Look in the inode body first, then in the external block. */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

cleanup:
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}
3492 
/*
 * ocfs2_xattr_set()
 *
 * Set, replace or remove an extended attribute for this inode.
 * value is NULL to remove an existing extended attribute, else either
 * create or replace an extended attribute.
 *
 * Takes the inode cluster lock and ip_xattr_sem, sizes and reserves
 * the needed allocations and journal credits, then runs the change
 * through __ocfs2_xattr_set_handle() in a single transaction.
 */
int ocfs2_xattr_set(struct inode *inode,
		    int name_index,
		    const char *name,
		    const void *value,
		    size_t value_len,
		    int flags)
{
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int ret, credits, ref_meta = 0, ref_credits = 0;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
	struct ocfs2_refcount_tree *ref_tree = NULL;

	struct ocfs2_xattr_info xi = {
		.xi_name_index = name_index,
		.xi_name = name,
		.xi_name_len = strlen(name),
		.xi_value = value,
		.xi_value_len = value_len,
	};

	struct ocfs2_xattr_search xis = {
		.not_found = -ENODATA,
	};

	struct ocfs2_xattr_search xbs = {
		.not_found = -ENODATA,
	};

	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
		return -EOPNOTSUPP;

	/*
	 * Only xbs will be used on indexed trees.  xis doesn't need a
	 * bucket.
	 */
	xbs.bucket = ocfs2_xattr_bucket_new(inode);
	if (!xbs.bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	ret = ocfs2_inode_lock(inode, &di_bh, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto cleanup_nolock;
	}
	xis.inode_bh = xbs.inode_bh = di_bh;
	di = (struct ocfs2_dinode *)di_bh->b_data;

	down_write(&OCFS2_I(inode)->ip_xattr_sem);
	/*
	 * Scan inode and external block to find the same name
	 * extended attribute and collect search information.
	 */
	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
	if (ret)
		goto cleanup;
	if (xis.not_found) {
		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
		if (ret)
			goto cleanup;
	}

	/* Enforce XATTR_REPLACE / XATTR_CREATE semantics. */
	if (xis.not_found && xbs.not_found) {
		ret = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		ret = 0;
		if (!value)
			goto cleanup;
	} else {
		ret = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}

	/* Check whether the value is refcounted and do some preparation. */
	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
	    (!xis.not_found || !xbs.not_found)) {
		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
						   &xis, &xbs, &ref_tree,
						   &ref_meta, &ref_credits);
		if (ret) {
			mlog_errno(ret);
			goto cleanup;
		}
	}

	/* Flush the truncate log, if needed, before reserving clusters. */
	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mutex_unlock(&tl_inode->i_mutex);
			mlog_errno(ret);
			goto cleanup;
		}
	}
	mutex_unlock(&tl_inode->i_mutex);

	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
					&xbs, &ctxt, ref_meta, &credits);
	if (ret) {
		mlog_errno(ret);
		goto cleanup;
	}

	/* we need to update inode's ctime field, so add credit for it. */
	credits += OCFS2_INODE_UPDATE_CREDITS;
	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto cleanup;
	}

	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);

	ocfs2_commit_trans(osb, ctxt.handle);

	/* Release reservations and run any deferred frees. */
	if (ctxt.data_ac)
		ocfs2_free_alloc_context(ctxt.data_ac);
	if (ctxt.meta_ac)
		ocfs2_free_alloc_context(ctxt.meta_ac);
	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
		ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &ctxt.dealloc);

cleanup:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
	up_write(&OCFS2_I(inode)->ip_xattr_sem);
	if (!value && !ret) {
		/* Removal may have emptied the refcount tree; drop it. */
		ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
		if (ret)
			mlog_errno(ret);
	}
	ocfs2_inode_unlock(inode, 1);
cleanup_nolock:
	brelse(di_bh);
	brelse(xbs.xattr_bh);
	ocfs2_xattr_bucket_free(xbs.bucket);

	return ret;
}
3648 
3649 /*
3650  * Find the xattr extent rec which may contains name_hash.
3651  * e_cpos will be the first name hash of the xattr rec.
3652  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3653  */
3654 static int ocfs2_xattr_get_rec(struct inode *inode,
3655 			       u32 name_hash,
3656 			       u64 *p_blkno,
3657 			       u32 *e_cpos,
3658 			       u32 *num_clusters,
3659 			       struct ocfs2_extent_list *el)
3660 {
3661 	int ret = 0, i;
3662 	struct buffer_head *eb_bh = NULL;
3663 	struct ocfs2_extent_block *eb;
3664 	struct ocfs2_extent_rec *rec = NULL;
3665 	u64 e_blkno = 0;
3666 
3667 	if (el->l_tree_depth) {
3668 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3669 				      &eb_bh);
3670 		if (ret) {
3671 			mlog_errno(ret);
3672 			goto out;
3673 		}
3674 
3675 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3676 		el = &eb->h_list;
3677 
3678 		if (el->l_tree_depth) {
3679 			ocfs2_error(inode->i_sb,
3680 				    "Inode %lu has non zero tree depth in "
3681 				    "xattr tree block %llu\n", inode->i_ino,
3682 				    (unsigned long long)eb_bh->b_blocknr);
3683 			ret = -EROFS;
3684 			goto out;
3685 		}
3686 	}
3687 
3688 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3689 		rec = &el->l_recs[i];
3690 
3691 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3692 			e_blkno = le64_to_cpu(rec->e_blkno);
3693 			break;
3694 		}
3695 	}
3696 
3697 	if (!e_blkno) {
3698 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3699 			    "record (%u, %u, 0) in xattr", inode->i_ino,
3700 			    le32_to_cpu(rec->e_cpos),
3701 			    ocfs2_rec_clusters(el, rec));
3702 		ret = -EROFS;
3703 		goto out;
3704 	}
3705 
3706 	*p_blkno = le64_to_cpu(rec->e_blkno);
3707 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3708 	if (e_cpos)
3709 		*e_cpos = le32_to_cpu(rec->e_cpos);
3710 out:
3711 	brelse(eb_bh);
3712 	return ret;
3713 }
3714 
/*
 * Per-bucket callback for ocfs2_iterate_xattr_buckets().  A non-zero
 * return stops the iteration and is propagated to the caller.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
3718 
3719 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3720 				   struct ocfs2_xattr_bucket *bucket,
3721 				   int name_index,
3722 				   const char *name,
3723 				   u32 name_hash,
3724 				   u16 *xe_index,
3725 				   int *found)
3726 {
3727 	int i, ret = 0, cmp = 1, block_off, new_offset;
3728 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3729 	size_t name_len = strlen(name);
3730 	struct ocfs2_xattr_entry *xe = NULL;
3731 	char *xe_name;
3732 
3733 	/*
3734 	 * We don't use binary search in the bucket because there
3735 	 * may be multiple entries with the same name hash.
3736 	 */
3737 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3738 		xe = &xh->xh_entries[i];
3739 
3740 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
3741 			continue;
3742 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3743 			break;
3744 
3745 		cmp = name_index - ocfs2_xattr_get_type(xe);
3746 		if (!cmp)
3747 			cmp = name_len - xe->xe_name_len;
3748 		if (cmp)
3749 			continue;
3750 
3751 		ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3752 							xh,
3753 							i,
3754 							&block_off,
3755 							&new_offset);
3756 		if (ret) {
3757 			mlog_errno(ret);
3758 			break;
3759 		}
3760 
3761 
3762 		xe_name = bucket_block(bucket, block_off) + new_offset;
3763 		if (!memcmp(name, xe_name, name_len)) {
3764 			*xe_index = i;
3765 			*found = 1;
3766 			ret = 0;
3767 			break;
3768 		}
3769 	}
3770 
3771 	return ret;
3772 }
3773 
/*
 * Find the specified xattr entry in a series of buckets.
 * This series start from p_blkno and last for num_clusters.
 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
 * the num of the valid buckets.
 *
 * Return the buffer_head this xattr should reside in. And if the xattr's
 * hash is in the gap of 2 buckets, return the lower bucket.
 *
 * Implemented as a binary search over buckets by entry hash, followed
 * by a linear scan of the candidate bucket.  On -ENODATA, xs is still
 * filled in with the bucket the entry would be inserted into.
 */
static int ocfs2_xattr_bucket_find(struct inode *inode,
				   int name_index,
				   const char *name,
				   u32 name_hash,
				   u64 p_blkno,
				   u32 first_hash,
				   u32 num_clusters,
				   struct ocfs2_xattr_search *xs)
{
	int ret, found = 0;
	struct ocfs2_xattr_header *xh = NULL;
	struct ocfs2_xattr_entry *xe = NULL;
	u16 index = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
	int low_bucket = 0, bucket, high_bucket;
	struct ocfs2_xattr_bucket *search;
	u32 last_hash;
	u64 blkno, lower_blkno = 0;

	search = ocfs2_xattr_bucket_new(inode);
	if (!search) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Read the first bucket to learn how many valid buckets exist. */
	ret = ocfs2_read_xattr_bucket(search, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xh = bucket_xh(search);
	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
	while (low_bucket <= high_bucket) {
		ocfs2_xattr_bucket_relse(search);

		bucket = (low_bucket + high_bucket) / 2;
		blkno = p_blkno + bucket * blk_per_bucket;
		ret = ocfs2_read_xattr_bucket(search, blkno);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		xh = bucket_xh(search);
		xe = &xh->xh_entries[0];
		/* Target hash below this bucket's first entry: go left. */
		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
			high_bucket = bucket - 1;
			continue;
		}

		/*
		 * Check whether the hash of the last entry in our
		 * bucket is larger than the search one. for an empty
		 * bucket, the last one is also the first one.
		 */
		if (xh->xh_count)
			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];

		last_hash = le32_to_cpu(xe->xe_name_hash);

		/* record lower_blkno which may be the insert place. */
		lower_blkno = blkno;

		/* Target hash above this bucket's last entry: go right. */
		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
			low_bucket = bucket + 1;
			continue;
		}

		/* the searched xattr should reside in this bucket if exists. */
		ret = ocfs2_find_xe_in_bucket(inode, search,
					      name_index, name, name_hash,
					      &index, &found);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
		break;
	}

	/*
	 * Record the bucket we have found.
	 * When the xattr's hash value is in the gap of 2 buckets, we will
	 * always set it to the previous bucket.
	 */
	if (!lower_blkno)
		lower_blkno = p_blkno;

	/* This should be in cache - we just read it during the search */
	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	xs->header = bucket_xh(xs->bucket);
	xs->base = bucket_block(xs->bucket, 0);
	xs->end = xs->base + inode->i_sb->s_blocksize;

	if (found) {
		xs->here = &xs->header->xh_entries[index];
		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
		     (unsigned long long)bucket_blkno(xs->bucket), index);
	} else
		ret = -ENODATA;

out:
	ocfs2_xattr_bucket_free(search);
	return ret;
}
3894 
3895 static int ocfs2_xattr_index_block_find(struct inode *inode,
3896 					struct buffer_head *root_bh,
3897 					int name_index,
3898 					const char *name,
3899 					struct ocfs2_xattr_search *xs)
3900 {
3901 	int ret;
3902 	struct ocfs2_xattr_block *xb =
3903 			(struct ocfs2_xattr_block *)root_bh->b_data;
3904 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3905 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3906 	u64 p_blkno = 0;
3907 	u32 first_hash, num_clusters = 0;
3908 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3909 
3910 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3911 		return -ENODATA;
3912 
3913 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3914 	     name, name_hash, name_index);
3915 
3916 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3917 				  &num_clusters, el);
3918 	if (ret) {
3919 		mlog_errno(ret);
3920 		goto out;
3921 	}
3922 
3923 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3924 
3925 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3926 	     "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3927 	     first_hash);
3928 
3929 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3930 				      p_blkno, first_hash, num_clusters, xs);
3931 
3932 out:
3933 	return ret;
3934 }
3935 
/*
 * ocfs2_iterate_xattr_buckets()
 *
 * Walk every valid bucket in the extent starting at @blkno and
 * spanning @clusters clusters, invoking @func (if non-NULL) on each.
 * The true bucket count is read from xh_num_buckets of the first
 * bucket; the initial clusters*bpc estimate only bounds the loop
 * until then.
 *
 * Iteration stops at the first error from @func; -ERANGE is passed
 * through without logging because callers use it to end a listing
 * early (e.g. when the output buffer fills up).
 */
static int ocfs2_iterate_xattr_buckets(struct inode *inode,
				       u64 blkno,
				       u32 clusters,
				       xattr_bucket_func *func,
				       void *para)
{
	int i, ret = 0;
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
	u32 num_buckets = clusters * bpc;
	struct ocfs2_xattr_bucket *bucket;

	bucket = ocfs2_xattr_bucket_new(inode);
	if (!bucket) {
		mlog_errno(-ENOMEM);
		return -ENOMEM;
	}

	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
	     clusters, (unsigned long long)blkno);

	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
		ret = ocfs2_read_xattr_bucket(bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);

		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
		     (unsigned long long)blkno,
		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
		if (func) {
			ret = func(inode, bucket, para);
			if (ret && ret != -ERANGE)
				mlog_errno(ret);
			/* Fall through to bucket_relse() */
		}

		ocfs2_xattr_bucket_relse(bucket);
		if (ret)
			break;
	}

	ocfs2_xattr_bucket_free(bucket);
	return ret;
}
3988 
/*
 * Cursor threaded through ocfs2_iterate_xattr_buckets() while listing
 * the names stored in an indexed xattr tree.
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* caller-supplied output buffer */
	size_t buffer_size;	/* capacity of buffer in bytes */
	size_t result;		/* bytes written (or needed) so far */
};
3994 
3995 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3996 					     struct ocfs2_xattr_header *xh,
3997 					     int index,
3998 					     int *block_off,
3999 					     int *new_offset)
4000 {
4001 	u16 name_offset;
4002 
4003 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4004 		return -EINVAL;
4005 
4006 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4007 
4008 	*block_off = name_offset >> sb->s_blocksize_bits;
4009 	*new_offset = name_offset % sb->s_blocksize;
4010 
4011 	return 0;
4012 }
4013 
4014 static int ocfs2_list_xattr_bucket(struct inode *inode,
4015 				   struct ocfs2_xattr_bucket *bucket,
4016 				   void *para)
4017 {
4018 	int ret = 0, type;
4019 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4020 	int i, block_off, new_offset;
4021 	const char *prefix, *name;
4022 
4023 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4024 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4025 		type = ocfs2_xattr_get_type(entry);
4026 		prefix = ocfs2_xattr_prefix(type);
4027 
4028 		if (prefix) {
4029 			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4030 								bucket_xh(bucket),
4031 								i,
4032 								&block_off,
4033 								&new_offset);
4034 			if (ret)
4035 				break;
4036 
4037 			name = (const char *)bucket_block(bucket, block_off) +
4038 				new_offset;
4039 			ret = ocfs2_xattr_list_entry(xl->buffer,
4040 						     xl->buffer_size,
4041 						     &xl->result,
4042 						     prefix, name,
4043 						     entry->xe_name_len);
4044 			if (ret)
4045 				break;
4046 		}
4047 	}
4048 
4049 	return ret;
4050 }
4051 
/*
 * ocfs2_iterate_xattr_index_block()
 *
 * Visit every extent record of an indexed xattr tree, highest hash
 * range first, calling @rec_func on each.  The walk starts with the
 * record covering UINT_MAX and steps down to just below the previous
 * record's e_cpos, finishing once it reaches the record that starts
 * at hash 0.
 *
 * As with bucket iteration, -ERANGE from @rec_func ends the walk
 * without being logged as an error.
 */
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *blk_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
	int ret = 0;
	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
	u64 p_blkno = 0;

	if (!el->l_next_free_rec || !rec_func)
		return 0;

	while (name_hash > 0) {
		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
					  &e_cpos, &num_clusters, el);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
			       num_clusters, para);
		if (ret) {
			if (ret != -ERANGE)
				mlog_errno(ret);
			break;
		}

		/* The record starting at hash 0 is the last one. */
		if (e_cpos == 0)
			break;

		/* Continue with the hash range just below this record. */
		name_hash = e_cpos - 1;
	}

	return ret;

}
4092 
/*
 * xattr_tree_rec_func used when listing names: iterate all buckets in
 * the extent at @blkno (@len clusters), feeding each bucket through
 * ocfs2_list_xattr_bucket() with the ocfs2_xattr_tree_list in @para.
 * @root_bh and @cpos are unused here but required by the callback type.
 */
static int ocfs2_list_xattr_tree_rec(struct inode *inode,
				     struct buffer_head *root_bh,
				     u64 blkno, u32 cpos, u32 len, void *para)
{
	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
					   ocfs2_list_xattr_bucket, para);
}
4100 
4101 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4102 					     struct buffer_head *blk_bh,
4103 					     char *buffer,
4104 					     size_t buffer_size)
4105 {
4106 	int ret;
4107 	struct ocfs2_xattr_tree_list xl = {
4108 		.buffer = buffer,
4109 		.buffer_size = buffer_size,
4110 		.result = 0,
4111 	};
4112 
4113 	ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4114 					      ocfs2_list_xattr_tree_rec, &xl);
4115 	if (ret) {
4116 		mlog_errno(ret);
4117 		goto out;
4118 	}
4119 
4120 	ret = xl.result;
4121 out:
4122 	return ret;
4123 }
4124 
4125 static int cmp_xe(const void *a, const void *b)
4126 {
4127 	const struct ocfs2_xattr_entry *l = a, *r = b;
4128 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
4129 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
4130 
4131 	if (l_hash > r_hash)
4132 		return 1;
4133 	if (l_hash < r_hash)
4134 		return -1;
4135 	return 0;
4136 }
4137 
4138 static void swap_xe(void *a, void *b, int size)
4139 {
4140 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4141 
4142 	tmp = *l;
4143 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4144 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4145 }
4146 
4147 /*
4148  * When the ocfs2_xattr_block is filled up, new bucket will be created
4149  * and all the xattr entries will be moved to the new bucket.
4150  * The header goes at the start of the bucket, and the names+values are
4151  * filled from the end.  This is why *target starts as the last buffer.
4152  * Note: we need to sort the entries since they are not saved in order
4153  * in the ocfs2_xattr_block.
4154  */
4155 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4156 					   struct buffer_head *xb_bh,
4157 					   struct ocfs2_xattr_bucket *bucket)
4158 {
4159 	int i, blocksize = inode->i_sb->s_blocksize;
4160 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4161 	u16 offset, size, off_change;
4162 	struct ocfs2_xattr_entry *xe;
4163 	struct ocfs2_xattr_block *xb =
4164 				(struct ocfs2_xattr_block *)xb_bh->b_data;
4165 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4166 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4167 	u16 count = le16_to_cpu(xb_xh->xh_count);
4168 	char *src = xb_bh->b_data;
4169 	char *target = bucket_block(bucket, blks - 1);
4170 
4171 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
4172 	     (unsigned long long)xb_bh->b_blocknr,
4173 	     (unsigned long long)bucket_blkno(bucket));
4174 
4175 	for (i = 0; i < blks; i++)
4176 		memset(bucket_block(bucket, i), 0, blocksize);
4177 
4178 	/*
4179 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
4180 	 * there is a offset change corresponding to the change of
4181 	 * ocfs2_xattr_header's position.
4182 	 */
4183 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4184 	xe = &xb_xh->xh_entries[count - 1];
4185 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4186 	size = blocksize - offset;
4187 
4188 	/* copy all the names and values. */
4189 	memcpy(target + offset, src + offset, size);
4190 
4191 	/* Init new header now. */
4192 	xh->xh_count = xb_xh->xh_count;
4193 	xh->xh_num_buckets = cpu_to_le16(1);
4194 	xh->xh_name_value_len = cpu_to_le16(size);
4195 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4196 
4197 	/* copy all the entries. */
4198 	target = bucket_block(bucket, 0);
4199 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4200 	size = count * sizeof(struct ocfs2_xattr_entry);
4201 	memcpy(target + offset, (char *)xb_xh + offset, size);
4202 
4203 	/* Change the xe offset for all the xe because of the move. */
4204 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4205 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4206 	for (i = 0; i < count; i++)
4207 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4208 
4209 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4210 	     offset, size, off_change);
4211 
4212 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4213 	     cmp_xe, swap_xe);
4214 }
4215 
4216 /*
4217  * After we move xattr from block to index btree, we have to
4218  * update ocfs2_xattr_search to the new xe and base.
4219  *
4220  * When the entry is in xattr block, xattr_bh indicates the storage place.
4221  * While if the entry is in index b-tree, "bucket" indicates the
4222  * real place of the xattr.
4223  */
4224 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4225 					    struct ocfs2_xattr_search *xs,
4226 					    struct buffer_head *old_bh)
4227 {
4228 	char *buf = old_bh->b_data;
4229 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4230 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4231 	int i;
4232 
4233 	xs->header = bucket_xh(xs->bucket);
4234 	xs->base = bucket_block(xs->bucket, 0);
4235 	xs->end = xs->base + inode->i_sb->s_blocksize;
4236 
4237 	if (xs->not_found)
4238 		return;
4239 
4240 	i = xs->here - old_xh->xh_entries;
4241 	xs->here = &xs->header->xh_entries[i];
4242 }
4243 
/*
 * Convert a flat (non-indexed) xattr block into an indexed one:
 * allocate one cluster, copy all the block's xattrs into the first
 * bucket of that cluster, then rewrite the block body as an extent
 * tree root with a single record pointing at the new cluster.
 *
 * On success the OCFS2_XATTR_INDEXED flag is set on the block and
 * "xs" has been retargeted at the new bucket.  Returns 0 or a
 * negative errno.
 */
static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u32 bit_off, len;
	u64 blkno;
	handle_t *handle = ctxt->handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_inode_info *oi = OCFS2_I(inode);
	struct buffer_head *xb_bh = xs->xattr_bh;
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xr;
	u16 xb_flags = le16_to_cpu(xb->xb_flags);

	mlog(0, "create xattr index block for %llu\n",
	     (unsigned long long)xb_bh->b_blocknr);

	/* Must not already be indexed, and caller supplies a bucket. */
	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
	BUG_ON(!xs->bucket);

	/*
	 * XXX:
	 * We can use this lock for now, and maybe move to a dedicated mutex
	 * if performance becomes a problem later.
	 */
	down_write(&oi->ip_alloc_sem);

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Claim exactly one cluster to hold the first bucket. */
	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
				     1, 1, &bit_off, &len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * The bucket may spread in many blocks, and
	 * we will only touch the 1st block and the last block
	 * in the whole bucket(one for entry and one for data).
	 */
	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);

	mlog(0, "allocate 1 cluster from %llu to xattr block\n",
	     (unsigned long long)blkno);

	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Move the xattrs over and retarget the search at the bucket. */
	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);

	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);

	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
	       offsetof(struct ocfs2_xattr_block, xb_attrs));

	/* Depth-0 tree with a single record covering the new cluster. */
	xr = &xb->xb_attrs.xb_root;
	xr->xt_clusters = cpu_to_le32(1);
	xr->xt_last_eb_blk = 0;
	xr->xt_list.l_tree_depth = 0;
	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
	xr->xt_list.l_next_free_rec = cpu_to_le16(1);

	xr->xt_list.l_recs[0].e_cpos = 0;
	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);

	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);

	ocfs2_journal_dirty(handle, xb_bh);

out:
	up_write(&oi->ip_alloc_sem);

	return ret;
}
4339 
4340 static int cmp_xe_offset(const void *a, const void *b)
4341 {
4342 	const struct ocfs2_xattr_entry *l = a, *r = b;
4343 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4344 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4345 
4346 	if (l_name_offset < r_name_offset)
4347 		return 1;
4348 	if (l_name_offset > r_name_offset)
4349 		return -1;
4350 	return 0;
4351 }
4352 
4353 /*
4354  * defrag a xattr bucket if we find that the bucket has some
4355  * holes beteen name/value pairs.
4356  * We will move all the name/value pairs to the end of the bucket
4357  * so that we can spare some space for insertion.
4358  */
4359 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4360 				     handle_t *handle,
4361 				     struct ocfs2_xattr_bucket *bucket)
4362 {
4363 	int ret, i;
4364 	size_t end, offset, len;
4365 	struct ocfs2_xattr_header *xh;
4366 	char *entries, *buf, *bucket_buf = NULL;
4367 	u64 blkno = bucket_blkno(bucket);
4368 	u16 xh_free_start;
4369 	size_t blocksize = inode->i_sb->s_blocksize;
4370 	struct ocfs2_xattr_entry *xe;
4371 
4372 	/*
4373 	 * In order to make the operation more efficient and generic,
4374 	 * we copy all the blocks into a contiguous memory and do the
4375 	 * defragment there, so if anything is error, we will not touch
4376 	 * the real block.
4377 	 */
4378 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4379 	if (!bucket_buf) {
4380 		ret = -EIO;
4381 		goto out;
4382 	}
4383 
4384 	buf = bucket_buf;
4385 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4386 		memcpy(buf, bucket_block(bucket, i), blocksize);
4387 
4388 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4389 						OCFS2_JOURNAL_ACCESS_WRITE);
4390 	if (ret < 0) {
4391 		mlog_errno(ret);
4392 		goto out;
4393 	}
4394 
4395 	xh = (struct ocfs2_xattr_header *)bucket_buf;
4396 	entries = (char *)xh->xh_entries;
4397 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4398 
4399 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
4400 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
4401 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4402 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4403 
4404 	/*
4405 	 * sort all the entries by their offset.
4406 	 * the largest will be the first, so that we can
4407 	 * move them to the end one by one.
4408 	 */
4409 	sort(entries, le16_to_cpu(xh->xh_count),
4410 	     sizeof(struct ocfs2_xattr_entry),
4411 	     cmp_xe_offset, swap_xe);
4412 
4413 	/* Move all name/values to the end of the bucket. */
4414 	xe = xh->xh_entries;
4415 	end = OCFS2_XATTR_BUCKET_SIZE;
4416 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4417 		offset = le16_to_cpu(xe->xe_name_offset);
4418 		len = namevalue_size_xe(xe);
4419 
4420 		/*
4421 		 * We must make sure that the name/value pair
4422 		 * exist in the same block. So adjust end to
4423 		 * the previous block end if needed.
4424 		 */
4425 		if (((end - len) / blocksize !=
4426 			(end - 1) / blocksize))
4427 			end = end - end % blocksize;
4428 
4429 		if (end > offset + len) {
4430 			memmove(bucket_buf + end - len,
4431 				bucket_buf + offset, len);
4432 			xe->xe_name_offset = cpu_to_le16(end - len);
4433 		}
4434 
4435 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4436 				"bucket %llu\n", (unsigned long long)blkno);
4437 
4438 		end -= len;
4439 	}
4440 
4441 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4442 			"bucket %llu\n", (unsigned long long)blkno);
4443 
4444 	if (xh_free_start == end)
4445 		goto out;
4446 
4447 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4448 	xh->xh_free_start = cpu_to_le16(end);
4449 
4450 	/* sort the entries by their name_hash. */
4451 	sort(entries, le16_to_cpu(xh->xh_count),
4452 	     sizeof(struct ocfs2_xattr_entry),
4453 	     cmp_xe, swap_xe);
4454 
4455 	buf = bucket_buf;
4456 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4457 		memcpy(bucket_block(bucket, i), buf, blocksize);
4458 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4459 
4460 out:
4461 	kfree(bucket_buf);
4462 	return ret;
4463 }
4464 
4465 /*
4466  * prev_blkno points to the start of an existing extent.  new_blkno
4467  * points to a newly allocated extent.  Because we know each of our
4468  * clusters contains more than bucket, we can easily split one cluster
4469  * at a bucket boundary.  So we take the last cluster of the existing
4470  * extent and split it down the middle.  We move the last half of the
4471  * buckets in the last cluster of the existing extent over to the new
4472  * extent.
4473  *
4474  * first_bh is the buffer at prev_blkno so we can update the existing
4475  * extent's bucket count.  header_bh is the bucket were we were hoping
4476  * to insert our xattr.  If the bucket move places the target in the new
4477  * extent, we'll update first_bh and header_bh after modifying the old
4478  * extent.
4479  *
4480  * first_hash will be set as the 1st xe's name_hash in the new extent.
4481  */
4482 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4483 					       handle_t *handle,
4484 					       struct ocfs2_xattr_bucket *first,
4485 					       struct ocfs2_xattr_bucket *target,
4486 					       u64 new_blkno,
4487 					       u32 num_clusters,
4488 					       u32 *first_hash)
4489 {
4490 	int ret;
4491 	struct super_block *sb = inode->i_sb;
4492 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4493 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4494 	int to_move = num_buckets / 2;
4495 	u64 src_blkno;
4496 	u64 last_cluster_blkno = bucket_blkno(first) +
4497 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4498 
4499 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4500 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4501 
4502 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4503 	     (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4504 
4505 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4506 				     last_cluster_blkno, new_blkno,
4507 				     to_move, first_hash);
4508 	if (ret) {
4509 		mlog_errno(ret);
4510 		goto out;
4511 	}
4512 
4513 	/* This is the first bucket that got moved */
4514 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4515 
4516 	/*
4517 	 * If the target bucket was part of the moved buckets, we need to
4518 	 * update first and target.
4519 	 */
4520 	if (bucket_blkno(target) >= src_blkno) {
4521 		/* Find the block for the new target bucket */
4522 		src_blkno = new_blkno +
4523 			(bucket_blkno(target) - src_blkno);
4524 
4525 		ocfs2_xattr_bucket_relse(first);
4526 		ocfs2_xattr_bucket_relse(target);
4527 
4528 		/*
4529 		 * These shouldn't fail - the buffers are in the
4530 		 * journal from ocfs2_cp_xattr_bucket().
4531 		 */
4532 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
4533 		if (ret) {
4534 			mlog_errno(ret);
4535 			goto out;
4536 		}
4537 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
4538 		if (ret)
4539 			mlog_errno(ret);
4540 
4541 	}
4542 
4543 out:
4544 	return ret;
4545 }
4546 
4547 /*
4548  * Find the suitable pos when we divide a bucket into 2.
4549  * We have to make sure the xattrs with the same hash value exist
4550  * in the same bucket.
4551  *
4552  * If this ocfs2_xattr_header covers more than one hash value, find a
4553  * place where the hash value changes.  Try to find the most even split.
4554  * The most common case is that all entries have different hash values,
4555  * and the first check we make will find a place to split.
4556  */
4557 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4558 {
4559 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
4560 	int count = le16_to_cpu(xh->xh_count);
4561 	int delta, middle = count / 2;
4562 
4563 	/*
4564 	 * We start at the middle.  Each step gets farther away in both
4565 	 * directions.  We therefore hit the change in hash value
4566 	 * nearest to the middle.  Note that this loop does not execute for
4567 	 * count < 2.
4568 	 */
4569 	for (delta = 0; delta < middle; delta++) {
4570 		/* Let's check delta earlier than middle */
4571 		if (cmp_xe(&entries[middle - delta - 1],
4572 			   &entries[middle - delta]))
4573 			return middle - delta;
4574 
4575 		/* For even counts, don't walk off the end */
4576 		if ((middle + delta + 1) == count)
4577 			continue;
4578 
4579 		/* Now try delta past middle */
4580 		if (cmp_xe(&entries[middle + delta],
4581 			   &entries[middle + delta + 1]))
4582 			return middle + delta + 1;
4583 	}
4584 
4585 	/* Every entry had the same hash */
4586 	return count;
4587 }
4588 
4589 /*
4590  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4591  * first_hash will record the 1st hash of the new bucket.
4592  *
4593  * Normally half of the xattrs will be moved.  But we have to make
4594  * sure that the xattrs with the same hash value are stored in the
4595  * same bucket. If all the xattrs in this bucket have the same hash
4596  * value, the new bucket will be initialized as an empty one and the
4597  * first_hash will be initialized as (hash_value+1).
4598  */
4599 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4600 				    handle_t *handle,
4601 				    u64 blk,
4602 				    u64 new_blk,
4603 				    u32 *first_hash,
4604 				    int new_bucket_head)
4605 {
4606 	int ret, i;
4607 	int count, start, len, name_value_len = 0, name_offset = 0;
4608 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4609 	struct ocfs2_xattr_header *xh;
4610 	struct ocfs2_xattr_entry *xe;
4611 	int blocksize = inode->i_sb->s_blocksize;
4612 
4613 	mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4614 	     (unsigned long long)blk, (unsigned long long)new_blk);
4615 
4616 	s_bucket = ocfs2_xattr_bucket_new(inode);
4617 	t_bucket = ocfs2_xattr_bucket_new(inode);
4618 	if (!s_bucket || !t_bucket) {
4619 		ret = -ENOMEM;
4620 		mlog_errno(ret);
4621 		goto out;
4622 	}
4623 
4624 	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4625 	if (ret) {
4626 		mlog_errno(ret);
4627 		goto out;
4628 	}
4629 
4630 	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4631 						OCFS2_JOURNAL_ACCESS_WRITE);
4632 	if (ret) {
4633 		mlog_errno(ret);
4634 		goto out;
4635 	}
4636 
4637 	/*
4638 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4639 	 * there's no need to read it.
4640 	 */
4641 	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4642 	if (ret) {
4643 		mlog_errno(ret);
4644 		goto out;
4645 	}
4646 
4647 	/*
4648 	 * Hey, if we're overwriting t_bucket, what difference does
4649 	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4650 	 * same part of ocfs2_cp_xattr_bucket().
4651 	 */
4652 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4653 						new_bucket_head ?
4654 						OCFS2_JOURNAL_ACCESS_CREATE :
4655 						OCFS2_JOURNAL_ACCESS_WRITE);
4656 	if (ret) {
4657 		mlog_errno(ret);
4658 		goto out;
4659 	}
4660 
4661 	xh = bucket_xh(s_bucket);
4662 	count = le16_to_cpu(xh->xh_count);
4663 	start = ocfs2_xattr_find_divide_pos(xh);
4664 
4665 	if (start == count) {
4666 		xe = &xh->xh_entries[start-1];
4667 
4668 		/*
4669 		 * initialized a new empty bucket here.
4670 		 * The hash value is set as one larger than
4671 		 * that of the last entry in the previous bucket.
4672 		 */
4673 		for (i = 0; i < t_bucket->bu_blocks; i++)
4674 			memset(bucket_block(t_bucket, i), 0, blocksize);
4675 
4676 		xh = bucket_xh(t_bucket);
4677 		xh->xh_free_start = cpu_to_le16(blocksize);
4678 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4679 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4680 
4681 		goto set_num_buckets;
4682 	}
4683 
4684 	/* copy the whole bucket to the new first. */
4685 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4686 
4687 	/* update the new bucket. */
4688 	xh = bucket_xh(t_bucket);
4689 
4690 	/*
4691 	 * Calculate the total name/value len and xh_free_start for
4692 	 * the old bucket first.
4693 	 */
4694 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
4695 	name_value_len = 0;
4696 	for (i = 0; i < start; i++) {
4697 		xe = &xh->xh_entries[i];
4698 		name_value_len += namevalue_size_xe(xe);
4699 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4700 			name_offset = le16_to_cpu(xe->xe_name_offset);
4701 	}
4702 
4703 	/*
4704 	 * Now begin the modification to the new bucket.
4705 	 *
4706 	 * In the new bucket, We just move the xattr entry to the beginning
4707 	 * and don't touch the name/value. So there will be some holes in the
4708 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4709 	 * called.
4710 	 */
4711 	xe = &xh->xh_entries[start];
4712 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4713 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4714 	     (int)((char *)xe - (char *)xh),
4715 	     (int)((char *)xh->xh_entries - (char *)xh));
4716 	memmove((char *)xh->xh_entries, (char *)xe, len);
4717 	xe = &xh->xh_entries[count - start];
4718 	len = sizeof(struct ocfs2_xattr_entry) * start;
4719 	memset((char *)xe, 0, len);
4720 
4721 	le16_add_cpu(&xh->xh_count, -start);
4722 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4723 
4724 	/* Calculate xh_free_start for the new bucket. */
4725 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4726 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4727 		xe = &xh->xh_entries[i];
4728 		if (le16_to_cpu(xe->xe_name_offset) <
4729 		    le16_to_cpu(xh->xh_free_start))
4730 			xh->xh_free_start = xe->xe_name_offset;
4731 	}
4732 
4733 set_num_buckets:
4734 	/* set xh->xh_num_buckets for the new xh. */
4735 	if (new_bucket_head)
4736 		xh->xh_num_buckets = cpu_to_le16(1);
4737 	else
4738 		xh->xh_num_buckets = 0;
4739 
4740 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4741 
4742 	/* store the first_hash of the new bucket. */
4743 	if (first_hash)
4744 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4745 
4746 	/*
4747 	 * Now only update the 1st block of the old bucket.  If we
4748 	 * just added a new empty bucket, there is no need to modify
4749 	 * it.
4750 	 */
4751 	if (start == count)
4752 		goto out;
4753 
4754 	xh = bucket_xh(s_bucket);
4755 	memset(&xh->xh_entries[start], 0,
4756 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
4757 	xh->xh_count = cpu_to_le16(start);
4758 	xh->xh_free_start = cpu_to_le16(name_offset);
4759 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
4760 
4761 	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4762 
4763 out:
4764 	ocfs2_xattr_bucket_free(s_bucket);
4765 	ocfs2_xattr_bucket_free(t_bucket);
4766 
4767 	return ret;
4768 }
4769 
4770 /*
4771  * Copy xattr from one bucket to another bucket.
4772  *
4773  * The caller must make sure that the journal transaction
4774  * has enough space for journaling.
4775  */
4776 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4777 				 handle_t *handle,
4778 				 u64 s_blkno,
4779 				 u64 t_blkno,
4780 				 int t_is_new)
4781 {
4782 	int ret;
4783 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4784 
4785 	BUG_ON(s_blkno == t_blkno);
4786 
4787 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
4788 	     (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4789 	     t_is_new);
4790 
4791 	s_bucket = ocfs2_xattr_bucket_new(inode);
4792 	t_bucket = ocfs2_xattr_bucket_new(inode);
4793 	if (!s_bucket || !t_bucket) {
4794 		ret = -ENOMEM;
4795 		mlog_errno(ret);
4796 		goto out;
4797 	}
4798 
4799 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4800 	if (ret)
4801 		goto out;
4802 
4803 	/*
4804 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4805 	 * there's no need to read it.
4806 	 */
4807 	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4808 	if (ret)
4809 		goto out;
4810 
4811 	/*
4812 	 * Hey, if we're overwriting t_bucket, what difference does
4813 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4814 	 * cluster to fill, we came here from
4815 	 * ocfs2_mv_xattr_buckets(), and it is really new -
4816 	 * ACCESS_CREATE is required.  But we also might have moved data
4817 	 * out of t_bucket before extending back into it.
4818 	 * ocfs2_add_new_xattr_bucket() can do this - its call to
4819 	 * ocfs2_add_new_xattr_cluster() may have created a new extent
4820 	 * and copied out the end of the old extent.  Then it re-extends
4821 	 * the old extent back to create space for new xattrs.  That's
4822 	 * how we get here, and the bucket isn't really new.
4823 	 */
4824 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4825 						t_is_new ?
4826 						OCFS2_JOURNAL_ACCESS_CREATE :
4827 						OCFS2_JOURNAL_ACCESS_WRITE);
4828 	if (ret)
4829 		goto out;
4830 
4831 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4832 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4833 
4834 out:
4835 	ocfs2_xattr_bucket_free(t_bucket);
4836 	ocfs2_xattr_bucket_free(s_bucket);
4837 
4838 	return ret;
4839 }
4840 
4841 /*
4842  * src_blk points to the start of an existing extent.  last_blk points to
4843  * last cluster in that extent.  to_blk points to a newly allocated
4844  * extent.  We copy the buckets from the cluster at last_blk to the new
4845  * extent.  If start_bucket is non-zero, we skip that many buckets before
4846  * we start copying.  The new extent's xh_num_buckets gets set to the
4847  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4848  * by the same amount.
4849  */
4850 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4851 				  u64 src_blk, u64 last_blk, u64 to_blk,
4852 				  unsigned int start_bucket,
4853 				  u32 *first_hash)
4854 {
4855 	int i, ret, credits;
4856 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4857 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4858 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4859 	struct ocfs2_xattr_bucket *old_first, *new_first;
4860 
4861 	mlog(0, "mv xattrs from cluster %llu to %llu\n",
4862 	     (unsigned long long)last_blk, (unsigned long long)to_blk);
4863 
4864 	BUG_ON(start_bucket >= num_buckets);
4865 	if (start_bucket) {
4866 		num_buckets -= start_bucket;
4867 		last_blk += (start_bucket * blks_per_bucket);
4868 	}
4869 
4870 	/* The first bucket of the original extent */
4871 	old_first = ocfs2_xattr_bucket_new(inode);
4872 	/* The first bucket of the new extent */
4873 	new_first = ocfs2_xattr_bucket_new(inode);
4874 	if (!old_first || !new_first) {
4875 		ret = -ENOMEM;
4876 		mlog_errno(ret);
4877 		goto out;
4878 	}
4879 
4880 	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4881 	if (ret) {
4882 		mlog_errno(ret);
4883 		goto out;
4884 	}
4885 
4886 	/*
4887 	 * We need to update the first bucket of the old extent and all
4888 	 * the buckets going to the new extent.
4889 	 */
4890 	credits = ((num_buckets + 1) * blks_per_bucket) +
4891 		handle->h_buffer_credits;
4892 	ret = ocfs2_extend_trans(handle, credits);
4893 	if (ret) {
4894 		mlog_errno(ret);
4895 		goto out;
4896 	}
4897 
4898 	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4899 						OCFS2_JOURNAL_ACCESS_WRITE);
4900 	if (ret) {
4901 		mlog_errno(ret);
4902 		goto out;
4903 	}
4904 
4905 	for (i = 0; i < num_buckets; i++) {
4906 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4907 					    last_blk + (i * blks_per_bucket),
4908 					    to_blk + (i * blks_per_bucket),
4909 					    1);
4910 		if (ret) {
4911 			mlog_errno(ret);
4912 			goto out;
4913 		}
4914 	}
4915 
4916 	/*
4917 	 * Get the new bucket ready before we dirty anything
4918 	 * (This actually shouldn't fail, because we already dirtied
4919 	 * it once in ocfs2_cp_xattr_bucket()).
4920 	 */
4921 	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4922 	if (ret) {
4923 		mlog_errno(ret);
4924 		goto out;
4925 	}
4926 	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4927 						OCFS2_JOURNAL_ACCESS_WRITE);
4928 	if (ret) {
4929 		mlog_errno(ret);
4930 		goto out;
4931 	}
4932 
4933 	/* Now update the headers */
4934 	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4935 	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4936 
4937 	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4938 	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4939 
4940 	if (first_hash)
4941 		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4942 
4943 out:
4944 	ocfs2_xattr_bucket_free(new_first);
4945 	ocfs2_xattr_bucket_free(old_first);
4946 	return ret;
4947 }
4948 
4949 /*
4950  * Move some xattrs in this cluster to the new cluster.
4951  * This function should only be called when bucket size == cluster size.
4952  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4953  */
4954 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4955 				      handle_t *handle,
4956 				      u64 prev_blk,
4957 				      u64 new_blk,
4958 				      u32 *first_hash)
4959 {
4960 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4961 	int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4962 
4963 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4964 
4965 	ret = ocfs2_extend_trans(handle, credits);
4966 	if (ret) {
4967 		mlog_errno(ret);
4968 		return ret;
4969 	}
4970 
4971 	/* Move half of the xattr in start_blk to the next bucket. */
4972 	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4973 					  new_blk, first_hash, 1);
4974 }
4975 
4976 /*
4977  * Move some xattrs from the old cluster to the new one since they are not
4978  * contiguous in ocfs2 xattr tree.
4979  *
4980  * new_blk starts a new separate cluster, and we will move some xattrs from
4981  * prev_blk to it. v_start will be set as the first name hash value in this
4982  * new cluster so that it can be used as e_cpos during tree insertion and
4983  * don't collide with our original b-tree operations. first_bh and header_bh
4984  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4985  * to extend the insert bucket.
4986  *
4987  * The problem is how much xattr should we move to the new one and when should
4988  * we update first_bh and header_bh?
4989  * 1. If cluster size > bucket size, that means the previous cluster has more
4990  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4991  *    update the first_bh and header_bh if the insert bucket has been moved
4992  *    to the new cluster.
4993  * 2. If cluster_size == bucket_size:
4994  *    a) If the previous extent rec has more than one cluster and the insert
4995  *       place isn't in the last cluster, copy the entire last cluster to the
4996  *       new one. This time, we don't need to upate the first_bh and header_bh
4997  *       since they will not be moved into the new cluster.
4998  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4999  *       the new one. And we set the extend flag to zero if the insert place is
5000  *       moved into the new allocated cluster since no extend is needed.
5001  */
5002 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5003 					    handle_t *handle,
5004 					    struct ocfs2_xattr_bucket *first,
5005 					    struct ocfs2_xattr_bucket *target,
5006 					    u64 new_blk,
5007 					    u32 prev_clusters,
5008 					    u32 *v_start,
5009 					    int *extend)
5010 {
5011 	int ret;
5012 
5013 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
5014 	     (unsigned long long)bucket_blkno(first), prev_clusters,
5015 	     (unsigned long long)new_blk);
5016 
5017 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5018 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5019 							  handle,
5020 							  first, target,
5021 							  new_blk,
5022 							  prev_clusters,
5023 							  v_start);
5024 		if (ret)
5025 			mlog_errno(ret);
5026 	} else {
5027 		/* The start of the last cluster in the first extent */
5028 		u64 last_blk = bucket_blkno(first) +
5029 			((prev_clusters - 1) *
5030 			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
5031 
5032 		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5033 			ret = ocfs2_mv_xattr_buckets(inode, handle,
5034 						     bucket_blkno(first),
5035 						     last_blk, new_blk, 0,
5036 						     v_start);
5037 			if (ret)
5038 				mlog_errno(ret);
5039 		} else {
5040 			ret = ocfs2_divide_xattr_cluster(inode, handle,
5041 							 last_blk, new_blk,
5042 							 v_start);
5043 			if (ret)
5044 				mlog_errno(ret);
5045 
5046 			if ((bucket_blkno(target) == last_blk) && extend)
5047 				*extend = 0;
5048 		}
5049 	}
5050 
5051 	return ret;
5052 }
5053 
5054 /*
5055  * Add a new cluster for xattr storage.
5056  *
5057  * If the new cluster is contiguous with the previous one, it will be
5058  * appended to the same extent record, and num_clusters will be updated.
5059  * If not, we will insert a new extent for it and move some xattrs in
5060  * the last cluster into the new allocated one.
5061  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5062  * lose the benefits of hashing because we'll have to search large leaves.
5063  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5064  * if it's bigger).
5065  *
5066  * first_bh is the first block of the previous extent rec and header_bh
5067  * indicates the bucket we will insert the new xattrs. They will be updated
5068  * when the header_bh is moved into the new cluster.
5069  */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct ocfs2_xattr_bucket *first,
				       struct ocfs2_xattr_bucket *target,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       int *extend,
				       struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = ctxt->handle;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_tree et;

	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
	     "previous xattr blkno = %llu\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     prev_cpos, (unsigned long long)bucket_blkno(first));

	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);

	/* We will modify the xattr tree root below; declare it up front. */
	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	/* Claim one cluster from the data allocator reserved in ctxt. */
	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);

	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one. The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
		     num_bits);
	} else {
		/*
		 * Not contiguous, or the leaf would exceed the size limit:
		 * start a new extent record and migrate xattrs from the
		 * old extent into the new cluster.  This may clear *extend
		 * and can relocate 'first'/'target'.
		 */
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first,
						       target,
						       block,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
	     num_bits, (unsigned long long)block, v_start);
	/* Record the new cluster range in the xattr btree rooted at root_bh. */
	ret = ocfs2_insert_extent(handle, &et, v_start, block,
				  num_bits, 0, ctxt->meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret < 0)
		mlog_errno(ret);

leave:
	return ret;
}
5163 
5164 /*
5165  * We are given an extent.  'first' is the bucket at the very front of
5166  * the extent.  The extent has space for an additional bucket past
5167  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5168  * of the target bucket.  We wish to shift every bucket past the target
5169  * down one, filling in that additional space.  When we get back to the
5170  * target, we split the target between itself and the now-empty bucket
5171  * at target+1 (aka, target_blkno + blks_per_bucket).
5172  */
5173 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5174 				     handle_t *handle,
5175 				     struct ocfs2_xattr_bucket *first,
5176 				     u64 target_blk,
5177 				     u32 num_clusters)
5178 {
5179 	int ret, credits;
5180 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5181 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5182 	u64 end_blk;
5183 	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5184 
5185 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5186 	     "from %llu, len = %u\n", (unsigned long long)target_blk,
5187 	     (unsigned long long)bucket_blkno(first), num_clusters);
5188 
5189 	/* The extent must have room for an additional bucket */
5190 	BUG_ON(new_bucket >=
5191 	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5192 
5193 	/* end_blk points to the last existing bucket */
5194 	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5195 
5196 	/*
5197 	 * end_blk is the start of the last existing bucket.
5198 	 * Thus, (end_blk - target_blk) covers the target bucket and
5199 	 * every bucket after it up to, but not including, the last
5200 	 * existing bucket.  Then we add the last existing bucket, the
5201 	 * new bucket, and the first bucket (3 * blk_per_bucket).
5202 	 */
5203 	credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5204 		  handle->h_buffer_credits;
5205 	ret = ocfs2_extend_trans(handle, credits);
5206 	if (ret) {
5207 		mlog_errno(ret);
5208 		goto out;
5209 	}
5210 
5211 	ret = ocfs2_xattr_bucket_journal_access(handle, first,
5212 						OCFS2_JOURNAL_ACCESS_WRITE);
5213 	if (ret) {
5214 		mlog_errno(ret);
5215 		goto out;
5216 	}
5217 
5218 	while (end_blk != target_blk) {
5219 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5220 					    end_blk + blk_per_bucket, 0);
5221 		if (ret)
5222 			goto out;
5223 		end_blk -= blk_per_bucket;
5224 	}
5225 
5226 	/* Move half of the xattr in target_blkno to the next bucket. */
5227 	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5228 					target_blk + blk_per_bucket, NULL, 0);
5229 
5230 	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5231 	ocfs2_xattr_bucket_journal_dirty(handle, first);
5232 
5233 out:
5234 	return ret;
5235 }
5236 
5237 /*
5238  * Add new xattr bucket in an extent record and adjust the buckets
5239  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5240  * bucket we want to insert into.
5241  *
5242  * In the easy case, we will move all the buckets after target down by
5243  * one. Half of target's xattrs will be moved to the next bucket.
5244  *
5245  * If current cluster is full, we'll allocate a new one.  This may not
5246  * be contiguous.  The underlying calls will make sure that there is
5247  * space for the insert, shifting buckets around if necessary.
5248  * 'target' may be moved by those calls.
5249  */
static int ocfs2_add_new_xattr_bucket(struct inode *inode,
				      struct buffer_head *xb_bh,
				      struct ocfs2_xattr_bucket *target,
				      struct ocfs2_xattr_set_ctxt *ctxt)
{
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)xb_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *el = &xb_root->xt_list;
	/* Hash of the first entry identifies the extent rec holding target. */
	u32 name_hash =
		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	int ret, num_buckets, extend = 1;
	u64 p_blkno;
	u32 e_cpos, num_clusters;
	/* The bucket at the front of the extent */
	struct ocfs2_xattr_bucket *first;

	mlog(0, "Add new xattr bucket starting from %llu\n",
	     (unsigned long long)bucket_blkno(target));

	/* The first bucket of the original extent */
	first = ocfs2_xattr_bucket_new(inode);
	if (!first) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/* Find the extent record that contains name_hash. */
	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
				  &num_clusters, el);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_read_xattr_bucket(first, p_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
		/*
		 * This can move first+target if the target bucket moves
		 * to the new extent.
		 */
		ret = ocfs2_add_new_xattr_cluster(inode,
						  xb_bh,
						  first,
						  target,
						  &num_clusters,
						  e_cpos,
						  &extend,
						  ctxt);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * extend stays set unless the cluster add moved the insert point
	 * into the new cluster; otherwise shift buckets and split target.
	 */
	if (extend) {
		ret = ocfs2_extend_xattr_bucket(inode,
						ctxt->handle,
						first,
						bucket_blkno(target),
						num_clusters);
		if (ret)
			mlog_errno(ret);
	}

out:
	ocfs2_xattr_bucket_free(first);

	return ret;
}
5327 
5328 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5329 					struct ocfs2_xattr_bucket *bucket,
5330 					int offs)
5331 {
5332 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
5333 
5334 	offs = offs % inode->i_sb->s_blocksize;
5335 	return bucket_block(bucket, block_off) + offs;
5336 }
5337 
5338 /*
5339  * Truncate the specified xe_off entry in xattr bucket.
5340  * bucket is indicated by header_bh and len is the new length.
5341  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5342  *
5343  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5344  */
static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
					     struct ocfs2_xattr_bucket *bucket,
					     int xe_off,
					     int len,
					     struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret, offset;
	u64 value_blk;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
	size_t blocksize = inode->i_sb->s_blocksize;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	xe = &xh->xh_entries[xe_off];

	/* Only externally stored values have a value root to truncate. */
	BUG_ON(!xe || ocfs2_xattr_is_local(xe));

	/* The value root follows the (padded) name within the bucket. */
	offset = le16_to_cpu(xe->xe_name_offset) +
		 OCFS2_XATTR_SIZE(xe->xe_name_len);

	value_blk = offset / blocksize;

	/* We don't allow ocfs2_xattr_value to be stored in different block. */
	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);

	vb.vb_bh = bucket->bu_bhs[value_blk];
	BUG_ON(!vb.vb_bh);

	vb.vb_xv = (struct ocfs2_xattr_value_root *)
		(vb.vb_bh->b_data + offset % blocksize);

	/*
	 * From here on out we have to dirty the bucket.  The generic
	 * value calls only modify one of the bucket's bhs, but we need
	 * to send the bucket at once.  So if they error, they *could* have
	 * modified something.  We have to assume they did, and dirty
	 * the whole bucket.  This leaves us in a consistent state.
	 */
	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
	     xe_off, (unsigned long long)bucket_blkno(bucket), len);
	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Record the new value length in the entry itself. */
	xe->xe_value_size = cpu_to_le64(len);

	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);

out:
	return ret;
}
5407 
5408 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5409 				  struct buffer_head *root_bh,
5410 				  u64 blkno,
5411 				  u32 cpos,
5412 				  u32 len,
5413 				  void *para)
5414 {
5415 	int ret;
5416 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5417 	struct inode *tl_inode = osb->osb_tl_inode;
5418 	handle_t *handle;
5419 	struct ocfs2_xattr_block *xb =
5420 			(struct ocfs2_xattr_block *)root_bh->b_data;
5421 	struct ocfs2_alloc_context *meta_ac = NULL;
5422 	struct ocfs2_cached_dealloc_ctxt dealloc;
5423 	struct ocfs2_extent_tree et;
5424 
5425 	ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5426 					  ocfs2_delete_xattr_in_bucket, para);
5427 	if (ret) {
5428 		mlog_errno(ret);
5429 		return ret;
5430 	}
5431 
5432 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5433 
5434 	ocfs2_init_dealloc_ctxt(&dealloc);
5435 
5436 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5437 	     cpos, len, (unsigned long long)blkno);
5438 
5439 	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440 					       len);
5441 
5442 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5443 	if (ret) {
5444 		mlog_errno(ret);
5445 		return ret;
5446 	}
5447 
5448 	mutex_lock(&tl_inode->i_mutex);
5449 
5450 	if (ocfs2_truncate_log_needs_flush(osb)) {
5451 		ret = __ocfs2_flush_truncate_log(osb);
5452 		if (ret < 0) {
5453 			mlog_errno(ret);
5454 			goto out;
5455 		}
5456 	}
5457 
5458 	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5459 	if (IS_ERR(handle)) {
5460 		ret = -ENOMEM;
5461 		mlog_errno(ret);
5462 		goto out;
5463 	}
5464 
5465 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5466 				      OCFS2_JOURNAL_ACCESS_WRITE);
5467 	if (ret) {
5468 		mlog_errno(ret);
5469 		goto out_commit;
5470 	}
5471 
5472 	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5473 				  &dealloc);
5474 	if (ret) {
5475 		mlog_errno(ret);
5476 		goto out_commit;
5477 	}
5478 
5479 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 
5481 	ret = ocfs2_journal_dirty(handle, root_bh);
5482 	if (ret) {
5483 		mlog_errno(ret);
5484 		goto out_commit;
5485 	}
5486 
5487 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5488 	if (ret)
5489 		mlog_errno(ret);
5490 
5491 out_commit:
5492 	ocfs2_commit_trans(osb, handle);
5493 out:
5494 	ocfs2_schedule_truncate_log_flush(osb, 1);
5495 
5496 	mutex_unlock(&tl_inode->i_mutex);
5497 
5498 	if (meta_ac)
5499 		ocfs2_free_alloc_context(meta_ac);
5500 
5501 	ocfs2_run_deallocs(osb, &dealloc);
5502 
5503 	return ret;
5504 }
5505 
5506 /*
5507  * check whether the xattr bucket is filled up with the same hash value.
5508  * If we want to insert the xattr with the same hash, return -ENOSPC.
5509  * If we want to insert a xattr with different hash value, go ahead
5510  * and ocfs2_divide_xattr_bucket will handle this.
5511  */
5512 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5513 					      struct ocfs2_xattr_bucket *bucket,
5514 					      const char *name)
5515 {
5516 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5517 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5518 
5519 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5520 		return 0;
5521 
5522 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5523 	    xh->xh_entries[0].xe_name_hash) {
5524 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5525 		     "hash = %u\n",
5526 		     (unsigned long long)bucket_blkno(bucket),
5527 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5528 		return -ENOSPC;
5529 	}
5530 
5531 	return 0;
5532 }
5533 
5534 /*
5535  * Try to set the entry in the current bucket.  If we fail, the caller
5536  * will handle getting us another bucket.
5537  */
5538 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5539 					struct ocfs2_xattr_info *xi,
5540 					struct ocfs2_xattr_search *xs,
5541 					struct ocfs2_xattr_set_ctxt *ctxt)
5542 {
5543 	int ret;
5544 	struct ocfs2_xa_loc loc;
5545 
5546 	mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5547 
5548 	ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5549 				       xs->not_found ? NULL : xs->here);
5550 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5551 	if (!ret) {
5552 		xs->here = loc.xl_entry;
5553 		goto out;
5554 	}
5555 	if (ret != -ENOSPC) {
5556 		mlog_errno(ret);
5557 		goto out;
5558 	}
5559 
5560 	/* Ok, we need space.  Let's try defragmenting the bucket. */
5561 	ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5562 					xs->bucket);
5563 	if (ret) {
5564 		mlog_errno(ret);
5565 		goto out;
5566 	}
5567 
5568 	ret = ocfs2_xa_set(&loc, xi, ctxt);
5569 	if (!ret) {
5570 		xs->here = loc.xl_entry;
5571 		goto out;
5572 	}
5573 	if (ret != -ENOSPC)
5574 		mlog_errno(ret);
5575 
5576 
5577 out:
5578 	mlog_exit(ret);
5579 	return ret;
5580 }
5581 
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs,
					     struct ocfs2_xattr_set_ctxt *ctxt)
{
	int ret;

	mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);

	/* Fast path: try the bucket the search already landed in. */
	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
	if (!ret)
		goto out;
	if (ret != -ENOSPC) {
		mlog_errno(ret);
		goto out;
	}

	/* Ack, need more space.  Let's try to get another bucket! */

	/*
	 * We do not allow for overlapping ranges between buckets. And
	 * the maximum number of collisions we will allow for then is
	 * one bucket's worth, so check it here whether we need to
	 * add a new bucket for the insert.
	 */
	ret = ocfs2_check_xattr_bucket_collision(inode,
						 xs->bucket,
						 xi->xi_name);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_add_new_xattr_bucket(inode,
					 xs->xattr_bh,
					 xs->bucket,
					 ctxt);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * ocfs2_add_new_xattr_bucket() will have updated
	 * xs->bucket if it moved, but it will not have updated
	 * any of the other search fields.  Thus, we drop it and
	 * re-search.  Everything should be cached, so it'll be
	 * quick.
	 */
	ocfs2_xattr_bucket_relse(xs->bucket);
	ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
					   xi->xi_name_index,
					   xi->xi_name, xs);
	if (ret && ret != -ENODATA)
		goto out;
	/* -ENODATA just means the name isn't there yet; record that. */
	xs->not_found = ret;

	/* Ok, we have a new bucket, let's try again */
	ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
	if (ret && (ret != -ENOSPC))
		mlog_errno(ret);

out:
	mlog_exit(ret);
	return ret;
}
5648 
5649 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5650 					struct ocfs2_xattr_bucket *bucket,
5651 					void *para)
5652 {
5653 	int ret = 0, ref_credits;
5654 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5655 	u16 i;
5656 	struct ocfs2_xattr_entry *xe;
5657 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5658 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5659 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5660 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5661 	struct ocfs2_xattr_value_root *xv;
5662 	struct ocfs2_rm_xattr_bucket_para *args =
5663 			(struct ocfs2_rm_xattr_bucket_para *)para;
5664 
5665 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5666 
5667 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5668 		xe = &xh->xh_entries[i];
5669 		if (ocfs2_xattr_is_local(xe))
5670 			continue;
5671 
5672 		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5673 						      i, &xv, NULL);
5674 
5675 		ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5676 							 args->ref_ci,
5677 							 args->ref_root_bh,
5678 							 &ctxt.meta_ac,
5679 							 &ref_credits);
5680 
5681 		ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5682 		if (IS_ERR(ctxt.handle)) {
5683 			ret = PTR_ERR(ctxt.handle);
5684 			mlog_errno(ret);
5685 			break;
5686 		}
5687 
5688 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5689 							i, 0, &ctxt);
5690 
5691 		ocfs2_commit_trans(osb, ctxt.handle);
5692 		if (ctxt.meta_ac) {
5693 			ocfs2_free_alloc_context(ctxt.meta_ac);
5694 			ctxt.meta_ac = NULL;
5695 		}
5696 		if (ret) {
5697 			mlog_errno(ret);
5698 			break;
5699 		}
5700 	}
5701 
5702 	if (ctxt.meta_ac)
5703 		ocfs2_free_alloc_context(ctxt.meta_ac);
5704 	ocfs2_schedule_truncate_log_flush(osb, 1);
5705 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5706 	return ret;
5707 }
5708 
5709 /*
5710  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5711  * or change the extent record flag), we need to recalculate
5712  * the metaecc for the whole bucket. So it is done here.
5713  *
5714  * Note:
5715  * We have to give the extra credits for the caller.
5716  */
5717 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5718 					    handle_t *handle,
5719 					    void *para)
5720 {
5721 	int ret;
5722 	struct ocfs2_xattr_bucket *bucket =
5723 			(struct ocfs2_xattr_bucket *)para;
5724 
5725 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5726 						OCFS2_JOURNAL_ACCESS_WRITE);
5727 	if (ret) {
5728 		mlog_errno(ret);
5729 		return ret;
5730 	}
5731 
5732 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5733 
5734 	return 0;
5735 }
5736 
5737 /*
5738  * Special action we need if the xattr value is refcounted.
5739  *
5740  * 1. If the xattr is refcounted, lock the tree.
5741  * 2. CoW the xattr if we are setting the new value and the value
5742  *    will be stored outside.
5743  * 3. In other case, decrease_refcount will work for us, so just
5744  *    lock the refcount tree, calculate the meta and credits is OK.
5745  *
5746  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5747  * currently CoW is a completed transaction, while this function
5748  * will also lock the allocators and let us deadlock. So we will
5749  * CoW the whole xattr value.
5750  */
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
					struct ocfs2_dinode *di,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xis,
					struct ocfs2_xattr_search *xbs,
					struct ocfs2_refcount_tree **ref_tree,
					int *meta_add,
					int *credits)
{
	int ret = 0;
	struct ocfs2_xattr_block *xb;
	struct ocfs2_xattr_entry *xe;
	char *base;
	u32 p_cluster, num_clusters;
	unsigned int ext_flags;
	int name_offset, name_len;
	struct ocfs2_xattr_value_buf vb;
	struct ocfs2_xattr_bucket *bucket = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_post_refcount refcount;
	struct ocfs2_post_refcount *p = NULL;
	struct buffer_head *ref_root_bh = NULL;

	/*
	 * Locate the existing entry: prefer the inode-local search (xis),
	 * and fall back to the xattr block search (xbs).
	 */
	if (!xis->not_found) {
		xe = xis->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		base = xis->base;
		vb.vb_bh = xis->inode_bh;
		vb.vb_access = ocfs2_journal_access_di;
	} else {
		int i, block_off = 0;
		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
		xe = xbs->here;
		name_offset = le16_to_cpu(xe->xe_name_offset);
		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
		i = xbs->here - xbs->header->xh_entries;

		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
			/* Indexed block: resolve the entry inside its bucket. */
			ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
							bucket_xh(xbs->bucket),
							i, &block_off,
							&name_offset);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}
			base = bucket_block(xbs->bucket, block_off);
			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
			vb.vb_access = ocfs2_journal_access;

			if (ocfs2_meta_ecc(osb)) {
				/*create parameters for ocfs2_post_refcount. */
				bucket = xbs->bucket;
				refcount.credits = bucket->bu_blocks;
				refcount.para = bucket;
				refcount.func =
					ocfs2_xattr_bucket_post_refcount;
				p = &refcount;
			}
		} else {
			base = xbs->base;
			vb.vb_bh = xbs->xattr_bh;
			vb.vb_access = ocfs2_journal_access_xb;
		}
	}

	/* Inline values own no clusters, so nothing can be refcounted. */
	if (ocfs2_xattr_is_local(xe))
		goto out;

	vb.vb_xv = (struct ocfs2_xattr_value_root *)
				(base + name_offset + name_len);

	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
				       &num_clusters, &vb.vb_xv->xr_list,
				       &ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We just need to check the 1st extent record, since we always
	 * CoW the whole xattr. So there shouldn't be a xattr with
	 * some REFCOUNT extent recs after the 1st one.
	 */
	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
		goto out;

	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
				       1, ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * If we are deleting the xattr or the new size will be stored inside,
	 * cool, leave it there, the xattr truncate process will remove them
	 * for us(it still needs the refcount tree lock and the meta, credits).
	 * And the worst case is that every cluster truncate will split the
	 * refcount tree, and make the original extent become 3. So we will need
	 * 2 * cluster more extent recs at most.
	 */
	if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {

		ret = ocfs2_refcounted_xattr_delete_need(inode,
							 &(*ref_tree)->rf_ci,
							 ref_root_bh, vb.vb_xv,
							 meta_add, credits);
		if (ret)
			mlog_errno(ret);
		goto out;
	}

	/* Otherwise CoW the whole value now, before the set transaction. */
	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
				       *ref_tree, ref_root_bh, 0,
				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
	if (ret)
		mlog_errno(ret);

out:
	brelse(ref_root_bh);
	return ret;
}
5876 
5877 /*
5878  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5879  * The physical clusters will be added to refcount tree.
5880  */
5881 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5882 				struct ocfs2_xattr_value_root *xv,
5883 				struct ocfs2_extent_tree *value_et,
5884 				struct ocfs2_caching_info *ref_ci,
5885 				struct buffer_head *ref_root_bh,
5886 				struct ocfs2_cached_dealloc_ctxt *dealloc,
5887 				struct ocfs2_post_refcount *refcount)
5888 {
5889 	int ret = 0;
5890 	u32 clusters = le32_to_cpu(xv->xr_clusters);
5891 	u32 cpos, p_cluster, num_clusters;
5892 	struct ocfs2_extent_list *el = &xv->xr_list;
5893 	unsigned int ext_flags;
5894 
5895 	cpos = 0;
5896 	while (cpos < clusters) {
5897 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5898 					       &num_clusters, el, &ext_flags);
5899 
5900 		cpos += num_clusters;
5901 		if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5902 			continue;
5903 
5904 		BUG_ON(!p_cluster);
5905 
5906 		ret = ocfs2_add_refcount_flag(inode, value_et,
5907 					      ref_ci, ref_root_bh,
5908 					      cpos - num_clusters,
5909 					      p_cluster, num_clusters,
5910 					      dealloc, refcount);
5911 		if (ret) {
5912 			mlog_errno(ret);
5913 			break;
5914 		}
5915 	}
5916 
5917 	return ret;
5918 }
5919 
5920 /*
5921  * Given a normal ocfs2_xattr_header, refcount all the entries which
5922  * have value stored outside.
5923  * Used for xattrs stored in inode and ocfs2_xattr_block.
5924  */
5925 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5926 				struct ocfs2_xattr_value_buf *vb,
5927 				struct ocfs2_xattr_header *header,
5928 				struct ocfs2_caching_info *ref_ci,
5929 				struct buffer_head *ref_root_bh,
5930 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5931 {
5932 
5933 	struct ocfs2_xattr_entry *xe;
5934 	struct ocfs2_xattr_value_root *xv;
5935 	struct ocfs2_extent_tree et;
5936 	int i, ret = 0;
5937 
5938 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5939 		xe = &header->xh_entries[i];
5940 
5941 		if (ocfs2_xattr_is_local(xe))
5942 			continue;
5943 
5944 		xv = (struct ocfs2_xattr_value_root *)((void *)header +
5945 			le16_to_cpu(xe->xe_name_offset) +
5946 			OCFS2_XATTR_SIZE(xe->xe_name_len));
5947 
5948 		vb->vb_xv = xv;
5949 		ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5950 
5951 		ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5952 							ref_ci, ref_root_bh,
5953 							dealloc, NULL);
5954 		if (ret) {
5955 			mlog_errno(ret);
5956 			break;
5957 		}
5958 	}
5959 
5960 	return ret;
5961 }
5962 
5963 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5964 				struct buffer_head *fe_bh,
5965 				struct ocfs2_caching_info *ref_ci,
5966 				struct buffer_head *ref_root_bh,
5967 				struct ocfs2_cached_dealloc_ctxt *dealloc)
5968 {
5969 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5970 	struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5971 				(fe_bh->b_data + inode->i_sb->s_blocksize -
5972 				le16_to_cpu(di->i_xattr_inline_size));
5973 	struct ocfs2_xattr_value_buf vb = {
5974 		.vb_bh = fe_bh,
5975 		.vb_access = ocfs2_journal_access_di,
5976 	};
5977 
5978 	return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5979 						  ref_ci, ref_root_bh, dealloc);
5980 }
5981 
5982 struct ocfs2_xattr_tree_value_refcount_para {
5983 	struct ocfs2_caching_info *ref_ci;
5984 	struct buffer_head *ref_root_bh;
5985 	struct ocfs2_cached_dealloc_ctxt *dealloc;
5986 };
5987 
5988 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5989 					   struct ocfs2_xattr_bucket *bucket,
5990 					   int offset,
5991 					   struct ocfs2_xattr_value_root **xv,
5992 					   struct buffer_head **bh)
5993 {
5994 	int ret, block_off, name_offset;
5995 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5996 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5997 	void *base;
5998 
5999 	ret = ocfs2_xattr_bucket_get_name_value(sb,
6000 						bucket_xh(bucket),
6001 						offset,
6002 						&block_off,
6003 						&name_offset);
6004 	if (ret) {
6005 		mlog_errno(ret);
6006 		goto out;
6007 	}
6008 
6009 	base = bucket_block(bucket, block_off);
6010 
6011 	*xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6012 			 OCFS2_XATTR_SIZE(xe->xe_name_len));
6013 
6014 	if (bh)
6015 		*bh = bucket->bu_bhs[block_off];
6016 out:
6017 	return ret;
6018 }
6019 
6020 /*
6021  * For a given xattr bucket, refcount all the entries which
6022  * have value stored outside.
6023  */
static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
					     struct ocfs2_xattr_bucket *bucket,
					     void *para)
{
	int i, ret = 0;
	struct ocfs2_extent_tree et;
	struct ocfs2_xattr_tree_value_refcount_para *ref =
			(struct ocfs2_xattr_tree_value_refcount_para *)para;
	/* NOTE(review): reads the header from the first block directly;
	 * presumably equivalent to bucket_xh(bucket) — confirm. */
	struct ocfs2_xattr_header *xh =
			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
	struct ocfs2_xattr_entry *xe;
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};
	struct ocfs2_post_refcount refcount = {
		.credits = bucket->bu_blocks,
		.para = bucket,
		.func = ocfs2_xattr_bucket_post_refcount,
	};
	struct ocfs2_post_refcount *p = NULL;

	/* We only need post_refcount if we support metaecc. */
	if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
		p = &refcount;

	mlog(0, "refcount bucket %llu, count = %u\n",
	     (unsigned long long)bucket_blkno(bucket),
	     le16_to_cpu(xh->xh_count));
	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
		xe = &xh->xh_entries[i];

		/* Inline values own no clusters; nothing to refcount. */
		if (ocfs2_xattr_is_local(xe))
			continue;

		ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
						      &vb.vb_xv, &vb.vb_bh);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ocfs2_init_xattr_value_extent_tree(&et,
						   INODE_CACHE(inode), &vb);

		ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
							&et, ref->ref_ci,
							ref->ref_root_bh,
							ref->dealloc, p);
		if (ret) {
			mlog_errno(ret);
			break;
		}
	}

	return ret;

}
6081 
6082 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6083 				     struct buffer_head *root_bh,
6084 				     u64 blkno, u32 cpos, u32 len, void *para)
6085 {
6086 	return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6087 					   ocfs2_xattr_bucket_value_refcount,
6088 					   para);
6089 }
6090 
6091 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6092 				struct buffer_head *blk_bh,
6093 				struct ocfs2_caching_info *ref_ci,
6094 				struct buffer_head *ref_root_bh,
6095 				struct ocfs2_cached_dealloc_ctxt *dealloc)
6096 {
6097 	int ret = 0;
6098 	struct ocfs2_xattr_block *xb =
6099 				(struct ocfs2_xattr_block *)blk_bh->b_data;
6100 
6101 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6102 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6103 		struct ocfs2_xattr_value_buf vb = {
6104 			.vb_bh = blk_bh,
6105 			.vb_access = ocfs2_journal_access_xb,
6106 		};
6107 
6108 		ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6109 							 ref_ci, ref_root_bh,
6110 							 dealloc);
6111 	} else {
6112 		struct ocfs2_xattr_tree_value_refcount_para para = {
6113 			.ref_ci = ref_ci,
6114 			.ref_root_bh = ref_root_bh,
6115 			.dealloc = dealloc,
6116 		};
6117 
6118 		ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6119 						ocfs2_refcount_xattr_tree_rec,
6120 						&para);
6121 	}
6122 
6123 	return ret;
6124 }
6125 
6126 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6127 				     struct buffer_head *fe_bh,
6128 				     struct ocfs2_caching_info *ref_ci,
6129 				     struct buffer_head *ref_root_bh,
6130 				     struct ocfs2_cached_dealloc_ctxt *dealloc)
6131 {
6132 	int ret = 0;
6133 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6134 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6135 	struct buffer_head *blk_bh = NULL;
6136 
6137 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6138 		ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6139 							 ref_ci, ref_root_bh,
6140 							 dealloc);
6141 		if (ret) {
6142 			mlog_errno(ret);
6143 			goto out;
6144 		}
6145 	}
6146 
6147 	if (!di->i_xattr_loc)
6148 		goto out;
6149 
6150 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6151 				     &blk_bh);
6152 	if (ret < 0) {
6153 		mlog_errno(ret);
6154 		goto out;
6155 	}
6156 
6157 	ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6158 						ref_root_bh, dealloc);
6159 	if (ret)
6160 		mlog_errno(ret);
6161 
6162 	brelse(blk_bh);
6163 out:
6164 
6165 	return ret;
6166 }
6167 
/* Filter callback: return non-zero if @xe should be copied on reflink. */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;		/* reflink source inode */
	struct inode *new_inode;		/* reflink target inode */
	struct buffer_head *old_bh;		/* source dinode bh */
	struct buffer_head *new_bh;		/* target dinode bh */
	struct ocfs2_caching_info *ref_ci;	/* refcount tree caching info */
	struct buffer_head *ref_root_bh;	/* refcount tree root bh */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	should_xattr_reflinked *xattr_reflinked; /* NULL: reflink everything */
};
6183 
6184 /*
6185  * Given a xattr header and xe offset,
6186  * return the proper xv and the corresponding bh.
6187  * xattr in inode, block and xattr tree have different implementaions.
6188  */
6189 typedef int (get_xattr_value_root)(struct super_block *sb,
6190 				   struct buffer_head *bh,
6191 				   struct ocfs2_xattr_header *xh,
6192 				   int offset,
6193 				   struct ocfs2_xattr_value_root **xv,
6194 				   struct buffer_head **ret_bh,
6195 				   void *para);
6196 
6197 /*
6198  * Calculate all the xattr value root metadata stored in this xattr header and
6199  * credits we need if we create them from the scratch.
6200  * We use get_xattr_value_root so that all types of xattr container can use it.
6201  */
6202 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6203 					     struct buffer_head *bh,
6204 					     struct ocfs2_xattr_header *xh,
6205 					     int *metas, int *credits,
6206 					     int *num_recs,
6207 					     get_xattr_value_root *func,
6208 					     void *para)
6209 {
6210 	int i, ret = 0;
6211 	struct ocfs2_xattr_value_root *xv;
6212 	struct ocfs2_xattr_entry *xe;
6213 
6214 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6215 		xe = &xh->xh_entries[i];
6216 		if (ocfs2_xattr_is_local(xe))
6217 			continue;
6218 
6219 		ret = func(sb, bh, xh, i, &xv, NULL, para);
6220 		if (ret) {
6221 			mlog_errno(ret);
6222 			break;
6223 		}
6224 
6225 		*metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6226 			  le16_to_cpu(xv->xr_list.l_next_free_rec);
6227 
6228 		*credits += ocfs2_calc_extend_credits(sb,
6229 						&def_xv.xv.xr_list,
6230 						le32_to_cpu(xv->xr_clusters));
6231 
6232 		/*
6233 		 * If the value is a tree with depth > 1, We don't go deep
6234 		 * to the extent block, so just calculate a maximum record num.
6235 		 */
6236 		if (!xv->xr_list.l_tree_depth)
6237 			*num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6238 		else
6239 			*num_recs += ocfs2_clusters_for_bytes(sb,
6240 							      XATTR_SIZE_MAX);
6241 	}
6242 
6243 	return ret;
6244 }
6245 
6246 /* Used by xattr inode and block to return the right xv and buffer_head. */
6247 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6248 				      struct buffer_head *bh,
6249 				      struct ocfs2_xattr_header *xh,
6250 				      int offset,
6251 				      struct ocfs2_xattr_value_root **xv,
6252 				      struct buffer_head **ret_bh,
6253 				      void *para)
6254 {
6255 	struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6256 
6257 	*xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6258 		le16_to_cpu(xe->xe_name_offset) +
6259 		OCFS2_XATTR_SIZE(xe->xe_name_len));
6260 
6261 	if (ret_bh)
6262 		*ret_bh = bh;
6263 
6264 	return 0;
6265 }
6266 
6267 /*
6268  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6269  * It is only used for inline xattr and xattr block.
6270  */
6271 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6272 					struct ocfs2_xattr_header *xh,
6273 					struct buffer_head *ref_root_bh,
6274 					int *credits,
6275 					struct ocfs2_alloc_context **meta_ac)
6276 {
6277 	int ret, meta_add = 0, num_recs = 0;
6278 	struct ocfs2_refcount_block *rb =
6279 			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
6280 
6281 	*credits = 0;
6282 
6283 	ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6284 						&meta_add, credits, &num_recs,
6285 						ocfs2_get_xattr_value_root,
6286 						NULL);
6287 	if (ret) {
6288 		mlog_errno(ret);
6289 		goto out;
6290 	}
6291 
6292 	/*
6293 	 * We need to add/modify num_recs in refcount tree, so just calculate
6294 	 * an approximate number we need for refcount tree change.
6295 	 * Sometimes we need to split the tree, and after split,  half recs
6296 	 * will be moved to the new block, and a new block can only provide
6297 	 * half number of recs. So we multiple new blocks by 2.
6298 	 */
6299 	num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6300 	meta_add += num_recs;
6301 	*credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6302 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6303 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6304 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6305 	else
6306 		*credits += 1;
6307 
6308 	ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6309 	if (ret)
6310 		mlog_errno(ret);
6311 
6312 out:
6313 	return ret;
6314 }
6315 
6316 /*
6317  * Given a xattr header, reflink all the xattrs in this container.
6318  * It can be used for inode, block and bucket.
6319  *
6320  * NOTE:
6321  * Before we call this function, the caller has memcpy the xattr in
6322  * old_xh to the new_xh.
6323  *
6324  * If args.xattr_reflinked is set, call it to decide whether the xe should
6325  * be reflinked or not. If not, remove it from the new xattr header.
6326  */
6327 static int ocfs2_reflink_xattr_header(handle_t *handle,
6328 				      struct ocfs2_xattr_reflink *args,
6329 				      struct buffer_head *old_bh,
6330 				      struct ocfs2_xattr_header *xh,
6331 				      struct buffer_head *new_bh,
6332 				      struct ocfs2_xattr_header *new_xh,
6333 				      struct ocfs2_xattr_value_buf *vb,
6334 				      struct ocfs2_alloc_context *meta_ac,
6335 				      get_xattr_value_root *func,
6336 				      void *para)
6337 {
6338 	int ret = 0, i, j;
6339 	struct super_block *sb = args->old_inode->i_sb;
6340 	struct buffer_head *value_bh;
6341 	struct ocfs2_xattr_entry *xe, *last;
6342 	struct ocfs2_xattr_value_root *xv, *new_xv;
6343 	struct ocfs2_extent_tree data_et;
6344 	u32 clusters, cpos, p_cluster, num_clusters;
6345 	unsigned int ext_flags = 0;
6346 
6347 	mlog(0, "reflink xattr in container %llu, count = %u\n",
6348 	     (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6349 
6350 	last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6351 	for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6352 		xe = &xh->xh_entries[i];
6353 
6354 		if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6355 			xe = &new_xh->xh_entries[j];
6356 
6357 			le16_add_cpu(&new_xh->xh_count, -1);
6358 			if (new_xh->xh_count) {
6359 				memmove(xe, xe + 1,
6360 					(void *)last - (void *)xe);
6361 				memset(last, 0,
6362 				       sizeof(struct ocfs2_xattr_entry));
6363 			}
6364 
6365 			/*
6366 			 * We don't want j to increase in the next round since
6367 			 * it is already moved ahead.
6368 			 */
6369 			j--;
6370 			continue;
6371 		}
6372 
6373 		if (ocfs2_xattr_is_local(xe))
6374 			continue;
6375 
6376 		ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6377 		if (ret) {
6378 			mlog_errno(ret);
6379 			break;
6380 		}
6381 
6382 		ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6383 		if (ret) {
6384 			mlog_errno(ret);
6385 			break;
6386 		}
6387 
6388 		/*
6389 		 * For the xattr which has l_tree_depth = 0, all the extent
6390 		 * recs have already be copied to the new xh with the
6391 		 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6392 		 * increase the refount count int the refcount tree.
6393 		 *
6394 		 * For the xattr which has l_tree_depth > 0, we need
6395 		 * to initialize it to the empty default value root,
6396 		 * and then insert the extents one by one.
6397 		 */
6398 		if (xv->xr_list.l_tree_depth) {
6399 			memcpy(new_xv, &def_xv, sizeof(def_xv));
6400 			vb->vb_xv = new_xv;
6401 			vb->vb_bh = value_bh;
6402 			ocfs2_init_xattr_value_extent_tree(&data_et,
6403 					INODE_CACHE(args->new_inode), vb);
6404 		}
6405 
6406 		clusters = le32_to_cpu(xv->xr_clusters);
6407 		cpos = 0;
6408 		while (cpos < clusters) {
6409 			ret = ocfs2_xattr_get_clusters(args->old_inode,
6410 						       cpos,
6411 						       &p_cluster,
6412 						       &num_clusters,
6413 						       &xv->xr_list,
6414 						       &ext_flags);
6415 			if (ret) {
6416 				mlog_errno(ret);
6417 				goto out;
6418 			}
6419 
6420 			BUG_ON(!p_cluster);
6421 
6422 			if (xv->xr_list.l_tree_depth) {
6423 				ret = ocfs2_insert_extent(handle,
6424 						&data_et, cpos,
6425 						ocfs2_clusters_to_blocks(
6426 							args->old_inode->i_sb,
6427 							p_cluster),
6428 						num_clusters, ext_flags,
6429 						meta_ac);
6430 				if (ret) {
6431 					mlog_errno(ret);
6432 					goto out;
6433 				}
6434 			}
6435 
6436 			ret = ocfs2_increase_refcount(handle, args->ref_ci,
6437 						      args->ref_root_bh,
6438 						      p_cluster, num_clusters,
6439 						      meta_ac, args->dealloc);
6440 			if (ret) {
6441 				mlog_errno(ret);
6442 				goto out;
6443 			}
6444 
6445 			cpos += num_clusters;
6446 		}
6447 	}
6448 
6449 out:
6450 	return ret;
6451 }
6452 
/*
 * Reflink the inline xattr area from args->old_inode's dinode into
 * args->new_inode's dinode.  Starts and commits its own transaction;
 * on success the new inode carries the OCFS2_HAS_XATTR_FL and
 * OCFS2_INLINE_XATTR_FL dynamic features.
 */
static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
	int inline_size = le16_to_cpu(di->i_xattr_inline_size);
	/* The inline xattr area sits at the tail of the inode block. */
	int header_off = osb->sb->s_blocksize - inline_size;
	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
					(args->old_bh->b_data + header_off);
	struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
					(args->new_bh->b_data + header_off);
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_inode_info *new_oi;
	struct ocfs2_dinode *new_di;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = args->new_bh,
		.vb_access = ocfs2_journal_access_di,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
				      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy the raw inline area; the reflink below fixes up values. */
	memcpy(args->new_bh->b_data + header_off,
	       args->old_bh->b_data + header_off, inline_size);

	new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
	new_di->i_xattr_inline_size = cpu_to_le16(inline_size);

	ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
					 args->new_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	new_oi = OCFS2_I(args->new_inode);
	/* ip_lock guards ip_dyn_features. */
	spin_lock(&new_oi->ip_lock);
	new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
	new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
	spin_unlock(&new_oi->ip_lock);

	ocfs2_journal_dirty(handle, args->new_bh);

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);
	return ret;
}
6524 
/*
 * Allocate and attach a fresh, empty xattr block to @inode.
 * @indexed selects the bucket-indexed layout.  On success *ret_bh
 * holds the new block's buffer_head (caller releases it).
 */
static int ocfs2_create_empty_xattr_block(struct inode *inode,
					  struct buffer_head *fe_bh,
					  struct buffer_head **ret_bh,
					  int indexed)
{
	int ret;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_set_ctxt ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	/* One metadata block for the xattr block itself. */
	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}

	ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
	if (IS_ERR(ctxt.handle)) {
		ret = PTR_ERR(ctxt.handle);
		mlog_errno(ret);
		goto out;
	}

	mlog(0, "create new xattr block for inode %llu, index = %d\n",
	     (unsigned long long)fe_bh->b_blocknr, indexed);
	ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
				       ret_bh);
	if (ret)
		mlog_errno(ret);

	ocfs2_commit_trans(osb, ctxt.handle);
out:
	ocfs2_free_alloc_context(ctxt.meta_ac);
	return ret;
}
6560 
/*
 * Reflink a flat (non-indexed) xattr block @blk_bh into the freshly
 * created @new_blk_bh of args->new_inode.  Runs in its own transaction
 * and, if the new inode didn't have xattrs yet, sets
 * OCFS2_HAS_XATTR_FL on it.
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
				     struct buffer_head *blk_bh,
				     struct buffer_head *new_blk_bh)
{
	int ret = 0, credits = 0;
	handle_t *handle;
	struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
	struct ocfs2_dinode *new_di;
	struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
	int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)blk_bh->b_data;
	struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
	struct ocfs2_xattr_block *new_xb =
			(struct ocfs2_xattr_block *)new_blk_bh->b_data;
	struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_xattr_value_buf vb = {
		.vb_bh = new_blk_bh,
		.vb_access = ocfs2_journal_access_xb,
	};

	ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
						  &credits, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	/* One more credits in case we need to add xattr flags in new inode. */
	handle = ocfs2_start_trans(osb, credits + 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	/* Journal the dinode only if we will set HAS_XATTR_FL below. */
	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		ret = ocfs2_journal_access_di(handle,
					      INODE_CACHE(args->new_inode),
					      args->new_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}
	}

	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
				      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Copy everything after the block header; reflink fixes values. */
	memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
	       osb->sb->s_blocksize - header_off);

	ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
					 new_blk_bh, new_xh, &vb, meta_ac,
					 ocfs2_get_xattr_value_root, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_journal_dirty(handle, new_blk_bh);

	if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
		new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
		/* ip_lock guards ip_dyn_features. */
		spin_lock(&new_oi->ip_lock);
		new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
		new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
		spin_unlock(&new_oi->ip_lock);

		ocfs2_journal_dirty(handle, args->new_bh);
	}

out_commit:
	ocfs2_commit_trans(osb, handle);

out:
	ocfs2_free_alloc_context(meta_ac);
	return ret;
}
6646 
/* Per-tree-reflink state threaded through the iteration callbacks. */
struct ocfs2_reflink_xattr_tree_args {
	struct ocfs2_xattr_reflink *reflink;	/* shared reflink context */
	struct buffer_head *old_blk_bh;		/* old indexed xattr block */
	struct buffer_head *new_blk_bh;		/* new indexed xattr block */
	struct ocfs2_xattr_bucket *old_bucket;	/* scratch bucket (source) */
	struct ocfs2_xattr_bucket *new_bucket;	/* scratch bucket (target) */
};
6654 
6655 /*
6656  * NOTE:
6657  * We have to handle the case that both old bucket and new bucket
6658  * will call this function to get the right ret_bh.
6659  * So The caller must give us the right bh.
6660  */
6661 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6662 					struct buffer_head *bh,
6663 					struct ocfs2_xattr_header *xh,
6664 					int offset,
6665 					struct ocfs2_xattr_value_root **xv,
6666 					struct buffer_head **ret_bh,
6667 					void *para)
6668 {
6669 	struct ocfs2_reflink_xattr_tree_args *args =
6670 			(struct ocfs2_reflink_xattr_tree_args *)para;
6671 	struct ocfs2_xattr_bucket *bucket;
6672 
6673 	if (bh == args->old_bucket->bu_bhs[0])
6674 		bucket = args->old_bucket;
6675 	else
6676 		bucket = args->new_bucket;
6677 
6678 	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6679 					       xv, ret_bh);
6680 }
6681 
/* Accumulators for ocfs2_calc_value_tree_metas(). */
struct ocfs2_value_tree_metas {
	int num_metas;	/* metadata blocks needed */
	int credits;	/* journal credits needed */
	int num_recs;	/* upper bound on refcount recs touched */
};
6687 
/*
 * get_xattr_value_root implementation used while sizing a bucket:
 * @para is the bucket the header belongs to.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bucket = para;

	return ocfs2_get_xattr_tree_value_root(sb, bucket, offset, xv, ret_bh);
}
6702 
6703 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6704 				      struct ocfs2_xattr_bucket *bucket,
6705 				      void *para)
6706 {
6707 	struct ocfs2_value_tree_metas *metas =
6708 			(struct ocfs2_value_tree_metas *)para;
6709 	struct ocfs2_xattr_header *xh =
6710 			(struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6711 
6712 	/* Add the credits for this bucket first. */
6713 	metas->credits += bucket->bu_blocks;
6714 	return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6715 					xh, &metas->num_metas,
6716 					&metas->credits, &metas->num_recs,
6717 					ocfs2_value_tree_metas_in_bucket,
6718 					bucket);
6719 }
6720 
6721 /*
6722  * Given a xattr extent rec starting from blkno and having len clusters,
6723  * iterate all the buckets calculate how much metadata we need for reflinking
6724  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6725  */
6726 static int ocfs2_lock_reflink_xattr_rec_allocators(
6727 				struct ocfs2_reflink_xattr_tree_args *args,
6728 				struct ocfs2_extent_tree *xt_et,
6729 				u64 blkno, u32 len, int *credits,
6730 				struct ocfs2_alloc_context **meta_ac,
6731 				struct ocfs2_alloc_context **data_ac)
6732 {
6733 	int ret, num_free_extents;
6734 	struct ocfs2_value_tree_metas metas;
6735 	struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6736 	struct ocfs2_refcount_block *rb;
6737 
6738 	memset(&metas, 0, sizeof(metas));
6739 
6740 	ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6741 					  ocfs2_calc_value_tree_metas, &metas);
6742 	if (ret) {
6743 		mlog_errno(ret);
6744 		goto out;
6745 	}
6746 
6747 	*credits = metas.credits;
6748 
6749 	/*
6750 	 * Calculate we need for refcount tree change.
6751 	 *
6752 	 * We need to add/modify num_recs in refcount tree, so just calculate
6753 	 * an approximate number we need for refcount tree change.
6754 	 * Sometimes we need to split the tree, and after split,  half recs
6755 	 * will be moved to the new block, and a new block can only provide
6756 	 * half number of recs. So we multiple new blocks by 2.
6757 	 * In the end, we have to add credits for modifying the already
6758 	 * existed refcount block.
6759 	 */
6760 	rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6761 	metas.num_recs =
6762 		(metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6763 		 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6764 	metas.num_metas += metas.num_recs;
6765 	*credits += metas.num_recs +
6766 		    metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6767 	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6768 		*credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6769 			    le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6770 	else
6771 		*credits += 1;
6772 
6773 	/* count in the xattr tree change. */
6774 	num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6775 	if (num_free_extents < 0) {
6776 		ret = num_free_extents;
6777 		mlog_errno(ret);
6778 		goto out;
6779 	}
6780 
6781 	if (num_free_extents < len)
6782 		metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6783 
6784 	*credits += ocfs2_calc_extend_credits(osb->sb,
6785 					      xt_et->et_root_el, len);
6786 
6787 	if (metas.num_metas) {
6788 		ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6789 							meta_ac);
6790 		if (ret) {
6791 			mlog_errno(ret);
6792 			goto out;
6793 		}
6794 	}
6795 
6796 	if (len) {
6797 		ret = ocfs2_reserve_clusters(osb, len, data_ac);
6798 		if (ret)
6799 			mlog_errno(ret);
6800 	}
6801 out:
6802 	if (ret) {
6803 		if (*meta_ac) {
6804 			ocfs2_free_alloc_context(*meta_ac);
6805 			meta_ac = NULL;
6806 		}
6807 	}
6808 
6809 	return ret;
6810 }
6811 
/*
 * Copy the run of xattr buckets starting at @blkno into the freshly
 * allocated run at @new_blkno, reflinking every externally stored
 * value along the way.  The real bucket count is read from the first
 * bucket's header and caps the iteration.
 *
 * NOTE(review): blkno and new_blkno advance in lockstep by one bucket
 * per iteration, assuming the whole run is physically contiguous on
 * both sides — confirm against the single ocfs2_claim_clusters() call
 * in the caller.
 */
static int ocfs2_reflink_xattr_buckets(handle_t *handle,
				u64 blkno, u64 new_blkno, u32 clusters,
				struct ocfs2_alloc_context *meta_ac,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_reflink_xattr_tree_args *args)
{
	int i, j, ret = 0;
	struct super_block *sb = args->reflink->old_inode->i_sb;
	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
	u32 num_buckets = clusters * bpc;	/* upper bound until read */
	int bpb = args->old_bucket->bu_blocks;	/* blocks per bucket */
	struct ocfs2_xattr_value_buf vb = {
		.vb_access = ocfs2_journal_access,
	};

	for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
		ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * The real bucket num in this series of blocks is stored
		 * in the 1st bucket.
		 */
		if (i == 0)
			num_buckets = le16_to_cpu(
				bucket_xh(args->old_bucket)->xh_num_buckets);

		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/* Raw copy; the reflink below fixes up value roots. */
		for (j = 0; j < bpb; j++)
			memcpy(bucket_block(args->new_bucket, j),
			       bucket_block(args->old_bucket, j),
			       sb->s_blocksize);

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);

		ret = ocfs2_reflink_xattr_header(handle, args->reflink,
					args->old_bucket->bu_bhs[0],
					bucket_xh(args->old_bucket),
					args->new_bucket->bu_bhs[0],
					bucket_xh(args->new_bucket),
					&vb, meta_ac,
					ocfs2_get_reflink_xattr_value_root,
					args);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		/*
		 * Re-access and dirty the bucket to calculate metaecc.
		 * Because we may extend the transaction in reflink_xattr_header
		 * which will let the already accessed block gone.
		 */
		ret = ocfs2_xattr_bucket_journal_access(handle,
						args->new_bucket,
						OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			break;
		}

		ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
		/* Release the scratch buckets for the next iteration. */
		ocfs2_xattr_bucket_relse(args->old_bucket);
		ocfs2_xattr_bucket_relse(args->new_bucket);
	}

	/* Safe to call again after the loop's own relse (idempotent). */
	ocfs2_xattr_bucket_relse(args->old_bucket);
	ocfs2_xattr_bucket_relse(args->new_bucket);
	return ret;
}
6898 /*
6899  * Create the same xattr extent record in the new inode's xattr tree.
6900  */
6901 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6902 				   struct buffer_head *root_bh,
6903 				   u64 blkno,
6904 				   u32 cpos,
6905 				   u32 len,
6906 				   void *para)
6907 {
6908 	int ret, credits = 0;
6909 	u32 p_cluster, num_clusters;
6910 	u64 new_blkno;
6911 	handle_t *handle;
6912 	struct ocfs2_reflink_xattr_tree_args *args =
6913 			(struct ocfs2_reflink_xattr_tree_args *)para;
6914 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6915 	struct ocfs2_alloc_context *meta_ac = NULL;
6916 	struct ocfs2_alloc_context *data_ac = NULL;
6917 	struct ocfs2_extent_tree et;
6918 
6919 	ocfs2_init_xattr_tree_extent_tree(&et,
6920 					  INODE_CACHE(args->reflink->new_inode),
6921 					  args->new_blk_bh);
6922 
6923 	ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6924 						      len, &credits,
6925 						      &meta_ac, &data_ac);
6926 	if (ret) {
6927 		mlog_errno(ret);
6928 		goto out;
6929 	}
6930 
6931 	handle = ocfs2_start_trans(osb, credits);
6932 	if (IS_ERR(handle)) {
6933 		ret = PTR_ERR(handle);
6934 		mlog_errno(ret);
6935 		goto out;
6936 	}
6937 
6938 	ret = ocfs2_claim_clusters(osb, handle, data_ac,
6939 				   len, &p_cluster, &num_clusters);
6940 	if (ret) {
6941 		mlog_errno(ret);
6942 		goto out_commit;
6943 	}
6944 
6945 	new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6946 
6947 	mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6948 	     (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6949 	ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6950 					  meta_ac, data_ac, args);
6951 	if (ret) {
6952 		mlog_errno(ret);
6953 		goto out_commit;
6954 	}
6955 
6956 	mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6957 	     (unsigned long long)new_blkno, len, cpos);
6958 	ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6959 				  len, 0, meta_ac);
6960 	if (ret)
6961 		mlog_errno(ret);
6962 
6963 out_commit:
6964 	ocfs2_commit_trans(osb, handle);
6965 
6966 out:
6967 	if (meta_ac)
6968 		ocfs2_free_alloc_context(meta_ac);
6969 	if (data_ac)
6970 		ocfs2_free_alloc_context(data_ac);
6971 	return ret;
6972 }
6973 
6974 /*
6975  * Create reflinked xattr buckets.
6976  * We will add bucket one by one, and refcount all the xattrs in the bucket
6977  * if they are stored outside.
6978  */
6979 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6980 				    struct buffer_head *blk_bh,
6981 				    struct buffer_head *new_blk_bh)
6982 {
6983 	int ret;
6984 	struct ocfs2_reflink_xattr_tree_args para;
6985 
6986 	memset(&para, 0, sizeof(para));
6987 	para.reflink = args;
6988 	para.old_blk_bh = blk_bh;
6989 	para.new_blk_bh = new_blk_bh;
6990 
6991 	para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6992 	if (!para.old_bucket) {
6993 		mlog_errno(-ENOMEM);
6994 		return -ENOMEM;
6995 	}
6996 
6997 	para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6998 	if (!para.new_bucket) {
6999 		ret = -ENOMEM;
7000 		mlog_errno(ret);
7001 		goto out;
7002 	}
7003 
7004 	ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7005 					      ocfs2_reflink_xattr_rec,
7006 					      &para);
7007 	if (ret)
7008 		mlog_errno(ret);
7009 
7010 out:
7011 	ocfs2_xattr_bucket_free(para.old_bucket);
7012 	ocfs2_xattr_bucket_free(para.new_bucket);
7013 	return ret;
7014 }
7015 
7016 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7017 					struct buffer_head *blk_bh)
7018 {
7019 	int ret, indexed = 0;
7020 	struct buffer_head *new_blk_bh = NULL;
7021 	struct ocfs2_xattr_block *xb =
7022 			(struct ocfs2_xattr_block *)blk_bh->b_data;
7023 
7024 
7025 	if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7026 		indexed = 1;
7027 
7028 	ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7029 					     &new_blk_bh, indexed);
7030 	if (ret) {
7031 		mlog_errno(ret);
7032 		goto out;
7033 	}
7034 
7035 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7036 		ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7037 	else
7038 		ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7039 	if (ret)
7040 		mlog_errno(ret);
7041 
7042 out:
7043 	brelse(new_blk_bh);
7044 	return ret;
7045 }
7046 
7047 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7048 {
7049 	int type = ocfs2_xattr_get_type(xe);
7050 
7051 	return type != OCFS2_XATTR_INDEX_SECURITY &&
7052 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7053 	       type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7054 }
7055 
/*
 * Copy all (or, if !preserve_security, all non-security/non-ACL) extended
 * attributes from old_inode to new_inode as part of a reflink.
 *
 * The source inode's refcount tree is write-locked for the whole copy so
 * that value clusters can be CoW-shared between the two inodes.  Inline
 * xattrs (in the dinode) are copied first, then the external xattr block
 * if di->i_xattr_loc is set.  Clusters queued for dealloc during the copy
 * are only released after the refcount tree lock is dropped.
 *
 * Returns 0 on success, negative errno on failure.
 */
int ocfs2_reflink_xattrs(struct inode *old_inode,
			 struct buffer_head *old_bh,
			 struct inode *new_inode,
			 struct buffer_head *new_bh,
			 bool preserve_security)
{
	int ret;
	struct ocfs2_xattr_reflink args;
	struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_cached_dealloc_ctxt dealloc;
	struct ocfs2_refcount_tree *ref_tree;
	struct buffer_head *ref_root_bh = NULL;

	/* Take the refcount tree write-locked (rw = 1) for the duration. */
	ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				       le64_to_cpu(di->i_refcount_loc),
				       1, &ref_tree, &ref_root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_init_dealloc_ctxt(&dealloc);

	args.old_inode = old_inode;
	args.new_inode = new_inode;
	args.old_bh = old_bh;
	args.new_bh = new_bh;
	args.ref_ci = &ref_tree->rf_ci;
	args.ref_root_bh = ref_root_bh;
	args.dealloc = &dealloc;
	/*
	 * NULL means "copy everything"; otherwise the callback filters out
	 * security and POSIX ACL xattrs, which get re-initialized later.
	 */
	if (preserve_security)
		args.xattr_reflinked = NULL;
	else
		args.xattr_reflinked = ocfs2_reflink_xattr_no_security;

	/* Inline xattrs stored in the dinode itself. */
	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
		ret = ocfs2_reflink_xattr_inline(&args);
		if (ret) {
			mlog_errno(ret);
			goto out_unlock;
		}
	}

	/* No external xattr block -> done. */
	if (!di->i_xattr_loc)
		goto out_unlock;

	ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
				     &blk_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto out_unlock;
	}

	ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
	if (ret)
		mlog_errno(ret);

	brelse(blk_bh);

out_unlock:
	ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
				   ref_tree, 1);
	brelse(ref_root_bh);

	/* Release queued clusters only after the refcount lock is dropped. */
	if (ocfs2_dealloc_has_cluster(&dealloc)) {
		ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
		ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
	}

out:
	return ret;
}
7130 
7131 /*
7132  * Initialize security and acl for a already created inode.
7133  * Used for reflink a non-preserve-security file.
7134  *
7135  * It uses common api like ocfs2_xattr_set, so the caller
7136  * must not hold any lock expect i_mutex.
7137  */
7138 int ocfs2_init_security_and_acl(struct inode *dir,
7139 				struct inode *inode)
7140 {
7141 	int ret = 0;
7142 	struct buffer_head *dir_bh = NULL;
7143 	struct ocfs2_security_xattr_info si = {
7144 		.enable = 1,
7145 	};
7146 
7147 	ret = ocfs2_init_security_get(inode, dir, &si);
7148 	if (!ret) {
7149 		ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7150 				      si.name, si.value, si.value_len,
7151 				      XATTR_CREATE);
7152 		if (ret) {
7153 			mlog_errno(ret);
7154 			goto leave;
7155 		}
7156 	} else if (ret != -EOPNOTSUPP) {
7157 		mlog_errno(ret);
7158 		goto leave;
7159 	}
7160 
7161 	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7162 	if (ret) {
7163 		mlog_errno(ret);
7164 		goto leave;
7165 	}
7166 
7167 	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7168 	if (ret)
7169 		mlog_errno(ret);
7170 
7171 	ocfs2_inode_unlock(dir, 0);
7172 	brelse(dir_bh);
7173 leave:
7174 	return ret;
7175 }
7176 /*
7177  * 'security' attributes support
7178  */
7179 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7180 					size_t list_size, const char *name,
7181 					size_t name_len, int type)
7182 {
7183 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7184 	const size_t total_len = prefix_len + name_len + 1;
7185 
7186 	if (list && total_len <= list_size) {
7187 		memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7188 		memcpy(list + prefix_len, name, name_len);
7189 		list[prefix_len + name_len] = '\0';
7190 	}
7191 	return total_len;
7192 }
7193 
7194 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7195 				    void *buffer, size_t size, int type)
7196 {
7197 	if (strcmp(name, "") == 0)
7198 		return -EINVAL;
7199 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7200 			       name, buffer, size);
7201 }
7202 
7203 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7204 		const void *value, size_t size, int flags, int type)
7205 {
7206 	if (strcmp(name, "") == 0)
7207 		return -EINVAL;
7208 
7209 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7210 			       name, value, size, flags);
7211 }
7212 
/*
 * Ask the security module for the security xattr (name/value/length) that
 * a newly created inode should carry, filling *si.
 *
 * Returns -EOPNOTSUPP when the volume lacks the xattr feature (before
 * consulting the LSM), otherwise whatever security_inode_init_security()
 * returns.
 *
 * NOTE(review): on success si->name and si->value appear to be kmalloc'd
 * by the LSM and owned by the caller (ext3 frees both after use) —
 * confirm against the active LSM hooks.
 */
int ocfs2_init_security_get(struct inode *inode,
			    struct inode *dir,
			    struct ocfs2_security_xattr_info *si)
{
	/* check whether ocfs2 support feature xattr */
	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
		return -EOPNOTSUPP;
	return security_inode_init_security(inode, dir, &si->name, &si->value,
					    &si->value_len);
}
7223 
/*
 * Write the security xattr previously obtained via ocfs2_init_security_get()
 * onto an inode, inside an already-running journal transaction.
 *
 * Thin pass-through to ocfs2_xattr_set_handle() with the SECURITY name
 * index and no flags; the caller supplies the handle, the inode's dinode
 * buffer and any pre-reserved metadata/data allocators.
 */
int ocfs2_init_security_set(handle_t *handle,
			    struct inode *inode,
			    struct buffer_head *di_bh,
			    struct ocfs2_security_xattr_info *si,
			    struct ocfs2_alloc_context *xattr_ac,
			    struct ocfs2_alloc_context *data_ac)
{
	return ocfs2_xattr_set_handle(handle, inode, di_bh,
				     OCFS2_XATTR_INDEX_SECURITY,
				     si->name, si->value, si->value_len, 0,
				     xattr_ac, data_ac);
}
7236 
/* VFS dispatch table for the "security." xattr namespace. */
struct xattr_handler ocfs2_xattr_security_handler = {
	.prefix	= XATTR_SECURITY_PREFIX,
	.list	= ocfs2_xattr_security_list,
	.get	= ocfs2_xattr_security_get,
	.set	= ocfs2_xattr_security_set,
};
7243 
7244 /*
7245  * 'trusted' attributes support
7246  */
7247 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7248 				       size_t list_size, const char *name,
7249 				       size_t name_len, int type)
7250 {
7251 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7252 	const size_t total_len = prefix_len + name_len + 1;
7253 
7254 	if (list && total_len <= list_size) {
7255 		memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7256 		memcpy(list + prefix_len, name, name_len);
7257 		list[prefix_len + name_len] = '\0';
7258 	}
7259 	return total_len;
7260 }
7261 
7262 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7263 		void *buffer, size_t size, int type)
7264 {
7265 	if (strcmp(name, "") == 0)
7266 		return -EINVAL;
7267 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7268 			       name, buffer, size);
7269 }
7270 
7271 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7272 		const void *value, size_t size, int flags, int type)
7273 {
7274 	if (strcmp(name, "") == 0)
7275 		return -EINVAL;
7276 
7277 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7278 			       name, value, size, flags);
7279 }
7280 
/* VFS dispatch table for the "trusted." xattr namespace. */
struct xattr_handler ocfs2_xattr_trusted_handler = {
	.prefix	= XATTR_TRUSTED_PREFIX,
	.list	= ocfs2_xattr_trusted_list,
	.get	= ocfs2_xattr_trusted_get,
	.set	= ocfs2_xattr_trusted_set,
};
7287 
7288 /*
7289  * 'user' attributes support
7290  */
7291 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7292 				    size_t list_size, const char *name,
7293 				    size_t name_len, int type)
7294 {
7295 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7296 	const size_t total_len = prefix_len + name_len + 1;
7297 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7298 
7299 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7300 		return 0;
7301 
7302 	if (list && total_len <= list_size) {
7303 		memcpy(list, XATTR_USER_PREFIX, prefix_len);
7304 		memcpy(list + prefix_len, name, name_len);
7305 		list[prefix_len + name_len] = '\0';
7306 	}
7307 	return total_len;
7308 }
7309 
7310 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7311 		void *buffer, size_t size, int type)
7312 {
7313 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7314 
7315 	if (strcmp(name, "") == 0)
7316 		return -EINVAL;
7317 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7318 		return -EOPNOTSUPP;
7319 	return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7320 			       buffer, size);
7321 }
7322 
7323 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7324 		const void *value, size_t size, int flags, int type)
7325 {
7326 	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7327 
7328 	if (strcmp(name, "") == 0)
7329 		return -EINVAL;
7330 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7331 		return -EOPNOTSUPP;
7332 
7333 	return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7334 			       name, value, size, flags);
7335 }
7336 
/* VFS dispatch table for the "user." xattr namespace. */
struct xattr_handler ocfs2_xattr_user_handler = {
	.prefix	= XATTR_USER_PREFIX,
	.list	= ocfs2_xattr_user_list,
	.get	= ocfs2_xattr_user_get,
	.set	= ocfs2_xattr_user_set,
};
7343