xref: /openbmc/linux/fs/ocfs2/xattr.c (revision 492a8a33)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21 
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39 
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42 
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 
/*
 * An xattr value root immediately followed by one extent record.
 * OCFS2_XATTR_ROOT_SIZE is defined as the size of this structure.
 */
struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};
64 
/*
 * In-memory handle for one xattr bucket: the inode it belongs to and the
 * buffer_heads of the blocks that make it up.  Allocated by
 * ocfs2_xattr_bucket_new() and destroyed by ocfs2_xattr_bucket_free().
 */
struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/* The actual buffers that make up the bucket */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};
75 
/*
 * State threaded through the allocation helpers in this file while an
 * xattr value is grown or shrunk.
 */
struct ocfs2_xattr_set_ctxt {
	/* Journal handle the modification runs under */
	handle_t *handle;
	/* Metadata allocation context passed to the extent tree code */
	struct ocfs2_alloc_context *meta_ac;
	/* Data (cluster) allocation context used when adding clusters */
	struct ocfs2_alloc_context *data_ac;
	/* Cached dealloc context that removed clusters are queued on */
	struct ocfs2_cached_dealloc_ctxt dealloc;
};
82 
/* Size of a value root together with its single extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Largest value length, in bytes, that ocfs2_xattr_entry_real_size()
 * accounts for as stored right after the name; longer values are
 * accounted for as an OCFS2_XATTR_ROOT_SIZE value root instead.
 */
#define OCFS2_XATTR_INLINE_SIZE	80
/* Bytes of slack subtracted in the two free-space estimates below. */
#define OCFS2_XATTR_HEADER_GAP	4
/*
 * Bytes left for entries in a minimum-size inline xattr area once the
 * xattr header and the gap have been subtracted.
 */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/*
 * Bytes left for entries in one xattr block on the filesystem that ptr
 * belongs to (ptr must have an ->i_sb, e.g. an inode): the block size
 * less the xattr block structure, the xattr header and the gap.
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
93 
/*
 * Template value root whose extent list is sized for exactly one record
 * (l_count = 1); every other field is zero.
 * NOTE(review): its users are outside the part of the file reviewed here;
 * presumably it seeds newly created out-of-line values - confirm.
 */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
97 
/*
 * NULL-terminated list of the xattr handlers ocfs2 provides.  The two
 * POSIX ACL handlers are only present when CONFIG_OCFS2_FS_POSIX_ACL
 * is enabled.
 */
struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
	&ocfs2_xattr_acl_access_handler,
	&ocfs2_xattr_acl_default_handler,
#endif
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};
108 
/*
 * Handler lookup table indexed by xattr name index.  Slots without an
 * initializer stay NULL; ocfs2_xattr_prefix() treats a NULL slot as
 * "no prefix".
 */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
					= &ocfs2_xattr_acl_access_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
					= &ocfs2_xattr_acl_default_handler,
#endif
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
};
120 
/*
 * Description of a single extended attribute as passed between the
 * helpers in this file.
 */
struct ocfs2_xattr_info {
	/* Name index (one of the OCFS2_XATTR_INDEX_* values) */
	int name_index;
	/* Attribute name - presumably without the namespace prefix; confirm */
	const char *name;
	/* Value bytes and their length */
	const void *value;
	size_t value_len;
};
127 
/*
 * Cursor used while looking up an attribute in the inode body or in an
 * external xattr block.
 */
struct ocfs2_xattr_search {
	/* Buffer holding the on-disk inode */
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attributes.  When the attributes live in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	/* Header of the entry table being searched */
	struct ocfs2_xattr_header *header;
	/* Bucket handle, used when searching an indexed xattr block */
	struct ocfs2_xattr_bucket *bucket;
	/* Address that entry name offsets are relative to */
	void *base;
	/* First byte past the searched area */
	void *end;
	/* Entry the search stopped at */
	struct ocfs2_xattr_entry *here;
	/* Non-zero while no matching entry has been found */
	int not_found;
};
142 
/*
 * Forward declarations of static helpers that are used before their
 * definitions appear further down in this file.
 */
static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct ocfs2_xattr_tree_root *xt,
					char *buffer,
					size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs,
					     struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_delete_xattr_index_block(struct inode *inode,
					  struct buffer_head *xb_bh);
static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
				  u64 src_blk, u64 last_blk, u64 to_blk,
				  unsigned int start_bucket,
				  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
					struct ocfs2_dinode *di,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xis,
					struct ocfs2_xattr_search *xbs,
					struct ocfs2_refcount_tree **ref_tree,
					int *meta_need,
					int *credits);
187 
188 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
189 {
190 	return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
191 }
192 
193 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
194 {
195 	return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
196 }
197 
198 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
199 {
200 	u16 len = sb->s_blocksize -
201 		 offsetof(struct ocfs2_xattr_header, xh_entries);
202 
203 	return len / sizeof(struct ocfs2_xattr_entry);
204 }
205 
/*
 * Accessors for a bucket.  They dereference bu_bhs[], so the bucket's
 * buffer_heads must already have been set up (see
 * ocfs2_init_xattr_bucket() and ocfs2_read_xattr_bucket()).
 */
/* Disk block number of the bucket's first block */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data area of the bucket's _n-th block */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, located at the start of the bucket's first block */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
209 
210 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
211 {
212 	struct ocfs2_xattr_bucket *bucket;
213 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
214 
215 	BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
216 
217 	bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
218 	if (bucket) {
219 		bucket->bu_inode = inode;
220 		bucket->bu_blocks = blks;
221 	}
222 
223 	return bucket;
224 }
225 
226 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
227 {
228 	int i;
229 
230 	for (i = 0; i < bucket->bu_blocks; i++) {
231 		brelse(bucket->bu_bhs[i]);
232 		bucket->bu_bhs[i] = NULL;
233 	}
234 }
235 
236 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
237 {
238 	if (bucket) {
239 		ocfs2_xattr_bucket_relse(bucket);
240 		bucket->bu_inode = NULL;
241 		kfree(bucket);
242 	}
243 }
244 
245 /*
246  * A bucket that has never been written to disk doesn't need to be
247  * read.  We just need the buffer_heads.  Don't call this for
248  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
249  * them fully.
250  */
251 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
252 				   u64 xb_blkno)
253 {
254 	int i, rc = 0;
255 
256 	for (i = 0; i < bucket->bu_blocks; i++) {
257 		bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
258 					      xb_blkno + i);
259 		if (!bucket->bu_bhs[i]) {
260 			rc = -EIO;
261 			mlog_errno(rc);
262 			break;
263 		}
264 
265 		if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
266 					   bucket->bu_bhs[i]))
267 			ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
268 						      bucket->bu_bhs[i]);
269 	}
270 
271 	if (rc)
272 		ocfs2_xattr_bucket_relse(bucket);
273 	return rc;
274 }
275 
276 /* Read the xattr bucket at xb_blkno */
277 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
278 				   u64 xb_blkno)
279 {
280 	int rc;
281 
282 	rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
283 			       bucket->bu_blocks, bucket->bu_bhs, 0,
284 			       NULL);
285 	if (!rc) {
286 		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
287 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
288 						 bucket->bu_bhs,
289 						 bucket->bu_blocks,
290 						 &bucket_xh(bucket)->xh_check);
291 		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
292 		if (rc)
293 			mlog_errno(rc);
294 	}
295 
296 	if (rc)
297 		ocfs2_xattr_bucket_relse(bucket);
298 	return rc;
299 }
300 
301 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
302 					     struct ocfs2_xattr_bucket *bucket,
303 					     int type)
304 {
305 	int i, rc = 0;
306 
307 	for (i = 0; i < bucket->bu_blocks; i++) {
308 		rc = ocfs2_journal_access(handle,
309 					  INODE_CACHE(bucket->bu_inode),
310 					  bucket->bu_bhs[i], type);
311 		if (rc) {
312 			mlog_errno(rc);
313 			break;
314 		}
315 	}
316 
317 	return rc;
318 }
319 
320 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
321 					     struct ocfs2_xattr_bucket *bucket)
322 {
323 	int i;
324 
325 	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
326 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
327 				   bucket->bu_bhs, bucket->bu_blocks,
328 				   &bucket_xh(bucket)->xh_check);
329 	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
330 
331 	for (i = 0; i < bucket->bu_blocks; i++)
332 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
333 }
334 
335 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
336 					 struct ocfs2_xattr_bucket *src)
337 {
338 	int i;
339 	int blocksize = src->bu_inode->i_sb->s_blocksize;
340 
341 	BUG_ON(dest->bu_blocks != src->bu_blocks);
342 	BUG_ON(dest->bu_inode != src->bu_inode);
343 
344 	for (i = 0; i < src->bu_blocks; i++) {
345 		memcpy(bucket_block(dest, i), bucket_block(src, i),
346 		       blocksize);
347 	}
348 }
349 
350 static int ocfs2_validate_xattr_block(struct super_block *sb,
351 				      struct buffer_head *bh)
352 {
353 	int rc;
354 	struct ocfs2_xattr_block *xb =
355 		(struct ocfs2_xattr_block *)bh->b_data;
356 
357 	mlog(0, "Validating xattr block %llu\n",
358 	     (unsigned long long)bh->b_blocknr);
359 
360 	BUG_ON(!buffer_uptodate(bh));
361 
362 	/*
363 	 * If the ecc fails, we return the error but otherwise
364 	 * leave the filesystem running.  We know any error is
365 	 * local to this block.
366 	 */
367 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
368 	if (rc)
369 		return rc;
370 
371 	/*
372 	 * Errors after here are fatal
373 	 */
374 
375 	if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
376 		ocfs2_error(sb,
377 			    "Extended attribute block #%llu has bad "
378 			    "signature %.*s",
379 			    (unsigned long long)bh->b_blocknr, 7,
380 			    xb->xb_signature);
381 		return -EINVAL;
382 	}
383 
384 	if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
385 		ocfs2_error(sb,
386 			    "Extended attribute block #%llu has an "
387 			    "invalid xb_blkno of %llu",
388 			    (unsigned long long)bh->b_blocknr,
389 			    (unsigned long long)le64_to_cpu(xb->xb_blkno));
390 		return -EINVAL;
391 	}
392 
393 	if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
394 		ocfs2_error(sb,
395 			    "Extended attribute block #%llu has an invalid "
396 			    "xb_fs_generation of #%u",
397 			    (unsigned long long)bh->b_blocknr,
398 			    le32_to_cpu(xb->xb_fs_generation));
399 		return -EINVAL;
400 	}
401 
402 	return 0;
403 }
404 
405 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
406 				  struct buffer_head **bh)
407 {
408 	int rc;
409 	struct buffer_head *tmp = *bh;
410 
411 	rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
412 			      ocfs2_validate_xattr_block);
413 
414 	/* If ocfs2_read_block() got us a new bh, pass it up. */
415 	if (!rc && !*bh)
416 		*bh = tmp;
417 
418 	return rc;
419 }
420 
421 static inline const char *ocfs2_xattr_prefix(int name_index)
422 {
423 	struct xattr_handler *handler = NULL;
424 
425 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
426 		handler = ocfs2_xattr_handler_map[name_index];
427 
428 	return handler ? handler->prefix : NULL;
429 }
430 
431 static u32 ocfs2_xattr_name_hash(struct inode *inode,
432 				 const char *name,
433 				 int name_len)
434 {
435 	/* Get hash value of uuid from super block */
436 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
437 	int i;
438 
439 	/* hash extended attribute name */
440 	for (i = 0; i < name_len; i++) {
441 		hash = (hash << OCFS2_HASH_SHIFT) ^
442 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
443 		       *name++;
444 	}
445 
446 	return hash;
447 }
448 
449 /*
450  * ocfs2_xattr_hash_entry()
451  *
452  * Compute the hash of an extended attribute.
453  */
454 static void ocfs2_xattr_hash_entry(struct inode *inode,
455 				   struct ocfs2_xattr_header *header,
456 				   struct ocfs2_xattr_entry *entry)
457 {
458 	u32 hash = 0;
459 	char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
460 
461 	hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
462 	entry->xe_name_hash = cpu_to_le32(hash);
463 
464 	return;
465 }
466 
467 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
468 {
469 	int size = 0;
470 
471 	if (value_len <= OCFS2_XATTR_INLINE_SIZE)
472 		size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
473 	else
474 		size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
475 	size += sizeof(struct ocfs2_xattr_entry);
476 
477 	return size;
478 }
479 
480 int ocfs2_calc_security_init(struct inode *dir,
481 			     struct ocfs2_security_xattr_info *si,
482 			     int *want_clusters,
483 			     int *xattr_credits,
484 			     struct ocfs2_alloc_context **xattr_ac)
485 {
486 	int ret = 0;
487 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
488 	int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
489 						 si->value_len);
490 
491 	/*
492 	 * The max space of security xattr taken inline is
493 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
494 	 * So reserve one metadata block for it is ok.
495 	 */
496 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
497 	    s_size > OCFS2_XATTR_FREE_IN_IBODY) {
498 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
499 		if (ret) {
500 			mlog_errno(ret);
501 			return ret;
502 		}
503 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
504 	}
505 
506 	/* reserve clusters for xattr value which will be set in B tree*/
507 	if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
508 		int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
509 							    si->value_len);
510 
511 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
512 							   new_clusters);
513 		*want_clusters += new_clusters;
514 	}
515 	return ret;
516 }
517 
518 int ocfs2_calc_xattr_init(struct inode *dir,
519 			  struct buffer_head *dir_bh,
520 			  int mode,
521 			  struct ocfs2_security_xattr_info *si,
522 			  int *want_clusters,
523 			  int *xattr_credits,
524 			  int *want_meta)
525 {
526 	int ret = 0;
527 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
528 	int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
529 
530 	if (si->enable)
531 		s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
532 						     si->value_len);
533 
534 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
535 		acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
536 					OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
537 					"", NULL, 0);
538 		if (acl_len > 0) {
539 			a_size = ocfs2_xattr_entry_real_size(0, acl_len);
540 			if (S_ISDIR(mode))
541 				a_size <<= 1;
542 		} else if (acl_len != 0 && acl_len != -ENODATA) {
543 			mlog_errno(ret);
544 			return ret;
545 		}
546 	}
547 
548 	if (!(s_size + a_size))
549 		return ret;
550 
551 	/*
552 	 * The max space of security xattr taken inline is
553 	 * 256(name) + 80(value) + 16(entry) = 352 bytes,
554 	 * The max space of acl xattr taken inline is
555 	 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
556 	 * when blocksize = 512, may reserve one more cluser for
557 	 * xattr bucket, otherwise reserve one metadata block
558 	 * for them is ok.
559 	 * If this is a new directory with inline data,
560 	 * we choose to reserve the entire inline area for
561 	 * directory contents and force an external xattr block.
562 	 */
563 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
564 	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
565 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
566 		*want_meta = *want_meta + 1;
567 		*xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
568 	}
569 
570 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
571 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
572 		*want_clusters += 1;
573 		*xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
574 	}
575 
576 	/*
577 	 * reserve credits and clusters for xattrs which has large value
578 	 * and have to be set outside
579 	 */
580 	if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
581 		new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
582 							si->value_len);
583 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
584 							   new_clusters);
585 		*want_clusters += new_clusters;
586 	}
587 	if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
588 	    acl_len > OCFS2_XATTR_INLINE_SIZE) {
589 		/* for directory, it has DEFAULT and ACCESS two types of acls */
590 		new_clusters = (S_ISDIR(mode) ? 2 : 1) *
591 				ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
592 		*xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
593 							   new_clusters);
594 		*want_clusters += new_clusters;
595 	}
596 
597 	return ret;
598 }
599 
600 static int ocfs2_xattr_extend_allocation(struct inode *inode,
601 					 u32 clusters_to_add,
602 					 struct ocfs2_xattr_value_buf *vb,
603 					 struct ocfs2_xattr_set_ctxt *ctxt)
604 {
605 	int status = 0;
606 	handle_t *handle = ctxt->handle;
607 	enum ocfs2_alloc_restarted why;
608 	u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
609 	struct ocfs2_extent_tree et;
610 
611 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
612 
613 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
614 
615 	status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
616 			      OCFS2_JOURNAL_ACCESS_WRITE);
617 	if (status < 0) {
618 		mlog_errno(status);
619 		goto leave;
620 	}
621 
622 	prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
623 	status = ocfs2_add_clusters_in_btree(handle,
624 					     &et,
625 					     &logical_start,
626 					     clusters_to_add,
627 					     0,
628 					     ctxt->data_ac,
629 					     ctxt->meta_ac,
630 					     &why);
631 	if (status < 0) {
632 		mlog_errno(status);
633 		goto leave;
634 	}
635 
636 	status = ocfs2_journal_dirty(handle, vb->vb_bh);
637 	if (status < 0) {
638 		mlog_errno(status);
639 		goto leave;
640 	}
641 
642 	clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
643 
644 	/*
645 	 * We should have already allocated enough space before the transaction,
646 	 * so no need to restart.
647 	 */
648 	BUG_ON(why != RESTART_NONE || clusters_to_add);
649 
650 leave:
651 
652 	return status;
653 }
654 
655 static int __ocfs2_remove_xattr_range(struct inode *inode,
656 				      struct ocfs2_xattr_value_buf *vb,
657 				      u32 cpos, u32 phys_cpos, u32 len,
658 				      unsigned int ext_flags,
659 				      struct ocfs2_xattr_set_ctxt *ctxt)
660 {
661 	int ret;
662 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
663 	handle_t *handle = ctxt->handle;
664 	struct ocfs2_extent_tree et;
665 
666 	ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
667 
668 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
669 			    OCFS2_JOURNAL_ACCESS_WRITE);
670 	if (ret) {
671 		mlog_errno(ret);
672 		goto out;
673 	}
674 
675 	ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
676 				  &ctxt->dealloc);
677 	if (ret) {
678 		mlog_errno(ret);
679 		goto out;
680 	}
681 
682 	le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
683 
684 	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
685 	if (ret) {
686 		mlog_errno(ret);
687 		goto out;
688 	}
689 
690 	if (ext_flags & OCFS2_EXT_REFCOUNTED)
691 		ret = ocfs2_decrease_refcount(inode, handle,
692 					ocfs2_blocks_to_clusters(inode->i_sb,
693 								 phys_blkno),
694 					len, ctxt->meta_ac, &ctxt->dealloc, 1);
695 	else
696 		ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
697 						  phys_blkno, len);
698 	if (ret)
699 		mlog_errno(ret);
700 
701 out:
702 	return ret;
703 }
704 
705 static int ocfs2_xattr_shrink_size(struct inode *inode,
706 				   u32 old_clusters,
707 				   u32 new_clusters,
708 				   struct ocfs2_xattr_value_buf *vb,
709 				   struct ocfs2_xattr_set_ctxt *ctxt)
710 {
711 	int ret = 0;
712 	unsigned int ext_flags;
713 	u32 trunc_len, cpos, phys_cpos, alloc_size;
714 	u64 block;
715 
716 	if (old_clusters <= new_clusters)
717 		return 0;
718 
719 	cpos = new_clusters;
720 	trunc_len = old_clusters - new_clusters;
721 	while (trunc_len) {
722 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
723 					       &alloc_size,
724 					       &vb->vb_xv->xr_list, &ext_flags);
725 		if (ret) {
726 			mlog_errno(ret);
727 			goto out;
728 		}
729 
730 		if (alloc_size > trunc_len)
731 			alloc_size = trunc_len;
732 
733 		ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
734 						 phys_cpos, alloc_size,
735 						 ext_flags, ctxt);
736 		if (ret) {
737 			mlog_errno(ret);
738 			goto out;
739 		}
740 
741 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
742 		ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
743 						       block, alloc_size);
744 		cpos += alloc_size;
745 		trunc_len -= alloc_size;
746 	}
747 
748 out:
749 	return ret;
750 }
751 
752 static int ocfs2_xattr_value_truncate(struct inode *inode,
753 				      struct ocfs2_xattr_value_buf *vb,
754 				      int len,
755 				      struct ocfs2_xattr_set_ctxt *ctxt)
756 {
757 	int ret;
758 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
759 	u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
760 
761 	if (new_clusters == old_clusters)
762 		return 0;
763 
764 	if (new_clusters > old_clusters)
765 		ret = ocfs2_xattr_extend_allocation(inode,
766 						    new_clusters - old_clusters,
767 						    vb, ctxt);
768 	else
769 		ret = ocfs2_xattr_shrink_size(inode,
770 					      old_clusters, new_clusters,
771 					      vb, ctxt);
772 
773 	return ret;
774 }
775 
/*
 * Append one "<prefix><name>\0" item to a listxattr buffer.
 *
 * @buffer:   start of the output buffer; may be NULL when @size is 0.
 * @size:     capacity of @buffer in bytes; 0 means the caller only wants
 *            to know how much space is needed.
 * @result:   running byte count.  On entry it is the offset at which this
 *            item starts; it is always advanced by the item's length,
 *            even when nothing is copied or -ERANGE is returned.
 * @prefix:   NUL-terminated namespace prefix.
 * @name:     attribute name, not necessarily NUL-terminated.
 * @name_len: length of @name in bytes.
 *
 * Returns 0 on success (including the size-probe case) and -ERANGE when
 * the item does not fit into @size bytes.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *p;

	*result += total_len;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	/*
	 * Only form the destination pointer once we know we will copy:
	 * in size-probe mode @buffer may be NULL, and offsetting a NULL
	 * pointer is undefined behaviour.
	 */
	p = buffer + start;
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
799 
800 static int ocfs2_xattr_list_entries(struct inode *inode,
801 				    struct ocfs2_xattr_header *header,
802 				    char *buffer, size_t buffer_size)
803 {
804 	size_t result = 0;
805 	int i, type, ret;
806 	const char *prefix, *name;
807 
808 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
809 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
810 		type = ocfs2_xattr_get_type(entry);
811 		prefix = ocfs2_xattr_prefix(type);
812 
813 		if (prefix) {
814 			name = (const char *)header +
815 				le16_to_cpu(entry->xe_name_offset);
816 
817 			ret = ocfs2_xattr_list_entry(buffer, buffer_size,
818 						     &result, prefix, name,
819 						     entry->xe_name_len);
820 			if (ret)
821 				return ret;
822 		}
823 	}
824 
825 	return result;
826 }
827 
828 static int ocfs2_xattr_ibody_list(struct inode *inode,
829 				  struct ocfs2_dinode *di,
830 				  char *buffer,
831 				  size_t buffer_size)
832 {
833 	struct ocfs2_xattr_header *header = NULL;
834 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
835 	int ret = 0;
836 
837 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
838 		return ret;
839 
840 	header = (struct ocfs2_xattr_header *)
841 		 ((void *)di + inode->i_sb->s_blocksize -
842 		 le16_to_cpu(di->i_xattr_inline_size));
843 
844 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
845 
846 	return ret;
847 }
848 
849 static int ocfs2_xattr_block_list(struct inode *inode,
850 				  struct ocfs2_dinode *di,
851 				  char *buffer,
852 				  size_t buffer_size)
853 {
854 	struct buffer_head *blk_bh = NULL;
855 	struct ocfs2_xattr_block *xb;
856 	int ret = 0;
857 
858 	if (!di->i_xattr_loc)
859 		return ret;
860 
861 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
862 				     &blk_bh);
863 	if (ret < 0) {
864 		mlog_errno(ret);
865 		return ret;
866 	}
867 
868 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
869 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
870 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
871 		ret = ocfs2_xattr_list_entries(inode, header,
872 					       buffer, buffer_size);
873 	} else {
874 		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
875 		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
876 						   buffer, buffer_size);
877 	}
878 
879 	brelse(blk_bh);
880 
881 	return ret;
882 }
883 
884 ssize_t ocfs2_listxattr(struct dentry *dentry,
885 			char *buffer,
886 			size_t size)
887 {
888 	int ret = 0, i_ret = 0, b_ret = 0;
889 	struct buffer_head *di_bh = NULL;
890 	struct ocfs2_dinode *di = NULL;
891 	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
892 
893 	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
894 		return -EOPNOTSUPP;
895 
896 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
897 		return ret;
898 
899 	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
900 	if (ret < 0) {
901 		mlog_errno(ret);
902 		return ret;
903 	}
904 
905 	di = (struct ocfs2_dinode *)di_bh->b_data;
906 
907 	down_read(&oi->ip_xattr_sem);
908 	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
909 	if (i_ret < 0)
910 		b_ret = 0;
911 	else {
912 		if (buffer) {
913 			buffer += i_ret;
914 			size -= i_ret;
915 		}
916 		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
917 					       buffer, size);
918 		if (b_ret < 0)
919 			i_ret = 0;
920 	}
921 	up_read(&oi->ip_xattr_sem);
922 	ocfs2_inode_unlock(dentry->d_inode, 0);
923 
924 	brelse(di_bh);
925 
926 	return i_ret + b_ret;
927 }
928 
929 static int ocfs2_xattr_find_entry(int name_index,
930 				  const char *name,
931 				  struct ocfs2_xattr_search *xs)
932 {
933 	struct ocfs2_xattr_entry *entry;
934 	size_t name_len;
935 	int i, cmp = 1;
936 
937 	if (name == NULL)
938 		return -EINVAL;
939 
940 	name_len = strlen(name);
941 	entry = xs->here;
942 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
943 		cmp = name_index - ocfs2_xattr_get_type(entry);
944 		if (!cmp)
945 			cmp = name_len - entry->xe_name_len;
946 		if (!cmp)
947 			cmp = memcmp(name, (xs->base +
948 				     le16_to_cpu(entry->xe_name_offset)),
949 				     name_len);
950 		if (cmp == 0)
951 			break;
952 		entry += 1;
953 	}
954 	xs->here = entry;
955 
956 	return cmp ? -ENODATA : 0;
957 }
958 
959 static int ocfs2_xattr_get_value_outside(struct inode *inode,
960 					 struct ocfs2_xattr_value_root *xv,
961 					 void *buffer,
962 					 size_t len)
963 {
964 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
965 	u64 blkno;
966 	int i, ret = 0;
967 	size_t cplen, blocksize;
968 	struct buffer_head *bh = NULL;
969 	struct ocfs2_extent_list *el;
970 
971 	el = &xv->xr_list;
972 	clusters = le32_to_cpu(xv->xr_clusters);
973 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
974 	blocksize = inode->i_sb->s_blocksize;
975 
976 	cpos = 0;
977 	while (cpos < clusters) {
978 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
979 					       &num_clusters, el, NULL);
980 		if (ret) {
981 			mlog_errno(ret);
982 			goto out;
983 		}
984 
985 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
986 		/* Copy ocfs2_xattr_value */
987 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
988 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
989 					       &bh, NULL);
990 			if (ret) {
991 				mlog_errno(ret);
992 				goto out;
993 			}
994 
995 			cplen = len >= blocksize ? blocksize : len;
996 			memcpy(buffer, bh->b_data, cplen);
997 			len -= cplen;
998 			buffer += cplen;
999 
1000 			brelse(bh);
1001 			bh = NULL;
1002 			if (len == 0)
1003 				break;
1004 		}
1005 		cpos += num_clusters;
1006 	}
1007 out:
1008 	return ret;
1009 }
1010 
1011 static int ocfs2_xattr_ibody_get(struct inode *inode,
1012 				 int name_index,
1013 				 const char *name,
1014 				 void *buffer,
1015 				 size_t buffer_size,
1016 				 struct ocfs2_xattr_search *xs)
1017 {
1018 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1019 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1020 	struct ocfs2_xattr_value_root *xv;
1021 	size_t size;
1022 	int ret = 0;
1023 
1024 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1025 		return -ENODATA;
1026 
1027 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1028 	xs->header = (struct ocfs2_xattr_header *)
1029 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1030 	xs->base = (void *)xs->header;
1031 	xs->here = xs->header->xh_entries;
1032 
1033 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
1034 	if (ret)
1035 		return ret;
1036 	size = le64_to_cpu(xs->here->xe_value_size);
1037 	if (buffer) {
1038 		if (size > buffer_size)
1039 			return -ERANGE;
1040 		if (ocfs2_xattr_is_local(xs->here)) {
1041 			memcpy(buffer, (void *)xs->base +
1042 			       le16_to_cpu(xs->here->xe_name_offset) +
1043 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1044 		} else {
1045 			xv = (struct ocfs2_xattr_value_root *)
1046 				(xs->base + le16_to_cpu(
1047 				 xs->here->xe_name_offset) +
1048 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1049 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1050 							    buffer, size);
1051 			if (ret < 0) {
1052 				mlog_errno(ret);
1053 				return ret;
1054 			}
1055 		}
1056 	}
1057 
1058 	return size;
1059 }
1060 
1061 static int ocfs2_xattr_block_get(struct inode *inode,
1062 				 int name_index,
1063 				 const char *name,
1064 				 void *buffer,
1065 				 size_t buffer_size,
1066 				 struct ocfs2_xattr_search *xs)
1067 {
1068 	struct ocfs2_xattr_block *xb;
1069 	struct ocfs2_xattr_value_root *xv;
1070 	size_t size;
1071 	int ret = -ENODATA, name_offset, name_len, i;
1072 	int uninitialized_var(block_off);
1073 
1074 	xs->bucket = ocfs2_xattr_bucket_new(inode);
1075 	if (!xs->bucket) {
1076 		ret = -ENOMEM;
1077 		mlog_errno(ret);
1078 		goto cleanup;
1079 	}
1080 
1081 	ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1082 	if (ret) {
1083 		mlog_errno(ret);
1084 		goto cleanup;
1085 	}
1086 
1087 	if (xs->not_found) {
1088 		ret = -ENODATA;
1089 		goto cleanup;
1090 	}
1091 
1092 	xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1093 	size = le64_to_cpu(xs->here->xe_value_size);
1094 	if (buffer) {
1095 		ret = -ERANGE;
1096 		if (size > buffer_size)
1097 			goto cleanup;
1098 
1099 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
1100 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1101 		i = xs->here - xs->header->xh_entries;
1102 
1103 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1104 			ret = ocfs2_xattr_bucket_get_name_value(inode,
1105 								bucket_xh(xs->bucket),
1106 								i,
1107 								&block_off,
1108 								&name_offset);
1109 			xs->base = bucket_block(xs->bucket, block_off);
1110 		}
1111 		if (ocfs2_xattr_is_local(xs->here)) {
1112 			memcpy(buffer, (void *)xs->base +
1113 			       name_offset + name_len, size);
1114 		} else {
1115 			xv = (struct ocfs2_xattr_value_root *)
1116 				(xs->base + name_offset + name_len);
1117 			ret = ocfs2_xattr_get_value_outside(inode, xv,
1118 							    buffer, size);
1119 			if (ret < 0) {
1120 				mlog_errno(ret);
1121 				goto cleanup;
1122 			}
1123 		}
1124 	}
1125 	ret = size;
1126 cleanup:
1127 	ocfs2_xattr_bucket_free(xs->bucket);
1128 
1129 	brelse(xs->xattr_bh);
1130 	xs->xattr_bh = NULL;
1131 	return ret;
1132 }
1133 
1134 int ocfs2_xattr_get_nolock(struct inode *inode,
1135 			   struct buffer_head *di_bh,
1136 			   int name_index,
1137 			   const char *name,
1138 			   void *buffer,
1139 			   size_t buffer_size)
1140 {
1141 	int ret;
1142 	struct ocfs2_dinode *di = NULL;
1143 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1144 	struct ocfs2_xattr_search xis = {
1145 		.not_found = -ENODATA,
1146 	};
1147 	struct ocfs2_xattr_search xbs = {
1148 		.not_found = -ENODATA,
1149 	};
1150 
1151 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1152 		return -EOPNOTSUPP;
1153 
1154 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1155 		ret = -ENODATA;
1156 
1157 	xis.inode_bh = xbs.inode_bh = di_bh;
1158 	di = (struct ocfs2_dinode *)di_bh->b_data;
1159 
1160 	down_read(&oi->ip_xattr_sem);
1161 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1162 				    buffer_size, &xis);
1163 	if (ret == -ENODATA && di->i_xattr_loc)
1164 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1165 					    buffer_size, &xbs);
1166 	up_read(&oi->ip_xattr_sem);
1167 
1168 	return ret;
1169 }
1170 
1171 /* ocfs2_xattr_get()
1172  *
1173  * Copy an extended attribute into the buffer provided.
1174  * Buffer is NULL to compute the size of buffer required.
1175  */
1176 static int ocfs2_xattr_get(struct inode *inode,
1177 			   int name_index,
1178 			   const char *name,
1179 			   void *buffer,
1180 			   size_t buffer_size)
1181 {
1182 	int ret;
1183 	struct buffer_head *di_bh = NULL;
1184 
1185 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
1186 	if (ret < 0) {
1187 		mlog_errno(ret);
1188 		return ret;
1189 	}
1190 	ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1191 				     name, buffer, buffer_size);
1192 
1193 	ocfs2_inode_unlock(inode, 0);
1194 
1195 	brelse(di_bh);
1196 
1197 	return ret;
1198 }
1199 
1200 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1201 					   handle_t *handle,
1202 					   struct ocfs2_xattr_value_buf *vb,
1203 					   const void *value,
1204 					   int value_len)
1205 {
1206 	int ret = 0, i, cp_len;
1207 	u16 blocksize = inode->i_sb->s_blocksize;
1208 	u32 p_cluster, num_clusters;
1209 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1210 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1211 	u64 blkno;
1212 	struct buffer_head *bh = NULL;
1213 	unsigned int ext_flags;
1214 	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1215 
1216 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1217 
1218 	while (cpos < clusters) {
1219 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1220 					       &num_clusters, &xv->xr_list,
1221 					       &ext_flags);
1222 		if (ret) {
1223 			mlog_errno(ret);
1224 			goto out;
1225 		}
1226 
1227 		BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1228 
1229 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1230 
1231 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1232 			ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1233 					       &bh, NULL);
1234 			if (ret) {
1235 				mlog_errno(ret);
1236 				goto out;
1237 			}
1238 
1239 			ret = ocfs2_journal_access(handle,
1240 						   INODE_CACHE(inode),
1241 						   bh,
1242 						   OCFS2_JOURNAL_ACCESS_WRITE);
1243 			if (ret < 0) {
1244 				mlog_errno(ret);
1245 				goto out;
1246 			}
1247 
1248 			cp_len = value_len > blocksize ? blocksize : value_len;
1249 			memcpy(bh->b_data, value, cp_len);
1250 			value_len -= cp_len;
1251 			value += cp_len;
1252 			if (cp_len < blocksize)
1253 				memset(bh->b_data + cp_len, 0,
1254 				       blocksize - cp_len);
1255 
1256 			ret = ocfs2_journal_dirty(handle, bh);
1257 			if (ret < 0) {
1258 				mlog_errno(ret);
1259 				goto out;
1260 			}
1261 			brelse(bh);
1262 			bh = NULL;
1263 
1264 			/*
1265 			 * XXX: do we need to empty all the following
1266 			 * blocks in this cluster?
1267 			 */
1268 			if (!value_len)
1269 				break;
1270 		}
1271 		cpos += num_clusters;
1272 	}
1273 out:
1274 	brelse(bh);
1275 
1276 	return ret;
1277 }
1278 
1279 static int ocfs2_xattr_cleanup(struct inode *inode,
1280 			       handle_t *handle,
1281 			       struct ocfs2_xattr_info *xi,
1282 			       struct ocfs2_xattr_search *xs,
1283 			       struct ocfs2_xattr_value_buf *vb,
1284 			       size_t offs)
1285 {
1286 	int ret = 0;
1287 	size_t name_len = strlen(xi->name);
1288 	void *val = xs->base + offs;
1289 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1290 
1291 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1292 			    OCFS2_JOURNAL_ACCESS_WRITE);
1293 	if (ret) {
1294 		mlog_errno(ret);
1295 		goto out;
1296 	}
1297 	/* Decrease xattr count */
1298 	le16_add_cpu(&xs->header->xh_count, -1);
1299 	/* Remove the xattr entry and tree root which has already be set*/
1300 	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1301 	memset(val, 0, size);
1302 
1303 	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1304 	if (ret < 0)
1305 		mlog_errno(ret);
1306 out:
1307 	return ret;
1308 }
1309 
1310 static int ocfs2_xattr_update_entry(struct inode *inode,
1311 				    handle_t *handle,
1312 				    struct ocfs2_xattr_info *xi,
1313 				    struct ocfs2_xattr_search *xs,
1314 				    struct ocfs2_xattr_value_buf *vb,
1315 				    size_t offs)
1316 {
1317 	int ret;
1318 
1319 	ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1320 			    OCFS2_JOURNAL_ACCESS_WRITE);
1321 	if (ret) {
1322 		mlog_errno(ret);
1323 		goto out;
1324 	}
1325 
1326 	xs->here->xe_name_offset = cpu_to_le16(offs);
1327 	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1328 	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1329 		ocfs2_xattr_set_local(xs->here, 1);
1330 	else
1331 		ocfs2_xattr_set_local(xs->here, 0);
1332 	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1333 
1334 	ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1335 	if (ret < 0)
1336 		mlog_errno(ret);
1337 out:
1338 	return ret;
1339 }
1340 
1341 /*
1342  * ocfs2_xattr_set_value_outside()
1343  *
1344  * Set large size value in B tree.
1345  */
1346 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1347 					 struct ocfs2_xattr_info *xi,
1348 					 struct ocfs2_xattr_search *xs,
1349 					 struct ocfs2_xattr_set_ctxt *ctxt,
1350 					 struct ocfs2_xattr_value_buf *vb,
1351 					 size_t offs)
1352 {
1353 	size_t name_len = strlen(xi->name);
1354 	void *val = xs->base + offs;
1355 	struct ocfs2_xattr_value_root *xv = NULL;
1356 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1357 	int ret = 0;
1358 
1359 	memset(val, 0, size);
1360 	memcpy(val, xi->name, name_len);
1361 	xv = (struct ocfs2_xattr_value_root *)
1362 		(val + OCFS2_XATTR_SIZE(name_len));
1363 	xv->xr_clusters = 0;
1364 	xv->xr_last_eb_blk = 0;
1365 	xv->xr_list.l_tree_depth = 0;
1366 	xv->xr_list.l_count = cpu_to_le16(1);
1367 	xv->xr_list.l_next_free_rec = 0;
1368 	vb->vb_xv = xv;
1369 
1370 	ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1371 	if (ret < 0) {
1372 		mlog_errno(ret);
1373 		return ret;
1374 	}
1375 	ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1376 	if (ret < 0) {
1377 		mlog_errno(ret);
1378 		return ret;
1379 	}
1380 	ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1381 					      xi->value, xi->value_len);
1382 	if (ret < 0)
1383 		mlog_errno(ret);
1384 
1385 	return ret;
1386 }
1387 
1388 /*
1389  * ocfs2_xattr_set_entry_local()
1390  *
1391  * Set, replace or remove extended attribute in local.
1392  */
1393 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1394 					struct ocfs2_xattr_info *xi,
1395 					struct ocfs2_xattr_search *xs,
1396 					struct ocfs2_xattr_entry *last,
1397 					size_t min_offs)
1398 {
1399 	size_t name_len = strlen(xi->name);
1400 	int i;
1401 
1402 	if (xi->value && xs->not_found) {
1403 		/* Insert the new xattr entry. */
1404 		le16_add_cpu(&xs->header->xh_count, 1);
1405 		ocfs2_xattr_set_type(last, xi->name_index);
1406 		ocfs2_xattr_set_local(last, 1);
1407 		last->xe_name_len = name_len;
1408 	} else {
1409 		void *first_val;
1410 		void *val;
1411 		size_t offs, size;
1412 
1413 		first_val = xs->base + min_offs;
1414 		offs = le16_to_cpu(xs->here->xe_name_offset);
1415 		val = xs->base + offs;
1416 
1417 		if (le64_to_cpu(xs->here->xe_value_size) >
1418 		    OCFS2_XATTR_INLINE_SIZE)
1419 			size = OCFS2_XATTR_SIZE(name_len) +
1420 				OCFS2_XATTR_ROOT_SIZE;
1421 		else
1422 			size = OCFS2_XATTR_SIZE(name_len) +
1423 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1424 
1425 		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1426 				OCFS2_XATTR_SIZE(xi->value_len)) {
1427 			/* The old and the new value have the
1428 			   same size. Just replace the value. */
1429 			ocfs2_xattr_set_local(xs->here, 1);
1430 			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1431 			/* Clear value bytes. */
1432 			memset(val + OCFS2_XATTR_SIZE(name_len),
1433 			       0,
1434 			       OCFS2_XATTR_SIZE(xi->value_len));
1435 			memcpy(val + OCFS2_XATTR_SIZE(name_len),
1436 			       xi->value,
1437 			       xi->value_len);
1438 			return;
1439 		}
1440 		/* Remove the old name+value. */
1441 		memmove(first_val + size, first_val, val - first_val);
1442 		memset(first_val, 0, size);
1443 		xs->here->xe_name_hash = 0;
1444 		xs->here->xe_name_offset = 0;
1445 		ocfs2_xattr_set_local(xs->here, 1);
1446 		xs->here->xe_value_size = 0;
1447 
1448 		min_offs += size;
1449 
1450 		/* Adjust all value offsets. */
1451 		last = xs->header->xh_entries;
1452 		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1453 			size_t o = le16_to_cpu(last->xe_name_offset);
1454 
1455 			if (o < offs)
1456 				last->xe_name_offset = cpu_to_le16(o + size);
1457 			last += 1;
1458 		}
1459 
1460 		if (!xi->value) {
1461 			/* Remove the old entry. */
1462 			last -= 1;
1463 			memmove(xs->here, xs->here + 1,
1464 				(void *)last - (void *)xs->here);
1465 			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1466 			le16_add_cpu(&xs->header->xh_count, -1);
1467 		}
1468 	}
1469 	if (xi->value) {
1470 		/* Insert the new name+value. */
1471 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1472 				OCFS2_XATTR_SIZE(xi->value_len);
1473 		void *val = xs->base + min_offs - size;
1474 
1475 		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1476 		memset(val, 0, size);
1477 		memcpy(val, xi->name, name_len);
1478 		memcpy(val + OCFS2_XATTR_SIZE(name_len),
1479 		       xi->value,
1480 		       xi->value_len);
1481 		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1482 		ocfs2_xattr_set_local(xs->here, 1);
1483 		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1484 	}
1485 
1486 	return;
1487 }
1488 
1489 /*
1490  * ocfs2_xattr_set_entry()
1491  *
1492  * Set extended attribute entry into inode or block.
1493  *
1494  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1495  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1496  * then set value in B tree with set_value_outside().
1497  */
1498 static int ocfs2_xattr_set_entry(struct inode *inode,
1499 				 struct ocfs2_xattr_info *xi,
1500 				 struct ocfs2_xattr_search *xs,
1501 				 struct ocfs2_xattr_set_ctxt *ctxt,
1502 				 int flag)
1503 {
1504 	struct ocfs2_xattr_entry *last;
1505 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1506 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1507 	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1508 	size_t size_l = 0;
1509 	handle_t *handle = ctxt->handle;
1510 	int free, i, ret;
1511 	struct ocfs2_xattr_info xi_l = {
1512 		.name_index = xi->name_index,
1513 		.name = xi->name,
1514 		.value = xi->value,
1515 		.value_len = xi->value_len,
1516 	};
1517 	struct ocfs2_xattr_value_buf vb = {
1518 		.vb_bh = xs->xattr_bh,
1519 		.vb_access = ocfs2_journal_access_di,
1520 	};
1521 
1522 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1523 		BUG_ON(xs->xattr_bh == xs->inode_bh);
1524 		vb.vb_access = ocfs2_journal_access_xb;
1525 	} else
1526 		BUG_ON(xs->xattr_bh != xs->inode_bh);
1527 
1528 	/* Compute min_offs, last and free space. */
1529 	last = xs->header->xh_entries;
1530 
1531 	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1532 		size_t offs = le16_to_cpu(last->xe_name_offset);
1533 		if (offs < min_offs)
1534 			min_offs = offs;
1535 		last += 1;
1536 	}
1537 
1538 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1539 	if (free < 0)
1540 		return -EIO;
1541 
1542 	if (!xs->not_found) {
1543 		size_t size = 0;
1544 		if (ocfs2_xattr_is_local(xs->here))
1545 			size = OCFS2_XATTR_SIZE(name_len) +
1546 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1547 		else
1548 			size = OCFS2_XATTR_SIZE(name_len) +
1549 				OCFS2_XATTR_ROOT_SIZE;
1550 		free += (size + sizeof(struct ocfs2_xattr_entry));
1551 	}
1552 	/* Check free space in inode or block */
1553 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1554 		if (free < sizeof(struct ocfs2_xattr_entry) +
1555 			   OCFS2_XATTR_SIZE(name_len) +
1556 			   OCFS2_XATTR_ROOT_SIZE) {
1557 			ret = -ENOSPC;
1558 			goto out;
1559 		}
1560 		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1561 		xi_l.value = (void *)&def_xv;
1562 		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1563 	} else if (xi->value) {
1564 		if (free < sizeof(struct ocfs2_xattr_entry) +
1565 			   OCFS2_XATTR_SIZE(name_len) +
1566 			   OCFS2_XATTR_SIZE(xi->value_len)) {
1567 			ret = -ENOSPC;
1568 			goto out;
1569 		}
1570 	}
1571 
1572 	if (!xs->not_found) {
1573 		/* For existing extended attribute */
1574 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1575 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1576 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1577 		void *val = xs->base + offs;
1578 
1579 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1580 			/* Replace existing local xattr with tree root */
1581 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1582 							    ctxt, &vb, offs);
1583 			if (ret < 0)
1584 				mlog_errno(ret);
1585 			goto out;
1586 		} else if (!ocfs2_xattr_is_local(xs->here)) {
1587 			/* For existing xattr which has value outside */
1588 			vb.vb_xv = (struct ocfs2_xattr_value_root *)
1589 				(val + OCFS2_XATTR_SIZE(name_len));
1590 
1591 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1592 				/*
1593 				 * If new value need set outside also,
1594 				 * first truncate old value to new value,
1595 				 * then set new value with set_value_outside().
1596 				 */
1597 				ret = ocfs2_xattr_value_truncate(inode,
1598 								 &vb,
1599 								 xi->value_len,
1600 								 ctxt);
1601 				if (ret < 0) {
1602 					mlog_errno(ret);
1603 					goto out;
1604 				}
1605 
1606 				ret = ocfs2_xattr_update_entry(inode,
1607 							       handle,
1608 							       xi,
1609 							       xs,
1610 							       &vb,
1611 							       offs);
1612 				if (ret < 0) {
1613 					mlog_errno(ret);
1614 					goto out;
1615 				}
1616 
1617 				ret = __ocfs2_xattr_set_value_outside(inode,
1618 								handle,
1619 								&vb,
1620 								xi->value,
1621 								xi->value_len);
1622 				if (ret < 0)
1623 					mlog_errno(ret);
1624 				goto out;
1625 			} else {
1626 				/*
1627 				 * If new value need set in local,
1628 				 * just trucate old value to zero.
1629 				 */
1630 				 ret = ocfs2_xattr_value_truncate(inode,
1631 								  &vb,
1632 								  0,
1633 								  ctxt);
1634 				if (ret < 0)
1635 					mlog_errno(ret);
1636 			}
1637 		}
1638 	}
1639 
1640 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1641 				      OCFS2_JOURNAL_ACCESS_WRITE);
1642 	if (ret) {
1643 		mlog_errno(ret);
1644 		goto out;
1645 	}
1646 
1647 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1648 		ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1649 				   OCFS2_JOURNAL_ACCESS_WRITE);
1650 		if (ret) {
1651 			mlog_errno(ret);
1652 			goto out;
1653 		}
1654 	}
1655 
1656 	/*
1657 	 * Set value in local, include set tree root in local.
1658 	 * This is the first step for value size >INLINE_SIZE.
1659 	 */
1660 	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1661 
1662 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1663 		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1664 		if (ret < 0) {
1665 			mlog_errno(ret);
1666 			goto out;
1667 		}
1668 	}
1669 
1670 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1671 	    (flag & OCFS2_INLINE_XATTR_FL)) {
1672 		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1673 		unsigned int xattrsize = osb->s_xattr_inline_size;
1674 
1675 		/*
1676 		 * Adjust extent record count or inline data size
1677 		 * to reserve space for extended attribute.
1678 		 */
1679 		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1680 			struct ocfs2_inline_data *idata = &di->id2.i_data;
1681 			le16_add_cpu(&idata->id_count, -xattrsize);
1682 		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1683 			struct ocfs2_extent_list *el = &di->id2.i_list;
1684 			le16_add_cpu(&el->l_count, -(xattrsize /
1685 					sizeof(struct ocfs2_extent_rec)));
1686 		}
1687 		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1688 	}
1689 	/* Update xattr flag */
1690 	spin_lock(&oi->ip_lock);
1691 	oi->ip_dyn_features |= flag;
1692 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1693 	spin_unlock(&oi->ip_lock);
1694 
1695 	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1696 	if (ret < 0)
1697 		mlog_errno(ret);
1698 
1699 	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1700 		/*
1701 		 * Set value outside in B tree.
1702 		 * This is the second step for value size > INLINE_SIZE.
1703 		 */
1704 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1705 		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1706 						    &vb, offs);
1707 		if (ret < 0) {
1708 			int ret2;
1709 
1710 			mlog_errno(ret);
1711 			/*
1712 			 * If set value outside failed, we have to clean
1713 			 * the junk tree root we have already set in local.
1714 			 */
1715 			ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1716 						   xi, xs, &vb, offs);
1717 			if (ret2 < 0)
1718 				mlog_errno(ret2);
1719 		}
1720 	}
1721 out:
1722 	return ret;
1723 }
1724 
1725 static int ocfs2_remove_value_outside(struct inode*inode,
1726 				      struct ocfs2_xattr_value_buf *vb,
1727 				      struct ocfs2_xattr_header *header)
1728 {
1729 	int ret = 0, i;
1730 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1731 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1732 
1733 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1734 
1735 	ctxt.handle = ocfs2_start_trans(osb,
1736 					ocfs2_remove_extent_credits(osb->sb));
1737 	if (IS_ERR(ctxt.handle)) {
1738 		ret = PTR_ERR(ctxt.handle);
1739 		mlog_errno(ret);
1740 		goto out;
1741 	}
1742 
1743 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1744 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1745 
1746 		if (!ocfs2_xattr_is_local(entry)) {
1747 			void *val;
1748 
1749 			val = (void *)header +
1750 				le16_to_cpu(entry->xe_name_offset);
1751 			vb->vb_xv = (struct ocfs2_xattr_value_root *)
1752 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1753 			ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1754 			if (ret < 0) {
1755 				mlog_errno(ret);
1756 				break;
1757 			}
1758 		}
1759 	}
1760 
1761 	ocfs2_commit_trans(osb, ctxt.handle);
1762 	ocfs2_schedule_truncate_log_flush(osb, 1);
1763 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
1764 out:
1765 	return ret;
1766 }
1767 
1768 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1769 				    struct buffer_head *di_bh)
1770 {
1771 
1772 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1773 	struct ocfs2_xattr_header *header;
1774 	int ret;
1775 	struct ocfs2_xattr_value_buf vb = {
1776 		.vb_bh = di_bh,
1777 		.vb_access = ocfs2_journal_access_di,
1778 	};
1779 
1780 	header = (struct ocfs2_xattr_header *)
1781 		 ((void *)di + inode->i_sb->s_blocksize -
1782 		 le16_to_cpu(di->i_xattr_inline_size));
1783 
1784 	ret = ocfs2_remove_value_outside(inode, &vb, header);
1785 
1786 	return ret;
1787 }
1788 
1789 static int ocfs2_xattr_block_remove(struct inode *inode,
1790 				    struct buffer_head *blk_bh)
1791 {
1792 	struct ocfs2_xattr_block *xb;
1793 	int ret = 0;
1794 	struct ocfs2_xattr_value_buf vb = {
1795 		.vb_bh = blk_bh,
1796 		.vb_access = ocfs2_journal_access_xb,
1797 	};
1798 
1799 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1800 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1801 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1802 		ret = ocfs2_remove_value_outside(inode, &vb, header);
1803 	} else
1804 		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1805 
1806 	return ret;
1807 }
1808 
1809 static int ocfs2_xattr_free_block(struct inode *inode,
1810 				  u64 block)
1811 {
1812 	struct inode *xb_alloc_inode;
1813 	struct buffer_head *xb_alloc_bh = NULL;
1814 	struct buffer_head *blk_bh = NULL;
1815 	struct ocfs2_xattr_block *xb;
1816 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1817 	handle_t *handle;
1818 	int ret = 0;
1819 	u64 blk, bg_blkno;
1820 	u16 bit;
1821 
1822 	ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1823 	if (ret < 0) {
1824 		mlog_errno(ret);
1825 		goto out;
1826 	}
1827 
1828 	ret = ocfs2_xattr_block_remove(inode, blk_bh);
1829 	if (ret < 0) {
1830 		mlog_errno(ret);
1831 		goto out;
1832 	}
1833 
1834 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1835 	blk = le64_to_cpu(xb->xb_blkno);
1836 	bit = le16_to_cpu(xb->xb_suballoc_bit);
1837 	bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1838 
1839 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1840 				EXTENT_ALLOC_SYSTEM_INODE,
1841 				le16_to_cpu(xb->xb_suballoc_slot));
1842 	if (!xb_alloc_inode) {
1843 		ret = -ENOMEM;
1844 		mlog_errno(ret);
1845 		goto out;
1846 	}
1847 	mutex_lock(&xb_alloc_inode->i_mutex);
1848 
1849 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1850 	if (ret < 0) {
1851 		mlog_errno(ret);
1852 		goto out_mutex;
1853 	}
1854 
1855 	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1856 	if (IS_ERR(handle)) {
1857 		ret = PTR_ERR(handle);
1858 		mlog_errno(ret);
1859 		goto out_unlock;
1860 	}
1861 
1862 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1863 				       bit, bg_blkno, 1);
1864 	if (ret < 0)
1865 		mlog_errno(ret);
1866 
1867 	ocfs2_commit_trans(osb, handle);
1868 out_unlock:
1869 	ocfs2_inode_unlock(xb_alloc_inode, 1);
1870 	brelse(xb_alloc_bh);
1871 out_mutex:
1872 	mutex_unlock(&xb_alloc_inode->i_mutex);
1873 	iput(xb_alloc_inode);
1874 out:
1875 	brelse(blk_bh);
1876 	return ret;
1877 }
1878 
1879 /*
1880  * ocfs2_xattr_remove()
1881  *
1882  * Free extended attribute resources associated with this inode.
1883  */
1884 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1885 {
1886 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1887 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1888 	handle_t *handle;
1889 	int ret;
1890 
1891 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1892 		return 0;
1893 
1894 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1895 		return 0;
1896 
1897 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1898 		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1899 		if (ret < 0) {
1900 			mlog_errno(ret);
1901 			goto out;
1902 		}
1903 	}
1904 
1905 	if (di->i_xattr_loc) {
1906 		ret = ocfs2_xattr_free_block(inode,
1907 					     le64_to_cpu(di->i_xattr_loc));
1908 		if (ret < 0) {
1909 			mlog_errno(ret);
1910 			goto out;
1911 		}
1912 	}
1913 
1914 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1915 				   OCFS2_INODE_UPDATE_CREDITS);
1916 	if (IS_ERR(handle)) {
1917 		ret = PTR_ERR(handle);
1918 		mlog_errno(ret);
1919 		goto out;
1920 	}
1921 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1922 				      OCFS2_JOURNAL_ACCESS_WRITE);
1923 	if (ret) {
1924 		mlog_errno(ret);
1925 		goto out_commit;
1926 	}
1927 
1928 	di->i_xattr_loc = 0;
1929 
1930 	spin_lock(&oi->ip_lock);
1931 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1932 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1933 	spin_unlock(&oi->ip_lock);
1934 
1935 	ret = ocfs2_journal_dirty(handle, di_bh);
1936 	if (ret < 0)
1937 		mlog_errno(ret);
1938 out_commit:
1939 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1940 out:
1941 	return ret;
1942 }
1943 
1944 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1945 					struct ocfs2_dinode *di)
1946 {
1947 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1948 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1949 	int free;
1950 
1951 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1952 		return 0;
1953 
1954 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1955 		struct ocfs2_inline_data *idata = &di->id2.i_data;
1956 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1957 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
1958 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
1959 			le64_to_cpu(di->i_size);
1960 	} else {
1961 		struct ocfs2_extent_list *el = &di->id2.i_list;
1962 		free = (le16_to_cpu(el->l_count) -
1963 			le16_to_cpu(el->l_next_free_rec)) *
1964 			sizeof(struct ocfs2_extent_rec);
1965 	}
1966 	if (free >= xattrsize)
1967 		return 1;
1968 
1969 	return 0;
1970 }
1971 
1972 /*
1973  * ocfs2_xattr_ibody_find()
1974  *
1975  * Find extended attribute in inode block and
1976  * fill search info into struct ocfs2_xattr_search.
1977  */
1978 static int ocfs2_xattr_ibody_find(struct inode *inode,
1979 				  int name_index,
1980 				  const char *name,
1981 				  struct ocfs2_xattr_search *xs)
1982 {
1983 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1984 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1985 	int ret;
1986 	int has_space = 0;
1987 
1988 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1989 		return 0;
1990 
1991 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1992 		down_read(&oi->ip_alloc_sem);
1993 		has_space = ocfs2_xattr_has_space_inline(inode, di);
1994 		up_read(&oi->ip_alloc_sem);
1995 		if (!has_space)
1996 			return 0;
1997 	}
1998 
1999 	xs->xattr_bh = xs->inode_bh;
2000 	xs->end = (void *)di + inode->i_sb->s_blocksize;
2001 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2002 		xs->header = (struct ocfs2_xattr_header *)
2003 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
2004 	else
2005 		xs->header = (struct ocfs2_xattr_header *)
2006 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2007 	xs->base = (void *)xs->header;
2008 	xs->here = xs->header->xh_entries;
2009 
2010 	/* Find the named attribute. */
2011 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2012 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2013 		if (ret && ret != -ENODATA)
2014 			return ret;
2015 		xs->not_found = ret;
2016 	}
2017 
2018 	return 0;
2019 }
2020 
2021 /*
2022  * ocfs2_xattr_ibody_set()
2023  *
2024  * Set, replace or remove an extended attribute into inode block.
2025  *
2026  */
2027 static int ocfs2_xattr_ibody_set(struct inode *inode,
2028 				 struct ocfs2_xattr_info *xi,
2029 				 struct ocfs2_xattr_search *xs,
2030 				 struct ocfs2_xattr_set_ctxt *ctxt)
2031 {
2032 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2033 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2034 	int ret;
2035 
2036 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2037 		return -ENOSPC;
2038 
2039 	down_write(&oi->ip_alloc_sem);
2040 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2041 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
2042 			ret = -ENOSPC;
2043 			goto out;
2044 		}
2045 	}
2046 
2047 	ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2048 				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2049 out:
2050 	up_write(&oi->ip_alloc_sem);
2051 
2052 	return ret;
2053 }
2054 
2055 /*
2056  * ocfs2_xattr_block_find()
2057  *
2058  * Find extended attribute in external block and
2059  * fill search info into struct ocfs2_xattr_search.
2060  */
2061 static int ocfs2_xattr_block_find(struct inode *inode,
2062 				  int name_index,
2063 				  const char *name,
2064 				  struct ocfs2_xattr_search *xs)
2065 {
2066 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2067 	struct buffer_head *blk_bh = NULL;
2068 	struct ocfs2_xattr_block *xb;
2069 	int ret = 0;
2070 
2071 	if (!di->i_xattr_loc)
2072 		return ret;
2073 
2074 	ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2075 				     &blk_bh);
2076 	if (ret < 0) {
2077 		mlog_errno(ret);
2078 		return ret;
2079 	}
2080 
2081 	xs->xattr_bh = blk_bh;
2082 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2083 
2084 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2085 		xs->header = &xb->xb_attrs.xb_header;
2086 		xs->base = (void *)xs->header;
2087 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2088 		xs->here = xs->header->xh_entries;
2089 
2090 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
2091 	} else
2092 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2093 						   name_index,
2094 						   name, xs);
2095 
2096 	if (ret && ret != -ENODATA) {
2097 		xs->xattr_bh = NULL;
2098 		goto cleanup;
2099 	}
2100 	xs->not_found = ret;
2101 	return 0;
2102 cleanup:
2103 	brelse(blk_bh);
2104 
2105 	return ret;
2106 }
2107 
2108 /*
2109  * ocfs2_xattr_block_set()
2110  *
2111  * Set, replace or remove an extended attribute into external block.
2112  *
2113  */
2114 static int ocfs2_xattr_block_set(struct inode *inode,
2115 				 struct ocfs2_xattr_info *xi,
2116 				 struct ocfs2_xattr_search *xs,
2117 				 struct ocfs2_xattr_set_ctxt *ctxt)
2118 {
2119 	struct buffer_head *new_bh = NULL;
2120 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2121 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
2122 	handle_t *handle = ctxt->handle;
2123 	struct ocfs2_xattr_block *xblk = NULL;
2124 	u16 suballoc_bit_start;
2125 	u32 num_got;
2126 	u64 first_blkno;
2127 	int ret;
2128 
2129 	if (!xs->xattr_bh) {
2130 		ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2131 					      xs->inode_bh,
2132 					      OCFS2_JOURNAL_ACCESS_CREATE);
2133 		if (ret < 0) {
2134 			mlog_errno(ret);
2135 			goto end;
2136 		}
2137 
2138 		ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2139 					   &suballoc_bit_start, &num_got,
2140 					   &first_blkno);
2141 		if (ret < 0) {
2142 			mlog_errno(ret);
2143 			goto end;
2144 		}
2145 
2146 		new_bh = sb_getblk(inode->i_sb, first_blkno);
2147 		ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2148 
2149 		ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2150 					      new_bh,
2151 					      OCFS2_JOURNAL_ACCESS_CREATE);
2152 		if (ret < 0) {
2153 			mlog_errno(ret);
2154 			goto end;
2155 		}
2156 
2157 		/* Initialize ocfs2_xattr_block */
2158 		xs->xattr_bh = new_bh;
2159 		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2160 		memset(xblk, 0, inode->i_sb->s_blocksize);
2161 		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2162 		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2163 		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2164 		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2165 		xblk->xb_blkno = cpu_to_le64(first_blkno);
2166 
2167 		xs->header = &xblk->xb_attrs.xb_header;
2168 		xs->base = (void *)xs->header;
2169 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2170 		xs->here = xs->header->xh_entries;
2171 
2172 		ret = ocfs2_journal_dirty(handle, new_bh);
2173 		if (ret < 0) {
2174 			mlog_errno(ret);
2175 			goto end;
2176 		}
2177 		di->i_xattr_loc = cpu_to_le64(first_blkno);
2178 		ocfs2_journal_dirty(handle, xs->inode_bh);
2179 	} else
2180 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2181 
2182 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2183 		/* Set extended attribute into external block */
2184 		ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2185 					    OCFS2_HAS_XATTR_FL);
2186 		if (!ret || ret != -ENOSPC)
2187 			goto end;
2188 
2189 		ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2190 		if (ret)
2191 			goto end;
2192 	}
2193 
2194 	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2195 
2196 end:
2197 
2198 	return ret;
2199 }
2200 
2201 /* Check whether the new xattr can be inserted into the inode. */
2202 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2203 				       struct ocfs2_xattr_info *xi,
2204 				       struct ocfs2_xattr_search *xs)
2205 {
2206 	u64 value_size;
2207 	struct ocfs2_xattr_entry *last;
2208 	int free, i;
2209 	size_t min_offs = xs->end - xs->base;
2210 
2211 	if (!xs->header)
2212 		return 0;
2213 
2214 	last = xs->header->xh_entries;
2215 
2216 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2217 		size_t offs = le16_to_cpu(last->xe_name_offset);
2218 		if (offs < min_offs)
2219 			min_offs = offs;
2220 		last += 1;
2221 	}
2222 
2223 	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2224 	if (free < 0)
2225 		return 0;
2226 
2227 	BUG_ON(!xs->not_found);
2228 
2229 	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2230 		value_size = OCFS2_XATTR_ROOT_SIZE;
2231 	else
2232 		value_size = OCFS2_XATTR_SIZE(xi->value_len);
2233 
2234 	if (free >= sizeof(struct ocfs2_xattr_entry) +
2235 		   OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2236 		return 1;
2237 
2238 	return 0;
2239 }
2240 
2241 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2242 				     struct ocfs2_dinode *di,
2243 				     struct ocfs2_xattr_info *xi,
2244 				     struct ocfs2_xattr_search *xis,
2245 				     struct ocfs2_xattr_search *xbs,
2246 				     int *clusters_need,
2247 				     int *meta_need,
2248 				     int *credits_need)
2249 {
2250 	int ret = 0, old_in_xb = 0;
2251 	int clusters_add = 0, meta_add = 0, credits = 0;
2252 	struct buffer_head *bh = NULL;
2253 	struct ocfs2_xattr_block *xb = NULL;
2254 	struct ocfs2_xattr_entry *xe = NULL;
2255 	struct ocfs2_xattr_value_root *xv = NULL;
2256 	char *base = NULL;
2257 	int name_offset, name_len = 0;
2258 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2259 						    xi->value_len);
2260 	u64 value_size;
2261 
2262 	/*
2263 	 * Calculate the clusters we need to write.
2264 	 * No matter whether we replace an old one or add a new one,
2265 	 * we need this for writing.
2266 	 */
2267 	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2268 		credits += new_clusters *
2269 			   ocfs2_clusters_to_blocks(inode->i_sb, 1);
2270 
2271 	if (xis->not_found && xbs->not_found) {
2272 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2273 
2274 		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2275 			clusters_add += new_clusters;
2276 			credits += ocfs2_calc_extend_credits(inode->i_sb,
2277 							&def_xv.xv.xr_list,
2278 							new_clusters);
2279 		}
2280 
2281 		goto meta_guess;
2282 	}
2283 
2284 	if (!xis->not_found) {
2285 		xe = xis->here;
2286 		name_offset = le16_to_cpu(xe->xe_name_offset);
2287 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2288 		base = xis->base;
2289 		credits += OCFS2_INODE_UPDATE_CREDITS;
2290 	} else {
2291 		int i, block_off = 0;
2292 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2293 		xe = xbs->here;
2294 		name_offset = le16_to_cpu(xe->xe_name_offset);
2295 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2296 		i = xbs->here - xbs->header->xh_entries;
2297 		old_in_xb = 1;
2298 
2299 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2300 			ret = ocfs2_xattr_bucket_get_name_value(inode,
2301 							bucket_xh(xbs->bucket),
2302 							i, &block_off,
2303 							&name_offset);
2304 			base = bucket_block(xbs->bucket, block_off);
2305 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2306 		} else {
2307 			base = xbs->base;
2308 			credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2309 		}
2310 	}
2311 
2312 	/*
2313 	 * delete a xattr doesn't need metadata and cluster allocation.
2314 	 * so just calculate the credits and return.
2315 	 *
2316 	 * The credits for removing the value tree will be extended
2317 	 * by ocfs2_remove_extent itself.
2318 	 */
2319 	if (!xi->value) {
2320 		if (!ocfs2_xattr_is_local(xe))
2321 			credits += ocfs2_remove_extent_credits(inode->i_sb);
2322 
2323 		goto out;
2324 	}
2325 
2326 	/* do cluster allocation guess first. */
2327 	value_size = le64_to_cpu(xe->xe_value_size);
2328 
2329 	if (old_in_xb) {
2330 		/*
2331 		 * In xattr set, we always try to set the xe in inode first,
2332 		 * so if it can be inserted into inode successfully, the old
2333 		 * one will be removed from the xattr block, and this xattr
2334 		 * will be inserted into inode as a new xattr in inode.
2335 		 */
2336 		if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2337 			clusters_add += new_clusters;
2338 			credits += ocfs2_remove_extent_credits(inode->i_sb) +
2339 				    OCFS2_INODE_UPDATE_CREDITS;
2340 			if (!ocfs2_xattr_is_local(xe))
2341 				credits += ocfs2_calc_extend_credits(
2342 							inode->i_sb,
2343 							&def_xv.xv.xr_list,
2344 							new_clusters);
2345 			goto out;
2346 		}
2347 	}
2348 
2349 	if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2350 		/* the new values will be stored outside. */
2351 		u32 old_clusters = 0;
2352 
2353 		if (!ocfs2_xattr_is_local(xe)) {
2354 			old_clusters =	ocfs2_clusters_for_bytes(inode->i_sb,
2355 								 value_size);
2356 			xv = (struct ocfs2_xattr_value_root *)
2357 			     (base + name_offset + name_len);
2358 			value_size = OCFS2_XATTR_ROOT_SIZE;
2359 		} else
2360 			xv = &def_xv.xv;
2361 
2362 		if (old_clusters >= new_clusters) {
2363 			credits += ocfs2_remove_extent_credits(inode->i_sb);
2364 			goto out;
2365 		} else {
2366 			meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2367 			clusters_add += new_clusters - old_clusters;
2368 			credits += ocfs2_calc_extend_credits(inode->i_sb,
2369 							     &xv->xr_list,
2370 							     new_clusters -
2371 							     old_clusters);
2372 			if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2373 				goto out;
2374 		}
2375 	} else {
2376 		/*
2377 		 * Now the new value will be stored inside. So if the new
2378 		 * value is smaller than the size of value root or the old
2379 		 * value, we don't need any allocation, otherwise we have
2380 		 * to guess metadata allocation.
2381 		 */
2382 		if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2383 		    (!ocfs2_xattr_is_local(xe) &&
2384 		     OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2385 			goto out;
2386 	}
2387 
2388 meta_guess:
2389 	/* calculate metadata allocation. */
2390 	if (di->i_xattr_loc) {
2391 		if (!xbs->xattr_bh) {
2392 			ret = ocfs2_read_xattr_block(inode,
2393 						     le64_to_cpu(di->i_xattr_loc),
2394 						     &bh);
2395 			if (ret) {
2396 				mlog_errno(ret);
2397 				goto out;
2398 			}
2399 
2400 			xb = (struct ocfs2_xattr_block *)bh->b_data;
2401 		} else
2402 			xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2403 
2404 		/*
2405 		 * If there is already an xattr tree, good, we can calculate
2406 		 * like other b-trees. Otherwise we may have the chance of
2407 		 * create a tree, the credit calculation is borrowed from
2408 		 * ocfs2_calc_extend_credits with root_el = NULL. And the
2409 		 * new tree will be cluster based, so no meta is needed.
2410 		 */
2411 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2412 			struct ocfs2_extent_list *el =
2413 				 &xb->xb_attrs.xb_root.xt_list;
2414 			meta_add += ocfs2_extend_meta_needed(el);
2415 			credits += ocfs2_calc_extend_credits(inode->i_sb,
2416 							     el, 1);
2417 		} else
2418 			credits += OCFS2_SUBALLOC_ALLOC + 1;
2419 
2420 		/*
2421 		 * This cluster will be used either for new bucket or for
2422 		 * new xattr block.
2423 		 * If the cluster size is the same as the bucket size, one
2424 		 * more is needed since we may need to extend the bucket
2425 		 * also.
2426 		 */
2427 		clusters_add += 1;
2428 		credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2429 		if (OCFS2_XATTR_BUCKET_SIZE ==
2430 			OCFS2_SB(inode->i_sb)->s_clustersize) {
2431 			credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2432 			clusters_add += 1;
2433 		}
2434 	} else {
2435 		meta_add += 1;
2436 		credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2437 	}
2438 out:
2439 	if (clusters_need)
2440 		*clusters_need = clusters_add;
2441 	if (meta_need)
2442 		*meta_need = meta_add;
2443 	if (credits_need)
2444 		*credits_need = credits;
2445 	brelse(bh);
2446 	return ret;
2447 }
2448 
2449 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2450 				     struct ocfs2_dinode *di,
2451 				     struct ocfs2_xattr_info *xi,
2452 				     struct ocfs2_xattr_search *xis,
2453 				     struct ocfs2_xattr_search *xbs,
2454 				     struct ocfs2_xattr_set_ctxt *ctxt,
2455 				     int extra_meta,
2456 				     int *credits)
2457 {
2458 	int clusters_add, meta_add, ret;
2459 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2460 
2461 	memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2462 
2463 	ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2464 
2465 	ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2466 					&clusters_add, &meta_add, credits);
2467 	if (ret) {
2468 		mlog_errno(ret);
2469 		return ret;
2470 	}
2471 
2472 	meta_add += extra_meta;
2473 	mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2474 	     "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2475 
2476 	if (meta_add) {
2477 		ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2478 							&ctxt->meta_ac);
2479 		if (ret) {
2480 			mlog_errno(ret);
2481 			goto out;
2482 		}
2483 	}
2484 
2485 	if (clusters_add) {
2486 		ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2487 		if (ret)
2488 			mlog_errno(ret);
2489 	}
2490 out:
2491 	if (ret) {
2492 		if (ctxt->meta_ac) {
2493 			ocfs2_free_alloc_context(ctxt->meta_ac);
2494 			ctxt->meta_ac = NULL;
2495 		}
2496 
2497 		/*
2498 		 * We cannot have an error and a non null ctxt->data_ac.
2499 		 */
2500 	}
2501 
2502 	return ret;
2503 }
2504 
2505 static int __ocfs2_xattr_set_handle(struct inode *inode,
2506 				    struct ocfs2_dinode *di,
2507 				    struct ocfs2_xattr_info *xi,
2508 				    struct ocfs2_xattr_search *xis,
2509 				    struct ocfs2_xattr_search *xbs,
2510 				    struct ocfs2_xattr_set_ctxt *ctxt)
2511 {
2512 	int ret = 0, credits, old_found;
2513 
2514 	if (!xi->value) {
2515 		/* Remove existing extended attribute */
2516 		if (!xis->not_found)
2517 			ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2518 		else if (!xbs->not_found)
2519 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2520 	} else {
2521 		/* We always try to set extended attribute into inode first*/
2522 		ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2523 		if (!ret && !xbs->not_found) {
2524 			/*
2525 			 * If succeed and that extended attribute existing in
2526 			 * external block, then we will remove it.
2527 			 */
2528 			xi->value = NULL;
2529 			xi->value_len = 0;
2530 
2531 			old_found = xis->not_found;
2532 			xis->not_found = -ENODATA;
2533 			ret = ocfs2_calc_xattr_set_need(inode,
2534 							di,
2535 							xi,
2536 							xis,
2537 							xbs,
2538 							NULL,
2539 							NULL,
2540 							&credits);
2541 			xis->not_found = old_found;
2542 			if (ret) {
2543 				mlog_errno(ret);
2544 				goto out;
2545 			}
2546 
2547 			ret = ocfs2_extend_trans(ctxt->handle, credits +
2548 					ctxt->handle->h_buffer_credits);
2549 			if (ret) {
2550 				mlog_errno(ret);
2551 				goto out;
2552 			}
2553 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2554 		} else if (ret == -ENOSPC) {
2555 			if (di->i_xattr_loc && !xbs->xattr_bh) {
2556 				ret = ocfs2_xattr_block_find(inode,
2557 							     xi->name_index,
2558 							     xi->name, xbs);
2559 				if (ret)
2560 					goto out;
2561 
2562 				old_found = xis->not_found;
2563 				xis->not_found = -ENODATA;
2564 				ret = ocfs2_calc_xattr_set_need(inode,
2565 								di,
2566 								xi,
2567 								xis,
2568 								xbs,
2569 								NULL,
2570 								NULL,
2571 								&credits);
2572 				xis->not_found = old_found;
2573 				if (ret) {
2574 					mlog_errno(ret);
2575 					goto out;
2576 				}
2577 
2578 				ret = ocfs2_extend_trans(ctxt->handle, credits +
2579 					ctxt->handle->h_buffer_credits);
2580 				if (ret) {
2581 					mlog_errno(ret);
2582 					goto out;
2583 				}
2584 			}
2585 			/*
2586 			 * If no space in inode, we will set extended attribute
2587 			 * into external block.
2588 			 */
2589 			ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2590 			if (ret)
2591 				goto out;
2592 			if (!xis->not_found) {
2593 				/*
2594 				 * If succeed and that extended attribute
2595 				 * existing in inode, we will remove it.
2596 				 */
2597 				xi->value = NULL;
2598 				xi->value_len = 0;
2599 				xbs->not_found = -ENODATA;
2600 				ret = ocfs2_calc_xattr_set_need(inode,
2601 								di,
2602 								xi,
2603 								xis,
2604 								xbs,
2605 								NULL,
2606 								NULL,
2607 								&credits);
2608 				if (ret) {
2609 					mlog_errno(ret);
2610 					goto out;
2611 				}
2612 
2613 				ret = ocfs2_extend_trans(ctxt->handle, credits +
2614 						ctxt->handle->h_buffer_credits);
2615 				if (ret) {
2616 					mlog_errno(ret);
2617 					goto out;
2618 				}
2619 				ret = ocfs2_xattr_ibody_set(inode, xi,
2620 							    xis, ctxt);
2621 			}
2622 		}
2623 	}
2624 
2625 	if (!ret) {
2626 		/* Update inode ctime. */
2627 		ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2628 					      xis->inode_bh,
2629 					      OCFS2_JOURNAL_ACCESS_WRITE);
2630 		if (ret) {
2631 			mlog_errno(ret);
2632 			goto out;
2633 		}
2634 
2635 		inode->i_ctime = CURRENT_TIME;
2636 		di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2637 		di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2638 		ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2639 	}
2640 out:
2641 	return ret;
2642 }
2643 
2644 /*
2645  * This function only called duing creating inode
2646  * for init security/acl xattrs of the new inode.
2647  * All transanction credits have been reserved in mknod.
2648  */
2649 int ocfs2_xattr_set_handle(handle_t *handle,
2650 			   struct inode *inode,
2651 			   struct buffer_head *di_bh,
2652 			   int name_index,
2653 			   const char *name,
2654 			   const void *value,
2655 			   size_t value_len,
2656 			   int flags,
2657 			   struct ocfs2_alloc_context *meta_ac,
2658 			   struct ocfs2_alloc_context *data_ac)
2659 {
2660 	struct ocfs2_dinode *di;
2661 	int ret;
2662 
2663 	struct ocfs2_xattr_info xi = {
2664 		.name_index = name_index,
2665 		.name = name,
2666 		.value = value,
2667 		.value_len = value_len,
2668 	};
2669 
2670 	struct ocfs2_xattr_search xis = {
2671 		.not_found = -ENODATA,
2672 	};
2673 
2674 	struct ocfs2_xattr_search xbs = {
2675 		.not_found = -ENODATA,
2676 	};
2677 
2678 	struct ocfs2_xattr_set_ctxt ctxt = {
2679 		.handle = handle,
2680 		.meta_ac = meta_ac,
2681 		.data_ac = data_ac,
2682 	};
2683 
2684 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2685 		return -EOPNOTSUPP;
2686 
2687 	/*
2688 	 * In extreme situation, may need xattr bucket when
2689 	 * block size is too small. And we have already reserved
2690 	 * the credits for bucket in mknod.
2691 	 */
2692 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2693 		xbs.bucket = ocfs2_xattr_bucket_new(inode);
2694 		if (!xbs.bucket) {
2695 			mlog_errno(-ENOMEM);
2696 			return -ENOMEM;
2697 		}
2698 	}
2699 
2700 	xis.inode_bh = xbs.inode_bh = di_bh;
2701 	di = (struct ocfs2_dinode *)di_bh->b_data;
2702 
2703 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
2704 
2705 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2706 	if (ret)
2707 		goto cleanup;
2708 	if (xis.not_found) {
2709 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2710 		if (ret)
2711 			goto cleanup;
2712 	}
2713 
2714 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2715 
2716 cleanup:
2717 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
2718 	brelse(xbs.xattr_bh);
2719 	ocfs2_xattr_bucket_free(xbs.bucket);
2720 
2721 	return ret;
2722 }
2723 
2724 /*
2725  * ocfs2_xattr_set()
2726  *
2727  * Set, replace or remove an extended attribute for this inode.
2728  * value is NULL to remove an existing extended attribute, else either
2729  * create or replace an extended attribute.
2730  */
2731 int ocfs2_xattr_set(struct inode *inode,
2732 		    int name_index,
2733 		    const char *name,
2734 		    const void *value,
2735 		    size_t value_len,
2736 		    int flags)
2737 {
2738 	struct buffer_head *di_bh = NULL;
2739 	struct ocfs2_dinode *di;
2740 	int ret, credits, ref_meta = 0, ref_credits = 0;
2741 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2742 	struct inode *tl_inode = osb->osb_tl_inode;
2743 	struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2744 	struct ocfs2_refcount_tree *ref_tree = NULL;
2745 
2746 	struct ocfs2_xattr_info xi = {
2747 		.name_index = name_index,
2748 		.name = name,
2749 		.value = value,
2750 		.value_len = value_len,
2751 	};
2752 
2753 	struct ocfs2_xattr_search xis = {
2754 		.not_found = -ENODATA,
2755 	};
2756 
2757 	struct ocfs2_xattr_search xbs = {
2758 		.not_found = -ENODATA,
2759 	};
2760 
2761 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2762 		return -EOPNOTSUPP;
2763 
2764 	/*
2765 	 * Only xbs will be used on indexed trees.  xis doesn't need a
2766 	 * bucket.
2767 	 */
2768 	xbs.bucket = ocfs2_xattr_bucket_new(inode);
2769 	if (!xbs.bucket) {
2770 		mlog_errno(-ENOMEM);
2771 		return -ENOMEM;
2772 	}
2773 
2774 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
2775 	if (ret < 0) {
2776 		mlog_errno(ret);
2777 		goto cleanup_nolock;
2778 	}
2779 	xis.inode_bh = xbs.inode_bh = di_bh;
2780 	di = (struct ocfs2_dinode *)di_bh->b_data;
2781 
2782 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
2783 	/*
2784 	 * Scan inode and external block to find the same name
2785 	 * extended attribute and collect search infomation.
2786 	 */
2787 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2788 	if (ret)
2789 		goto cleanup;
2790 	if (xis.not_found) {
2791 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2792 		if (ret)
2793 			goto cleanup;
2794 	}
2795 
2796 	if (xis.not_found && xbs.not_found) {
2797 		ret = -ENODATA;
2798 		if (flags & XATTR_REPLACE)
2799 			goto cleanup;
2800 		ret = 0;
2801 		if (!value)
2802 			goto cleanup;
2803 	} else {
2804 		ret = -EEXIST;
2805 		if (flags & XATTR_CREATE)
2806 			goto cleanup;
2807 	}
2808 
2809 	/* Check whether the value is refcounted and do some prepartion. */
2810 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2811 	    (!xis.not_found || !xbs.not_found)) {
2812 		ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2813 						   &xis, &xbs, &ref_tree,
2814 						   &ref_meta, &ref_credits);
2815 		if (ret) {
2816 			mlog_errno(ret);
2817 			goto cleanup;
2818 		}
2819 	}
2820 
2821 	mutex_lock(&tl_inode->i_mutex);
2822 
2823 	if (ocfs2_truncate_log_needs_flush(osb)) {
2824 		ret = __ocfs2_flush_truncate_log(osb);
2825 		if (ret < 0) {
2826 			mutex_unlock(&tl_inode->i_mutex);
2827 			mlog_errno(ret);
2828 			goto cleanup;
2829 		}
2830 	}
2831 	mutex_unlock(&tl_inode->i_mutex);
2832 
2833 	ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2834 					&xbs, &ctxt, ref_meta, &credits);
2835 	if (ret) {
2836 		mlog_errno(ret);
2837 		goto cleanup;
2838 	}
2839 
2840 	/* we need to update inode's ctime field, so add credit for it. */
2841 	credits += OCFS2_INODE_UPDATE_CREDITS;
2842 	ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
2843 	if (IS_ERR(ctxt.handle)) {
2844 		ret = PTR_ERR(ctxt.handle);
2845 		mlog_errno(ret);
2846 		goto cleanup;
2847 	}
2848 
2849 	ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2850 
2851 	ocfs2_commit_trans(osb, ctxt.handle);
2852 
2853 	if (ctxt.data_ac)
2854 		ocfs2_free_alloc_context(ctxt.data_ac);
2855 	if (ctxt.meta_ac)
2856 		ocfs2_free_alloc_context(ctxt.meta_ac);
2857 	if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2858 		ocfs2_schedule_truncate_log_flush(osb, 1);
2859 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
2860 cleanup:
2861 	if (ref_tree)
2862 		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
2863 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
2864 	ocfs2_inode_unlock(inode, 1);
2865 cleanup_nolock:
2866 	brelse(di_bh);
2867 	brelse(xbs.xattr_bh);
2868 	ocfs2_xattr_bucket_free(xbs.bucket);
2869 
2870 	return ret;
2871 }
2872 
2873 /*
2874  * Find the xattr extent rec which may contains name_hash.
2875  * e_cpos will be the first name hash of the xattr rec.
2876  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2877  */
2878 static int ocfs2_xattr_get_rec(struct inode *inode,
2879 			       u32 name_hash,
2880 			       u64 *p_blkno,
2881 			       u32 *e_cpos,
2882 			       u32 *num_clusters,
2883 			       struct ocfs2_extent_list *el)
2884 {
2885 	int ret = 0, i;
2886 	struct buffer_head *eb_bh = NULL;
2887 	struct ocfs2_extent_block *eb;
2888 	struct ocfs2_extent_rec *rec = NULL;
2889 	u64 e_blkno = 0;
2890 
2891 	if (el->l_tree_depth) {
2892 		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
2893 				      &eb_bh);
2894 		if (ret) {
2895 			mlog_errno(ret);
2896 			goto out;
2897 		}
2898 
2899 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2900 		el = &eb->h_list;
2901 
2902 		if (el->l_tree_depth) {
2903 			ocfs2_error(inode->i_sb,
2904 				    "Inode %lu has non zero tree depth in "
2905 				    "xattr tree block %llu\n", inode->i_ino,
2906 				    (unsigned long long)eb_bh->b_blocknr);
2907 			ret = -EROFS;
2908 			goto out;
2909 		}
2910 	}
2911 
2912 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2913 		rec = &el->l_recs[i];
2914 
2915 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2916 			e_blkno = le64_to_cpu(rec->e_blkno);
2917 			break;
2918 		}
2919 	}
2920 
2921 	if (!e_blkno) {
2922 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2923 			    "record (%u, %u, 0) in xattr", inode->i_ino,
2924 			    le32_to_cpu(rec->e_cpos),
2925 			    ocfs2_rec_clusters(el, rec));
2926 		ret = -EROFS;
2927 		goto out;
2928 	}
2929 
2930 	*p_blkno = le64_to_cpu(rec->e_blkno);
2931 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2932 	if (e_cpos)
2933 		*e_cpos = le32_to_cpu(rec->e_cpos);
2934 out:
2935 	brelse(eb_bh);
2936 	return ret;
2937 }
2938 
/*
 * Per-bucket callback type.  ocfs2_iterate_xattr_buckets() calls it once
 * for every bucket it reads, handing its own @para through unchanged,
 * and stops iterating as soon as the callback returns non-zero.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
2942 
2943 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2944 				   struct ocfs2_xattr_bucket *bucket,
2945 				   int name_index,
2946 				   const char *name,
2947 				   u32 name_hash,
2948 				   u16 *xe_index,
2949 				   int *found)
2950 {
2951 	int i, ret = 0, cmp = 1, block_off, new_offset;
2952 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
2953 	size_t name_len = strlen(name);
2954 	struct ocfs2_xattr_entry *xe = NULL;
2955 	char *xe_name;
2956 
2957 	/*
2958 	 * We don't use binary search in the bucket because there
2959 	 * may be multiple entries with the same name hash.
2960 	 */
2961 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2962 		xe = &xh->xh_entries[i];
2963 
2964 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
2965 			continue;
2966 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2967 			break;
2968 
2969 		cmp = name_index - ocfs2_xattr_get_type(xe);
2970 		if (!cmp)
2971 			cmp = name_len - xe->xe_name_len;
2972 		if (cmp)
2973 			continue;
2974 
2975 		ret = ocfs2_xattr_bucket_get_name_value(inode,
2976 							xh,
2977 							i,
2978 							&block_off,
2979 							&new_offset);
2980 		if (ret) {
2981 			mlog_errno(ret);
2982 			break;
2983 		}
2984 
2985 
2986 		xe_name = bucket_block(bucket, block_off) + new_offset;
2987 		if (!memcmp(name, xe_name, name_len)) {
2988 			*xe_index = i;
2989 			*found = 1;
2990 			ret = 0;
2991 			break;
2992 		}
2993 	}
2994 
2995 	return ret;
2996 }
2997 
2998 /*
2999  * Find the specified xattr entry in a series of buckets.
3000  * This series start from p_blkno and last for num_clusters.
3001  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3002  * the num of the valid buckets.
3003  *
3004  * Return the buffer_head this xattr should reside in. And if the xattr's
3005  * hash is in the gap of 2 buckets, return the lower bucket.
3006  */
3007 static int ocfs2_xattr_bucket_find(struct inode *inode,
3008 				   int name_index,
3009 				   const char *name,
3010 				   u32 name_hash,
3011 				   u64 p_blkno,
3012 				   u32 first_hash,
3013 				   u32 num_clusters,
3014 				   struct ocfs2_xattr_search *xs)
3015 {
3016 	int ret, found = 0;
3017 	struct ocfs2_xattr_header *xh = NULL;
3018 	struct ocfs2_xattr_entry *xe = NULL;
3019 	u16 index = 0;
3020 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3021 	int low_bucket = 0, bucket, high_bucket;
3022 	struct ocfs2_xattr_bucket *search;
3023 	u32 last_hash;
3024 	u64 blkno, lower_blkno = 0;
3025 
3026 	search = ocfs2_xattr_bucket_new(inode);
3027 	if (!search) {
3028 		ret = -ENOMEM;
3029 		mlog_errno(ret);
3030 		goto out;
3031 	}
3032 
3033 	ret = ocfs2_read_xattr_bucket(search, p_blkno);
3034 	if (ret) {
3035 		mlog_errno(ret);
3036 		goto out;
3037 	}
3038 
3039 	xh = bucket_xh(search);
3040 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3041 	while (low_bucket <= high_bucket) {
3042 		ocfs2_xattr_bucket_relse(search);
3043 
3044 		bucket = (low_bucket + high_bucket) / 2;
3045 		blkno = p_blkno + bucket * blk_per_bucket;
3046 		ret = ocfs2_read_xattr_bucket(search, blkno);
3047 		if (ret) {
3048 			mlog_errno(ret);
3049 			goto out;
3050 		}
3051 
3052 		xh = bucket_xh(search);
3053 		xe = &xh->xh_entries[0];
3054 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3055 			high_bucket = bucket - 1;
3056 			continue;
3057 		}
3058 
3059 		/*
3060 		 * Check whether the hash of the last entry in our
3061 		 * bucket is larger than the search one. for an empty
3062 		 * bucket, the last one is also the first one.
3063 		 */
3064 		if (xh->xh_count)
3065 			xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3066 
3067 		last_hash = le32_to_cpu(xe->xe_name_hash);
3068 
3069 		/* record lower_blkno which may be the insert place. */
3070 		lower_blkno = blkno;
3071 
3072 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3073 			low_bucket = bucket + 1;
3074 			continue;
3075 		}
3076 
3077 		/* the searched xattr should reside in this bucket if exists. */
3078 		ret = ocfs2_find_xe_in_bucket(inode, search,
3079 					      name_index, name, name_hash,
3080 					      &index, &found);
3081 		if (ret) {
3082 			mlog_errno(ret);
3083 			goto out;
3084 		}
3085 		break;
3086 	}
3087 
3088 	/*
3089 	 * Record the bucket we have found.
3090 	 * When the xattr's hash value is in the gap of 2 buckets, we will
3091 	 * always set it to the previous bucket.
3092 	 */
3093 	if (!lower_blkno)
3094 		lower_blkno = p_blkno;
3095 
3096 	/* This should be in cache - we just read it during the search */
3097 	ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3098 	if (ret) {
3099 		mlog_errno(ret);
3100 		goto out;
3101 	}
3102 
3103 	xs->header = bucket_xh(xs->bucket);
3104 	xs->base = bucket_block(xs->bucket, 0);
3105 	xs->end = xs->base + inode->i_sb->s_blocksize;
3106 
3107 	if (found) {
3108 		xs->here = &xs->header->xh_entries[index];
3109 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3110 		     (unsigned long long)bucket_blkno(xs->bucket), index);
3111 	} else
3112 		ret = -ENODATA;
3113 
3114 out:
3115 	ocfs2_xattr_bucket_free(search);
3116 	return ret;
3117 }
3118 
3119 static int ocfs2_xattr_index_block_find(struct inode *inode,
3120 					struct buffer_head *root_bh,
3121 					int name_index,
3122 					const char *name,
3123 					struct ocfs2_xattr_search *xs)
3124 {
3125 	int ret;
3126 	struct ocfs2_xattr_block *xb =
3127 			(struct ocfs2_xattr_block *)root_bh->b_data;
3128 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3129 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3130 	u64 p_blkno = 0;
3131 	u32 first_hash, num_clusters = 0;
3132 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3133 
3134 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3135 		return -ENODATA;
3136 
3137 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3138 	     name, name_hash, name_index);
3139 
3140 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3141 				  &num_clusters, el);
3142 	if (ret) {
3143 		mlog_errno(ret);
3144 		goto out;
3145 	}
3146 
3147 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3148 
3149 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3150 	     "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3151 	     first_hash);
3152 
3153 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3154 				      p_blkno, first_hash, num_clusters, xs);
3155 
3156 out:
3157 	return ret;
3158 }
3159 
3160 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3161 				       u64 blkno,
3162 				       u32 clusters,
3163 				       xattr_bucket_func *func,
3164 				       void *para)
3165 {
3166 	int i, ret = 0;
3167 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3168 	u32 num_buckets = clusters * bpc;
3169 	struct ocfs2_xattr_bucket *bucket;
3170 
3171 	bucket = ocfs2_xattr_bucket_new(inode);
3172 	if (!bucket) {
3173 		mlog_errno(-ENOMEM);
3174 		return -ENOMEM;
3175 	}
3176 
3177 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3178 	     clusters, (unsigned long long)blkno);
3179 
3180 	for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3181 		ret = ocfs2_read_xattr_bucket(bucket, blkno);
3182 		if (ret) {
3183 			mlog_errno(ret);
3184 			break;
3185 		}
3186 
3187 		/*
3188 		 * The real bucket num in this series of blocks is stored
3189 		 * in the 1st bucket.
3190 		 */
3191 		if (i == 0)
3192 			num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3193 
3194 		mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3195 		     (unsigned long long)blkno,
3196 		     le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3197 		if (func) {
3198 			ret = func(inode, bucket, para);
3199 			if (ret && ret != -ERANGE)
3200 				mlog_errno(ret);
3201 			/* Fall through to bucket_relse() */
3202 		}
3203 
3204 		ocfs2_xattr_bucket_relse(bucket);
3205 		if (ret)
3206 			break;
3207 	}
3208 
3209 	ocfs2_xattr_bucket_free(bucket);
3210 	return ret;
3211 }
3212 
/*
 * State threaded through the bucket iterator while xattr names are being
 * listed: each bucket callback hands these three fields on to
 * ocfs2_xattr_list_entry().
 */
struct ocfs2_xattr_tree_list {
	char *buffer;		/* caller's output buffer */
	size_t buffer_size;	/* size of @buffer as supplied by the caller */
	size_t result;		/* running total updated by ocfs2_xattr_list_entry() */
};
3218 
3219 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3220 					     struct ocfs2_xattr_header *xh,
3221 					     int index,
3222 					     int *block_off,
3223 					     int *new_offset)
3224 {
3225 	u16 name_offset;
3226 
3227 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3228 		return -EINVAL;
3229 
3230 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3231 
3232 	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
3233 	*new_offset = name_offset % inode->i_sb->s_blocksize;
3234 
3235 	return 0;
3236 }
3237 
3238 static int ocfs2_list_xattr_bucket(struct inode *inode,
3239 				   struct ocfs2_xattr_bucket *bucket,
3240 				   void *para)
3241 {
3242 	int ret = 0, type;
3243 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3244 	int i, block_off, new_offset;
3245 	const char *prefix, *name;
3246 
3247 	for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3248 		struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3249 		type = ocfs2_xattr_get_type(entry);
3250 		prefix = ocfs2_xattr_prefix(type);
3251 
3252 		if (prefix) {
3253 			ret = ocfs2_xattr_bucket_get_name_value(inode,
3254 								bucket_xh(bucket),
3255 								i,
3256 								&block_off,
3257 								&new_offset);
3258 			if (ret)
3259 				break;
3260 
3261 			name = (const char *)bucket_block(bucket, block_off) +
3262 				new_offset;
3263 			ret = ocfs2_xattr_list_entry(xl->buffer,
3264 						     xl->buffer_size,
3265 						     &xl->result,
3266 						     prefix, name,
3267 						     entry->xe_name_len);
3268 			if (ret)
3269 				break;
3270 		}
3271 	}
3272 
3273 	return ret;
3274 }
3275 
3276 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3277 					     struct ocfs2_xattr_tree_root *xt,
3278 					     char *buffer,
3279 					     size_t buffer_size)
3280 {
3281 	struct ocfs2_extent_list *el = &xt->xt_list;
3282 	int ret = 0;
3283 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3284 	u64 p_blkno = 0;
3285 	struct ocfs2_xattr_tree_list xl = {
3286 		.buffer = buffer,
3287 		.buffer_size = buffer_size,
3288 		.result = 0,
3289 	};
3290 
3291 	if (le16_to_cpu(el->l_next_free_rec) == 0)
3292 		return 0;
3293 
3294 	while (name_hash > 0) {
3295 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3296 					  &e_cpos, &num_clusters, el);
3297 		if (ret) {
3298 			mlog_errno(ret);
3299 			goto out;
3300 		}
3301 
3302 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
3303 						  ocfs2_list_xattr_bucket,
3304 						  &xl);
3305 		if (ret) {
3306 			if (ret != -ERANGE)
3307 				mlog_errno(ret);
3308 			goto out;
3309 		}
3310 
3311 		if (e_cpos == 0)
3312 			break;
3313 
3314 		name_hash = e_cpos - 1;
3315 	}
3316 
3317 	ret = xl.result;
3318 out:
3319 	return ret;
3320 }
3321 
3322 static int cmp_xe(const void *a, const void *b)
3323 {
3324 	const struct ocfs2_xattr_entry *l = a, *r = b;
3325 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
3326 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
3327 
3328 	if (l_hash > r_hash)
3329 		return 1;
3330 	if (l_hash < r_hash)
3331 		return -1;
3332 	return 0;
3333 }
3334 
3335 static void swap_xe(void *a, void *b, int size)
3336 {
3337 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3338 
3339 	tmp = *l;
3340 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3341 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3342 }
3343 
3344 /*
3345  * When the ocfs2_xattr_block is filled up, new bucket will be created
3346  * and all the xattr entries will be moved to the new bucket.
3347  * The header goes at the start of the bucket, and the names+values are
3348  * filled from the end.  This is why *target starts as the last buffer.
3349  * Note: we need to sort the entries since they are not saved in order
3350  * in the ocfs2_xattr_block.
3351  */
3352 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3353 					   struct buffer_head *xb_bh,
3354 					   struct ocfs2_xattr_bucket *bucket)
3355 {
3356 	int i, blocksize = inode->i_sb->s_blocksize;
3357 	int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3358 	u16 offset, size, off_change;
3359 	struct ocfs2_xattr_entry *xe;
3360 	struct ocfs2_xattr_block *xb =
3361 				(struct ocfs2_xattr_block *)xb_bh->b_data;
3362 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3363 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3364 	u16 count = le16_to_cpu(xb_xh->xh_count);
3365 	char *src = xb_bh->b_data;
3366 	char *target = bucket_block(bucket, blks - 1);
3367 
3368 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
3369 	     (unsigned long long)xb_bh->b_blocknr,
3370 	     (unsigned long long)bucket_blkno(bucket));
3371 
3372 	for (i = 0; i < blks; i++)
3373 		memset(bucket_block(bucket, i), 0, blocksize);
3374 
3375 	/*
3376 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
3377 	 * there is a offset change corresponding to the change of
3378 	 * ocfs2_xattr_header's position.
3379 	 */
3380 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3381 	xe = &xb_xh->xh_entries[count - 1];
3382 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3383 	size = blocksize - offset;
3384 
3385 	/* copy all the names and values. */
3386 	memcpy(target + offset, src + offset, size);
3387 
3388 	/* Init new header now. */
3389 	xh->xh_count = xb_xh->xh_count;
3390 	xh->xh_num_buckets = cpu_to_le16(1);
3391 	xh->xh_name_value_len = cpu_to_le16(size);
3392 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3393 
3394 	/* copy all the entries. */
3395 	target = bucket_block(bucket, 0);
3396 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3397 	size = count * sizeof(struct ocfs2_xattr_entry);
3398 	memcpy(target + offset, (char *)xb_xh + offset, size);
3399 
3400 	/* Change the xe offset for all the xe because of the move. */
3401 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3402 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3403 	for (i = 0; i < count; i++)
3404 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3405 
3406 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3407 	     offset, size, off_change);
3408 
3409 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3410 	     cmp_xe, swap_xe);
3411 }
3412 
3413 /*
3414  * After we move xattr from block to index btree, we have to
3415  * update ocfs2_xattr_search to the new xe and base.
3416  *
3417  * When the entry is in xattr block, xattr_bh indicates the storage place.
3418  * While if the entry is in index b-tree, "bucket" indicates the
3419  * real place of the xattr.
3420  */
3421 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3422 					    struct ocfs2_xattr_search *xs,
3423 					    struct buffer_head *old_bh)
3424 {
3425 	char *buf = old_bh->b_data;
3426 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3427 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3428 	int i;
3429 
3430 	xs->header = bucket_xh(xs->bucket);
3431 	xs->base = bucket_block(xs->bucket, 0);
3432 	xs->end = xs->base + inode->i_sb->s_blocksize;
3433 
3434 	if (xs->not_found)
3435 		return;
3436 
3437 	i = xs->here - old_xh->xh_entries;
3438 	xs->here = &xs->header->xh_entries[i];
3439 }
3440 
3441 static int ocfs2_xattr_create_index_block(struct inode *inode,
3442 					  struct ocfs2_xattr_search *xs,
3443 					  struct ocfs2_xattr_set_ctxt *ctxt)
3444 {
3445 	int ret;
3446 	u32 bit_off, len;
3447 	u64 blkno;
3448 	handle_t *handle = ctxt->handle;
3449 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3450 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
3451 	struct buffer_head *xb_bh = xs->xattr_bh;
3452 	struct ocfs2_xattr_block *xb =
3453 			(struct ocfs2_xattr_block *)xb_bh->b_data;
3454 	struct ocfs2_xattr_tree_root *xr;
3455 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
3456 
3457 	mlog(0, "create xattr index block for %llu\n",
3458 	     (unsigned long long)xb_bh->b_blocknr);
3459 
3460 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3461 	BUG_ON(!xs->bucket);
3462 
3463 	/*
3464 	 * XXX:
3465 	 * We can use this lock for now, and maybe move to a dedicated mutex
3466 	 * if performance becomes a problem later.
3467 	 */
3468 	down_write(&oi->ip_alloc_sem);
3469 
3470 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3471 				      OCFS2_JOURNAL_ACCESS_WRITE);
3472 	if (ret) {
3473 		mlog_errno(ret);
3474 		goto out;
3475 	}
3476 
3477 	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3478 				     1, 1, &bit_off, &len);
3479 	if (ret) {
3480 		mlog_errno(ret);
3481 		goto out;
3482 	}
3483 
3484 	/*
3485 	 * The bucket may spread in many blocks, and
3486 	 * we will only touch the 1st block and the last block
3487 	 * in the whole bucket(one for entry and one for data).
3488 	 */
3489 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3490 
3491 	mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3492 	     (unsigned long long)blkno);
3493 
3494 	ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3495 	if (ret) {
3496 		mlog_errno(ret);
3497 		goto out;
3498 	}
3499 
3500 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3501 						OCFS2_JOURNAL_ACCESS_CREATE);
3502 	if (ret) {
3503 		mlog_errno(ret);
3504 		goto out;
3505 	}
3506 
3507 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3508 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3509 
3510 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3511 
3512 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3513 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3514 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
3515 
3516 	xr = &xb->xb_attrs.xb_root;
3517 	xr->xt_clusters = cpu_to_le32(1);
3518 	xr->xt_last_eb_blk = 0;
3519 	xr->xt_list.l_tree_depth = 0;
3520 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3521 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3522 
3523 	xr->xt_list.l_recs[0].e_cpos = 0;
3524 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3525 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3526 
3527 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3528 
3529 	ocfs2_journal_dirty(handle, xb_bh);
3530 
3531 out:
3532 	up_write(&oi->ip_alloc_sem);
3533 
3534 	return ret;
3535 }
3536 
3537 static int cmp_xe_offset(const void *a, const void *b)
3538 {
3539 	const struct ocfs2_xattr_entry *l = a, *r = b;
3540 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3541 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3542 
3543 	if (l_name_offset < r_name_offset)
3544 		return 1;
3545 	if (l_name_offset > r_name_offset)
3546 		return -1;
3547 	return 0;
3548 }
3549 
3550 /*
3551  * defrag a xattr bucket if we find that the bucket has some
3552  * holes beteen name/value pairs.
3553  * We will move all the name/value pairs to the end of the bucket
3554  * so that we can spare some space for insertion.
3555  */
3556 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3557 				     handle_t *handle,
3558 				     struct ocfs2_xattr_bucket *bucket)
3559 {
3560 	int ret, i;
3561 	size_t end, offset, len, value_len;
3562 	struct ocfs2_xattr_header *xh;
3563 	char *entries, *buf, *bucket_buf = NULL;
3564 	u64 blkno = bucket_blkno(bucket);
3565 	u16 xh_free_start;
3566 	size_t blocksize = inode->i_sb->s_blocksize;
3567 	struct ocfs2_xattr_entry *xe;
3568 
3569 	/*
3570 	 * In order to make the operation more efficient and generic,
3571 	 * we copy all the blocks into a contiguous memory and do the
3572 	 * defragment there, so if anything is error, we will not touch
3573 	 * the real block.
3574 	 */
3575 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3576 	if (!bucket_buf) {
3577 		ret = -EIO;
3578 		goto out;
3579 	}
3580 
3581 	buf = bucket_buf;
3582 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3583 		memcpy(buf, bucket_block(bucket, i), blocksize);
3584 
3585 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3586 						OCFS2_JOURNAL_ACCESS_WRITE);
3587 	if (ret < 0) {
3588 		mlog_errno(ret);
3589 		goto out;
3590 	}
3591 
3592 	xh = (struct ocfs2_xattr_header *)bucket_buf;
3593 	entries = (char *)xh->xh_entries;
3594 	xh_free_start = le16_to_cpu(xh->xh_free_start);
3595 
3596 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
3597 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
3598 	     (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3599 	     xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3600 
3601 	/*
3602 	 * sort all the entries by their offset.
3603 	 * the largest will be the first, so that we can
3604 	 * move them to the end one by one.
3605 	 */
3606 	sort(entries, le16_to_cpu(xh->xh_count),
3607 	     sizeof(struct ocfs2_xattr_entry),
3608 	     cmp_xe_offset, swap_xe);
3609 
3610 	/* Move all name/values to the end of the bucket. */
3611 	xe = xh->xh_entries;
3612 	end = OCFS2_XATTR_BUCKET_SIZE;
3613 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3614 		offset = le16_to_cpu(xe->xe_name_offset);
3615 		if (ocfs2_xattr_is_local(xe))
3616 			value_len = OCFS2_XATTR_SIZE(
3617 					le64_to_cpu(xe->xe_value_size));
3618 		else
3619 			value_len = OCFS2_XATTR_ROOT_SIZE;
3620 		len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3621 
3622 		/*
3623 		 * We must make sure that the name/value pair
3624 		 * exist in the same block. So adjust end to
3625 		 * the previous block end if needed.
3626 		 */
3627 		if (((end - len) / blocksize !=
3628 			(end - 1) / blocksize))
3629 			end = end - end % blocksize;
3630 
3631 		if (end > offset + len) {
3632 			memmove(bucket_buf + end - len,
3633 				bucket_buf + offset, len);
3634 			xe->xe_name_offset = cpu_to_le16(end - len);
3635 		}
3636 
3637 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3638 				"bucket %llu\n", (unsigned long long)blkno);
3639 
3640 		end -= len;
3641 	}
3642 
3643 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3644 			"bucket %llu\n", (unsigned long long)blkno);
3645 
3646 	if (xh_free_start == end)
3647 		goto out;
3648 
3649 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3650 	xh->xh_free_start = cpu_to_le16(end);
3651 
3652 	/* sort the entries by their name_hash. */
3653 	sort(entries, le16_to_cpu(xh->xh_count),
3654 	     sizeof(struct ocfs2_xattr_entry),
3655 	     cmp_xe, swap_xe);
3656 
3657 	buf = bucket_buf;
3658 	for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3659 		memcpy(bucket_block(bucket, i), buf, blocksize);
3660 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3661 
3662 out:
3663 	kfree(bucket_buf);
3664 	return ret;
3665 }
3666 
3667 /*
3668  * prev_blkno points to the start of an existing extent.  new_blkno
3669  * points to a newly allocated extent.  Because we know each of our
3670  * clusters contains more than bucket, we can easily split one cluster
3671  * at a bucket boundary.  So we take the last cluster of the existing
3672  * extent and split it down the middle.  We move the last half of the
3673  * buckets in the last cluster of the existing extent over to the new
3674  * extent.
3675  *
3676  * first_bh is the buffer at prev_blkno so we can update the existing
3677  * extent's bucket count.  header_bh is the bucket were we were hoping
3678  * to insert our xattr.  If the bucket move places the target in the new
3679  * extent, we'll update first_bh and header_bh after modifying the old
3680  * extent.
3681  *
3682  * first_hash will be set as the 1st xe's name_hash in the new extent.
3683  */
3684 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3685 					       handle_t *handle,
3686 					       struct ocfs2_xattr_bucket *first,
3687 					       struct ocfs2_xattr_bucket *target,
3688 					       u64 new_blkno,
3689 					       u32 num_clusters,
3690 					       u32 *first_hash)
3691 {
3692 	int ret;
3693 	struct super_block *sb = inode->i_sb;
3694 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
3695 	int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
3696 	int to_move = num_buckets / 2;
3697 	u64 src_blkno;
3698 	u64 last_cluster_blkno = bucket_blkno(first) +
3699 		((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
3700 
3701 	BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3702 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3703 
3704 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3705 	     (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3706 
3707 	ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3708 				     last_cluster_blkno, new_blkno,
3709 				     to_move, first_hash);
3710 	if (ret) {
3711 		mlog_errno(ret);
3712 		goto out;
3713 	}
3714 
3715 	/* This is the first bucket that got moved */
3716 	src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3717 
3718 	/*
3719 	 * If the target bucket was part of the moved buckets, we need to
3720 	 * update first and target.
3721 	 */
3722 	if (bucket_blkno(target) >= src_blkno) {
3723 		/* Find the block for the new target bucket */
3724 		src_blkno = new_blkno +
3725 			(bucket_blkno(target) - src_blkno);
3726 
3727 		ocfs2_xattr_bucket_relse(first);
3728 		ocfs2_xattr_bucket_relse(target);
3729 
3730 		/*
3731 		 * These shouldn't fail - the buffers are in the
3732 		 * journal from ocfs2_cp_xattr_bucket().
3733 		 */
3734 		ret = ocfs2_read_xattr_bucket(first, new_blkno);
3735 		if (ret) {
3736 			mlog_errno(ret);
3737 			goto out;
3738 		}
3739 		ret = ocfs2_read_xattr_bucket(target, src_blkno);
3740 		if (ret)
3741 			mlog_errno(ret);
3742 
3743 	}
3744 
3745 out:
3746 	return ret;
3747 }
3748 
3749 /*
3750  * Find the suitable pos when we divide a bucket into 2.
3751  * We have to make sure the xattrs with the same hash value exist
3752  * in the same bucket.
3753  *
3754  * If this ocfs2_xattr_header covers more than one hash value, find a
3755  * place where the hash value changes.  Try to find the most even split.
3756  * The most common case is that all entries have different hash values,
3757  * and the first check we make will find a place to split.
3758  */
3759 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3760 {
3761 	struct ocfs2_xattr_entry *entries = xh->xh_entries;
3762 	int count = le16_to_cpu(xh->xh_count);
3763 	int delta, middle = count / 2;
3764 
3765 	/*
3766 	 * We start at the middle.  Each step gets farther away in both
3767 	 * directions.  We therefore hit the change in hash value
3768 	 * nearest to the middle.  Note that this loop does not execute for
3769 	 * count < 2.
3770 	 */
3771 	for (delta = 0; delta < middle; delta++) {
3772 		/* Let's check delta earlier than middle */
3773 		if (cmp_xe(&entries[middle - delta - 1],
3774 			   &entries[middle - delta]))
3775 			return middle - delta;
3776 
3777 		/* For even counts, don't walk off the end */
3778 		if ((middle + delta + 1) == count)
3779 			continue;
3780 
3781 		/* Now try delta past middle */
3782 		if (cmp_xe(&entries[middle + delta],
3783 			   &entries[middle + delta + 1]))
3784 			return middle + delta + 1;
3785 	}
3786 
3787 	/* Every entry had the same hash */
3788 	return count;
3789 }
3790 
3791 /*
3792  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3793  * first_hash will record the 1st hash of the new bucket.
3794  *
3795  * Normally half of the xattrs will be moved.  But we have to make
3796  * sure that the xattrs with the same hash value are stored in the
3797  * same bucket. If all the xattrs in this bucket have the same hash
3798  * value, the new bucket will be initialized as an empty one and the
3799  * first_hash will be initialized as (hash_value+1).
3800  */
3801 static int ocfs2_divide_xattr_bucket(struct inode *inode,
3802 				    handle_t *handle,
3803 				    u64 blk,
3804 				    u64 new_blk,
3805 				    u32 *first_hash,
3806 				    int new_bucket_head)
3807 {
3808 	int ret, i;
3809 	int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3810 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3811 	struct ocfs2_xattr_header *xh;
3812 	struct ocfs2_xattr_entry *xe;
3813 	int blocksize = inode->i_sb->s_blocksize;
3814 
3815 	mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3816 	     (unsigned long long)blk, (unsigned long long)new_blk);
3817 
3818 	s_bucket = ocfs2_xattr_bucket_new(inode);
3819 	t_bucket = ocfs2_xattr_bucket_new(inode);
3820 	if (!s_bucket || !t_bucket) {
3821 		ret = -ENOMEM;
3822 		mlog_errno(ret);
3823 		goto out;
3824 	}
3825 
3826 	ret = ocfs2_read_xattr_bucket(s_bucket, blk);
3827 	if (ret) {
3828 		mlog_errno(ret);
3829 		goto out;
3830 	}
3831 
3832 	ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
3833 						OCFS2_JOURNAL_ACCESS_WRITE);
3834 	if (ret) {
3835 		mlog_errno(ret);
3836 		goto out;
3837 	}
3838 
3839 	/*
3840 	 * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
3841 	 * there's no need to read it.
3842 	 */
3843 	ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
3844 	if (ret) {
3845 		mlog_errno(ret);
3846 		goto out;
3847 	}
3848 
3849 	/*
3850 	 * Hey, if we're overwriting t_bucket, what difference does
3851 	 * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
3852 	 * same part of ocfs2_cp_xattr_bucket().
3853 	 */
3854 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
3855 						new_bucket_head ?
3856 						OCFS2_JOURNAL_ACCESS_CREATE :
3857 						OCFS2_JOURNAL_ACCESS_WRITE);
3858 	if (ret) {
3859 		mlog_errno(ret);
3860 		goto out;
3861 	}
3862 
3863 	xh = bucket_xh(s_bucket);
3864 	count = le16_to_cpu(xh->xh_count);
3865 	start = ocfs2_xattr_find_divide_pos(xh);
3866 
3867 	if (start == count) {
3868 		xe = &xh->xh_entries[start-1];
3869 
3870 		/*
3871 		 * initialized a new empty bucket here.
3872 		 * The hash value is set as one larger than
3873 		 * that of the last entry in the previous bucket.
3874 		 */
3875 		for (i = 0; i < t_bucket->bu_blocks; i++)
3876 			memset(bucket_block(t_bucket, i), 0, blocksize);
3877 
3878 		xh = bucket_xh(t_bucket);
3879 		xh->xh_free_start = cpu_to_le16(blocksize);
3880 		xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3881 		le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3882 
3883 		goto set_num_buckets;
3884 	}
3885 
3886 	/* copy the whole bucket to the new first. */
3887 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
3888 
3889 	/* update the new bucket. */
3890 	xh = bucket_xh(t_bucket);
3891 
3892 	/*
3893 	 * Calculate the total name/value len and xh_free_start for
3894 	 * the old bucket first.
3895 	 */
3896 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
3897 	name_value_len = 0;
3898 	for (i = 0; i < start; i++) {
3899 		xe = &xh->xh_entries[i];
3900 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3901 		if (ocfs2_xattr_is_local(xe))
3902 			xe_len +=
3903 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3904 		else
3905 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3906 		name_value_len += xe_len;
3907 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3908 			name_offset = le16_to_cpu(xe->xe_name_offset);
3909 	}
3910 
3911 	/*
3912 	 * Now begin the modification to the new bucket.
3913 	 *
3914 	 * In the new bucket, We just move the xattr entry to the beginning
3915 	 * and don't touch the name/value. So there will be some holes in the
3916 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3917 	 * called.
3918 	 */
3919 	xe = &xh->xh_entries[start];
3920 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3921 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3922 	     (int)((char *)xe - (char *)xh),
3923 	     (int)((char *)xh->xh_entries - (char *)xh));
3924 	memmove((char *)xh->xh_entries, (char *)xe, len);
3925 	xe = &xh->xh_entries[count - start];
3926 	len = sizeof(struct ocfs2_xattr_entry) * start;
3927 	memset((char *)xe, 0, len);
3928 
3929 	le16_add_cpu(&xh->xh_count, -start);
3930 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3931 
3932 	/* Calculate xh_free_start for the new bucket. */
3933 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3934 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3935 		xe = &xh->xh_entries[i];
3936 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3937 		if (ocfs2_xattr_is_local(xe))
3938 			xe_len +=
3939 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3940 		else
3941 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3942 		if (le16_to_cpu(xe->xe_name_offset) <
3943 		    le16_to_cpu(xh->xh_free_start))
3944 			xh->xh_free_start = xe->xe_name_offset;
3945 	}
3946 
3947 set_num_buckets:
3948 	/* set xh->xh_num_buckets for the new xh. */
3949 	if (new_bucket_head)
3950 		xh->xh_num_buckets = cpu_to_le16(1);
3951 	else
3952 		xh->xh_num_buckets = 0;
3953 
3954 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
3955 
3956 	/* store the first_hash of the new bucket. */
3957 	if (first_hash)
3958 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3959 
3960 	/*
3961 	 * Now only update the 1st block of the old bucket.  If we
3962 	 * just added a new empty bucket, there is no need to modify
3963 	 * it.
3964 	 */
3965 	if (start == count)
3966 		goto out;
3967 
3968 	xh = bucket_xh(s_bucket);
3969 	memset(&xh->xh_entries[start], 0,
3970 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
3971 	xh->xh_count = cpu_to_le16(start);
3972 	xh->xh_free_start = cpu_to_le16(name_offset);
3973 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
3974 
3975 	ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
3976 
3977 out:
3978 	ocfs2_xattr_bucket_free(s_bucket);
3979 	ocfs2_xattr_bucket_free(t_bucket);
3980 
3981 	return ret;
3982 }
3983 
3984 /*
3985  * Copy xattr from one bucket to another bucket.
3986  *
3987  * The caller must make sure that the journal transaction
3988  * has enough space for journaling.
3989  */
3990 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3991 				 handle_t *handle,
3992 				 u64 s_blkno,
3993 				 u64 t_blkno,
3994 				 int t_is_new)
3995 {
3996 	int ret;
3997 	struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
3998 
3999 	BUG_ON(s_blkno == t_blkno);
4000 
4001 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
4002 	     (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4003 	     t_is_new);
4004 
4005 	s_bucket = ocfs2_xattr_bucket_new(inode);
4006 	t_bucket = ocfs2_xattr_bucket_new(inode);
4007 	if (!s_bucket || !t_bucket) {
4008 		ret = -ENOMEM;
4009 		mlog_errno(ret);
4010 		goto out;
4011 	}
4012 
4013 	ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4014 	if (ret)
4015 		goto out;
4016 
4017 	/*
4018 	 * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4019 	 * there's no need to read it.
4020 	 */
4021 	ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4022 	if (ret)
4023 		goto out;
4024 
4025 	/*
4026 	 * Hey, if we're overwriting t_bucket, what difference does
4027 	 * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4028 	 * cluster to fill, we came here from
4029 	 * ocfs2_mv_xattr_buckets(), and it is really new -
4030 	 * ACCESS_CREATE is required.  But we also might have moved data
4031 	 * out of t_bucket before extending back into it.
4032 	 * ocfs2_add_new_xattr_bucket() can do this - its call to
4033 	 * ocfs2_add_new_xattr_cluster() may have created a new extent
4034 	 * and copied out the end of the old extent.  Then it re-extends
4035 	 * the old extent back to create space for new xattrs.  That's
4036 	 * how we get here, and the bucket isn't really new.
4037 	 */
4038 	ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4039 						t_is_new ?
4040 						OCFS2_JOURNAL_ACCESS_CREATE :
4041 						OCFS2_JOURNAL_ACCESS_WRITE);
4042 	if (ret)
4043 		goto out;
4044 
4045 	ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4046 	ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4047 
4048 out:
4049 	ocfs2_xattr_bucket_free(t_bucket);
4050 	ocfs2_xattr_bucket_free(s_bucket);
4051 
4052 	return ret;
4053 }
4054 
4055 /*
4056  * src_blk points to the start of an existing extent.  last_blk points to
4057  * last cluster in that extent.  to_blk points to a newly allocated
4058  * extent.  We copy the buckets from the cluster at last_blk to the new
4059  * extent.  If start_bucket is non-zero, we skip that many buckets before
4060  * we start copying.  The new extent's xh_num_buckets gets set to the
4061  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4062  * by the same amount.
4063  */
4064 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4065 				  u64 src_blk, u64 last_blk, u64 to_blk,
4066 				  unsigned int start_bucket,
4067 				  u32 *first_hash)
4068 {
4069 	int i, ret, credits;
4070 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4071 	int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4072 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4073 	struct ocfs2_xattr_bucket *old_first, *new_first;
4074 
4075 	mlog(0, "mv xattrs from cluster %llu to %llu\n",
4076 	     (unsigned long long)last_blk, (unsigned long long)to_blk);
4077 
4078 	BUG_ON(start_bucket >= num_buckets);
4079 	if (start_bucket) {
4080 		num_buckets -= start_bucket;
4081 		last_blk += (start_bucket * blks_per_bucket);
4082 	}
4083 
4084 	/* The first bucket of the original extent */
4085 	old_first = ocfs2_xattr_bucket_new(inode);
4086 	/* The first bucket of the new extent */
4087 	new_first = ocfs2_xattr_bucket_new(inode);
4088 	if (!old_first || !new_first) {
4089 		ret = -ENOMEM;
4090 		mlog_errno(ret);
4091 		goto out;
4092 	}
4093 
4094 	ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4095 	if (ret) {
4096 		mlog_errno(ret);
4097 		goto out;
4098 	}
4099 
4100 	/*
4101 	 * We need to update the first bucket of the old extent and all
4102 	 * the buckets going to the new extent.
4103 	 */
4104 	credits = ((num_buckets + 1) * blks_per_bucket) +
4105 		handle->h_buffer_credits;
4106 	ret = ocfs2_extend_trans(handle, credits);
4107 	if (ret) {
4108 		mlog_errno(ret);
4109 		goto out;
4110 	}
4111 
4112 	ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4113 						OCFS2_JOURNAL_ACCESS_WRITE);
4114 	if (ret) {
4115 		mlog_errno(ret);
4116 		goto out;
4117 	}
4118 
4119 	for (i = 0; i < num_buckets; i++) {
4120 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4121 					    last_blk + (i * blks_per_bucket),
4122 					    to_blk + (i * blks_per_bucket),
4123 					    1);
4124 		if (ret) {
4125 			mlog_errno(ret);
4126 			goto out;
4127 		}
4128 	}
4129 
4130 	/*
4131 	 * Get the new bucket ready before we dirty anything
4132 	 * (This actually shouldn't fail, because we already dirtied
4133 	 * it once in ocfs2_cp_xattr_bucket()).
4134 	 */
4135 	ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4136 	if (ret) {
4137 		mlog_errno(ret);
4138 		goto out;
4139 	}
4140 	ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4141 						OCFS2_JOURNAL_ACCESS_WRITE);
4142 	if (ret) {
4143 		mlog_errno(ret);
4144 		goto out;
4145 	}
4146 
4147 	/* Now update the headers */
4148 	le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4149 	ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4150 
4151 	bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4152 	ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4153 
4154 	if (first_hash)
4155 		*first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4156 
4157 out:
4158 	ocfs2_xattr_bucket_free(new_first);
4159 	ocfs2_xattr_bucket_free(old_first);
4160 	return ret;
4161 }
4162 
4163 /*
4164  * Move some xattrs in this cluster to the new cluster.
4165  * This function should only be called when bucket size == cluster size.
4166  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4167  */
4168 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4169 				      handle_t *handle,
4170 				      u64 prev_blk,
4171 				      u64 new_blk,
4172 				      u32 *first_hash)
4173 {
4174 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4175 	int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4176 
4177 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4178 
4179 	ret = ocfs2_extend_trans(handle, credits);
4180 	if (ret) {
4181 		mlog_errno(ret);
4182 		return ret;
4183 	}
4184 
4185 	/* Move half of the xattr in start_blk to the next bucket. */
4186 	return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4187 					  new_blk, first_hash, 1);
4188 }
4189 
4190 /*
4191  * Move some xattrs from the old cluster to the new one since they are not
4192  * contiguous in ocfs2 xattr tree.
4193  *
4194  * new_blk starts a new separate cluster, and we will move some xattrs from
4195  * prev_blk to it. v_start will be set as the first name hash value in this
4196  * new cluster so that it can be used as e_cpos during tree insertion and
4197  * don't collide with our original b-tree operations. first_bh and header_bh
4198  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4199  * to extend the insert bucket.
4200  *
4201  * The problem is how much xattr should we move to the new one and when should
4202  * we update first_bh and header_bh?
4203  * 1. If cluster size > bucket size, that means the previous cluster has more
4204  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4205  *    update the first_bh and header_bh if the insert bucket has been moved
4206  *    to the new cluster.
4207  * 2. If cluster_size == bucket_size:
4208  *    a) If the previous extent rec has more than one cluster and the insert
4209  *       place isn't in the last cluster, copy the entire last cluster to the
4210  *       new one. This time, we don't need to upate the first_bh and header_bh
4211  *       since they will not be moved into the new cluster.
4212  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4213  *       the new one. And we set the extend flag to zero if the insert place is
4214  *       moved into the new allocated cluster since no extend is needed.
4215  */
4216 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4217 					    handle_t *handle,
4218 					    struct ocfs2_xattr_bucket *first,
4219 					    struct ocfs2_xattr_bucket *target,
4220 					    u64 new_blk,
4221 					    u32 prev_clusters,
4222 					    u32 *v_start,
4223 					    int *extend)
4224 {
4225 	int ret;
4226 
4227 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4228 	     (unsigned long long)bucket_blkno(first), prev_clusters,
4229 	     (unsigned long long)new_blk);
4230 
4231 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4232 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4233 							  handle,
4234 							  first, target,
4235 							  new_blk,
4236 							  prev_clusters,
4237 							  v_start);
4238 		if (ret)
4239 			mlog_errno(ret);
4240 	} else {
4241 		/* The start of the last cluster in the first extent */
4242 		u64 last_blk = bucket_blkno(first) +
4243 			((prev_clusters - 1) *
4244 			 ocfs2_clusters_to_blocks(inode->i_sb, 1));
4245 
4246 		if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4247 			ret = ocfs2_mv_xattr_buckets(inode, handle,
4248 						     bucket_blkno(first),
4249 						     last_blk, new_blk, 0,
4250 						     v_start);
4251 			if (ret)
4252 				mlog_errno(ret);
4253 		} else {
4254 			ret = ocfs2_divide_xattr_cluster(inode, handle,
4255 							 last_blk, new_blk,
4256 							 v_start);
4257 			if (ret)
4258 				mlog_errno(ret);
4259 
4260 			if ((bucket_blkno(target) == last_blk) && extend)
4261 				*extend = 0;
4262 		}
4263 	}
4264 
4265 	return ret;
4266 }
4267 
4268 /*
4269  * Add a new cluster for xattr storage.
4270  *
4271  * If the new cluster is contiguous with the previous one, it will be
4272  * appended to the same extent record, and num_clusters will be updated.
4273  * If not, we will insert a new extent for it and move some xattrs in
4274  * the last cluster into the new allocated one.
4275  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4276  * lose the benefits of hashing because we'll have to search large leaves.
4277  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4278  * if it's bigger).
4279  *
4280  * first_bh is the first block of the previous extent rec and header_bh
4281  * indicates the bucket we will insert the new xattrs. They will be updated
4282  * when the header_bh is moved into the new cluster.
4283  */
4284 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4285 				       struct buffer_head *root_bh,
4286 				       struct ocfs2_xattr_bucket *first,
4287 				       struct ocfs2_xattr_bucket *target,
4288 				       u32 *num_clusters,
4289 				       u32 prev_cpos,
4290 				       int *extend,
4291 				       struct ocfs2_xattr_set_ctxt *ctxt)
4292 {
4293 	int ret;
4294 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4295 	u32 prev_clusters = *num_clusters;
4296 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4297 	u64 block;
4298 	handle_t *handle = ctxt->handle;
4299 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4300 	struct ocfs2_extent_tree et;
4301 
4302 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4303 	     "previous xattr blkno = %llu\n",
4304 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
4305 	     prev_cpos, (unsigned long long)bucket_blkno(first));
4306 
4307 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4308 
4309 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4310 				      OCFS2_JOURNAL_ACCESS_WRITE);
4311 	if (ret < 0) {
4312 		mlog_errno(ret);
4313 		goto leave;
4314 	}
4315 
4316 	ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4317 				     clusters_to_add, &bit_off, &num_bits);
4318 	if (ret < 0) {
4319 		if (ret != -ENOSPC)
4320 			mlog_errno(ret);
4321 		goto leave;
4322 	}
4323 
4324 	BUG_ON(num_bits > clusters_to_add);
4325 
4326 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4327 	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4328 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4329 
4330 	if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4331 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4332 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4333 		/*
4334 		 * If this cluster is contiguous with the old one and
4335 		 * adding this new cluster, we don't surpass the limit of
4336 		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4337 		 * initialized and used like other buckets in the previous
4338 		 * cluster.
4339 		 * So add it as a contiguous one. The caller will handle
4340 		 * its init process.
4341 		 */
4342 		v_start = prev_cpos + prev_clusters;
4343 		*num_clusters = prev_clusters + num_bits;
4344 		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4345 		     num_bits);
4346 	} else {
4347 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
4348 						       handle,
4349 						       first,
4350 						       target,
4351 						       block,
4352 						       prev_clusters,
4353 						       &v_start,
4354 						       extend);
4355 		if (ret) {
4356 			mlog_errno(ret);
4357 			goto leave;
4358 		}
4359 	}
4360 
4361 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4362 	     num_bits, (unsigned long long)block, v_start);
4363 	ret = ocfs2_insert_extent(handle, &et, v_start, block,
4364 				  num_bits, 0, ctxt->meta_ac);
4365 	if (ret < 0) {
4366 		mlog_errno(ret);
4367 		goto leave;
4368 	}
4369 
4370 	ret = ocfs2_journal_dirty(handle, root_bh);
4371 	if (ret < 0)
4372 		mlog_errno(ret);
4373 
4374 leave:
4375 	return ret;
4376 }
4377 
4378 /*
4379  * We are given an extent.  'first' is the bucket at the very front of
4380  * the extent.  The extent has space for an additional bucket past
4381  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4382  * of the target bucket.  We wish to shift every bucket past the target
4383  * down one, filling in that additional space.  When we get back to the
4384  * target, we split the target between itself and the now-empty bucket
4385  * at target+1 (aka, target_blkno + blks_per_bucket).
4386  */
4387 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4388 				     handle_t *handle,
4389 				     struct ocfs2_xattr_bucket *first,
4390 				     u64 target_blk,
4391 				     u32 num_clusters)
4392 {
4393 	int ret, credits;
4394 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4395 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4396 	u64 end_blk;
4397 	u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4398 
4399 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4400 	     "from %llu, len = %u\n", (unsigned long long)target_blk,
4401 	     (unsigned long long)bucket_blkno(first), num_clusters);
4402 
4403 	/* The extent must have room for an additional bucket */
4404 	BUG_ON(new_bucket >=
4405 	       (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4406 
4407 	/* end_blk points to the last existing bucket */
4408 	end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4409 
4410 	/*
4411 	 * end_blk is the start of the last existing bucket.
4412 	 * Thus, (end_blk - target_blk) covers the target bucket and
4413 	 * every bucket after it up to, but not including, the last
4414 	 * existing bucket.  Then we add the last existing bucket, the
4415 	 * new bucket, and the first bucket (3 * blk_per_bucket).
4416 	 */
4417 	credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4418 		  handle->h_buffer_credits;
4419 	ret = ocfs2_extend_trans(handle, credits);
4420 	if (ret) {
4421 		mlog_errno(ret);
4422 		goto out;
4423 	}
4424 
4425 	ret = ocfs2_xattr_bucket_journal_access(handle, first,
4426 						OCFS2_JOURNAL_ACCESS_WRITE);
4427 	if (ret) {
4428 		mlog_errno(ret);
4429 		goto out;
4430 	}
4431 
4432 	while (end_blk != target_blk) {
4433 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4434 					    end_blk + blk_per_bucket, 0);
4435 		if (ret)
4436 			goto out;
4437 		end_blk -= blk_per_bucket;
4438 	}
4439 
4440 	/* Move half of the xattr in target_blkno to the next bucket. */
4441 	ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4442 					target_blk + blk_per_bucket, NULL, 0);
4443 
4444 	le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4445 	ocfs2_xattr_bucket_journal_dirty(handle, first);
4446 
4447 out:
4448 	return ret;
4449 }
4450 
4451 /*
4452  * Add new xattr bucket in an extent record and adjust the buckets
4453  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4454  * bucket we want to insert into.
4455  *
4456  * In the easy case, we will move all the buckets after target down by
4457  * one. Half of target's xattrs will be moved to the next bucket.
4458  *
4459  * If current cluster is full, we'll allocate a new one.  This may not
4460  * be contiguous.  The underlying calls will make sure that there is
4461  * space for the insert, shifting buckets around if necessary.
4462  * 'target' may be moved by those calls.
4463  */
4464 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4465 				      struct buffer_head *xb_bh,
4466 				      struct ocfs2_xattr_bucket *target,
4467 				      struct ocfs2_xattr_set_ctxt *ctxt)
4468 {
4469 	struct ocfs2_xattr_block *xb =
4470 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4471 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4472 	struct ocfs2_extent_list *el = &xb_root->xt_list;
4473 	u32 name_hash =
4474 		le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4475 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4476 	int ret, num_buckets, extend = 1;
4477 	u64 p_blkno;
4478 	u32 e_cpos, num_clusters;
4479 	/* The bucket at the front of the extent */
4480 	struct ocfs2_xattr_bucket *first;
4481 
4482 	mlog(0, "Add new xattr bucket starting from %llu\n",
4483 	     (unsigned long long)bucket_blkno(target));
4484 
4485 	/* The first bucket of the original extent */
4486 	first = ocfs2_xattr_bucket_new(inode);
4487 	if (!first) {
4488 		ret = -ENOMEM;
4489 		mlog_errno(ret);
4490 		goto out;
4491 	}
4492 
4493 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4494 				  &num_clusters, el);
4495 	if (ret) {
4496 		mlog_errno(ret);
4497 		goto out;
4498 	}
4499 
4500 	ret = ocfs2_read_xattr_bucket(first, p_blkno);
4501 	if (ret) {
4502 		mlog_errno(ret);
4503 		goto out;
4504 	}
4505 
4506 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4507 	if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4508 		/*
4509 		 * This can move first+target if the target bucket moves
4510 		 * to the new extent.
4511 		 */
4512 		ret = ocfs2_add_new_xattr_cluster(inode,
4513 						  xb_bh,
4514 						  first,
4515 						  target,
4516 						  &num_clusters,
4517 						  e_cpos,
4518 						  &extend,
4519 						  ctxt);
4520 		if (ret) {
4521 			mlog_errno(ret);
4522 			goto out;
4523 		}
4524 	}
4525 
4526 	if (extend) {
4527 		ret = ocfs2_extend_xattr_bucket(inode,
4528 						ctxt->handle,
4529 						first,
4530 						bucket_blkno(target),
4531 						num_clusters);
4532 		if (ret)
4533 			mlog_errno(ret);
4534 	}
4535 
4536 out:
4537 	ocfs2_xattr_bucket_free(first);
4538 
4539 	return ret;
4540 }
4541 
4542 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4543 					struct ocfs2_xattr_bucket *bucket,
4544 					int offs)
4545 {
4546 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
4547 
4548 	offs = offs % inode->i_sb->s_blocksize;
4549 	return bucket_block(bucket, block_off) + offs;
4550 }
4551 
4552 /*
4553  * Handle the normal xattr set, including replace, delete and new.
4554  *
4555  * Note: "local" indicates the real data's locality. So we can't
4556  * just its bucket locality by its length.
4557  */
4558 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4559 					 struct ocfs2_xattr_info *xi,
4560 					 struct ocfs2_xattr_search *xs,
4561 					 u32 name_hash,
4562 					 int local)
4563 {
4564 	struct ocfs2_xattr_entry *last, *xe;
4565 	int name_len = strlen(xi->name);
4566 	struct ocfs2_xattr_header *xh = xs->header;
4567 	u16 count = le16_to_cpu(xh->xh_count), start;
4568 	size_t blocksize = inode->i_sb->s_blocksize;
4569 	char *val;
4570 	size_t offs, size, new_size;
4571 
4572 	last = &xh->xh_entries[count];
4573 	if (!xs->not_found) {
4574 		xe = xs->here;
4575 		offs = le16_to_cpu(xe->xe_name_offset);
4576 		if (ocfs2_xattr_is_local(xe))
4577 			size = OCFS2_XATTR_SIZE(name_len) +
4578 			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4579 		else
4580 			size = OCFS2_XATTR_SIZE(name_len) +
4581 			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4582 
4583 		/*
4584 		 * If the new value will be stored outside, xi->value has been
4585 		 * initalized as an empty ocfs2_xattr_value_root, and the same
4586 		 * goes with xi->value_len, so we can set new_size safely here.
4587 		 * See ocfs2_xattr_set_in_bucket.
4588 		 */
4589 		new_size = OCFS2_XATTR_SIZE(name_len) +
4590 			   OCFS2_XATTR_SIZE(xi->value_len);
4591 
4592 		le16_add_cpu(&xh->xh_name_value_len, -size);
4593 		if (xi->value) {
4594 			if (new_size > size)
4595 				goto set_new_name_value;
4596 
4597 			/* Now replace the old value with new one. */
4598 			if (local)
4599 				xe->xe_value_size = cpu_to_le64(xi->value_len);
4600 			else
4601 				xe->xe_value_size = 0;
4602 
4603 			val = ocfs2_xattr_bucket_get_val(inode,
4604 							 xs->bucket, offs);
4605 			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4606 			       size - OCFS2_XATTR_SIZE(name_len));
4607 			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4608 				memcpy(val + OCFS2_XATTR_SIZE(name_len),
4609 				       xi->value, xi->value_len);
4610 
4611 			le16_add_cpu(&xh->xh_name_value_len, new_size);
4612 			ocfs2_xattr_set_local(xe, local);
4613 			return;
4614 		} else {
4615 			/*
4616 			 * Remove the old entry if there is more than one.
4617 			 * We don't remove the last entry so that we can
4618 			 * use it to indicate the hash value of the empty
4619 			 * bucket.
4620 			 */
4621 			last -= 1;
4622 			le16_add_cpu(&xh->xh_count, -1);
4623 			if (xh->xh_count) {
4624 				memmove(xe, xe + 1,
4625 					(void *)last - (void *)xe);
4626 				memset(last, 0,
4627 				       sizeof(struct ocfs2_xattr_entry));
4628 			} else
4629 				xh->xh_free_start =
4630 					cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4631 
4632 			return;
4633 		}
4634 	} else {
4635 		/* find a new entry for insert. */
4636 		int low = 0, high = count - 1, tmp;
4637 		struct ocfs2_xattr_entry *tmp_xe;
4638 
4639 		while (low <= high && count) {
4640 			tmp = (low + high) / 2;
4641 			tmp_xe = &xh->xh_entries[tmp];
4642 
4643 			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4644 				low = tmp + 1;
4645 			else if (name_hash <
4646 				 le32_to_cpu(tmp_xe->xe_name_hash))
4647 				high = tmp - 1;
4648 			else {
4649 				low = tmp;
4650 				break;
4651 			}
4652 		}
4653 
4654 		xe = &xh->xh_entries[low];
4655 		if (low != count)
4656 			memmove(xe + 1, xe, (void *)last - (void *)xe);
4657 
4658 		le16_add_cpu(&xh->xh_count, 1);
4659 		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4660 		xe->xe_name_hash = cpu_to_le32(name_hash);
4661 		xe->xe_name_len = name_len;
4662 		ocfs2_xattr_set_type(xe, xi->name_index);
4663 	}
4664 
4665 set_new_name_value:
4666 	/* Insert the new name+value. */
4667 	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4668 
4669 	/*
4670 	 * We must make sure that the name/value pair
4671 	 * exists in the same block.
4672 	 */
4673 	offs = le16_to_cpu(xh->xh_free_start);
4674 	start = offs - size;
4675 
4676 	if (start >> inode->i_sb->s_blocksize_bits !=
4677 	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4678 		offs = offs - offs % blocksize;
4679 		xh->xh_free_start = cpu_to_le16(offs);
4680 	}
4681 
4682 	val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4683 	xe->xe_name_offset = cpu_to_le16(offs - size);
4684 
4685 	memset(val, 0, size);
4686 	memcpy(val, xi->name, name_len);
4687 	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4688 
4689 	xe->xe_value_size = cpu_to_le64(xi->value_len);
4690 	ocfs2_xattr_set_local(xe, local);
4691 	xs->here = xe;
4692 	le16_add_cpu(&xh->xh_free_start, -size);
4693 	le16_add_cpu(&xh->xh_name_value_len, size);
4694 
4695 	return;
4696 }
4697 
4698 /*
4699  * Set the xattr entry in the specified bucket.
4700  * The bucket is indicated by xs->bucket and it should have the enough
4701  * space for the xattr insertion.
4702  */
4703 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4704 					   handle_t *handle,
4705 					   struct ocfs2_xattr_info *xi,
4706 					   struct ocfs2_xattr_search *xs,
4707 					   u32 name_hash,
4708 					   int local)
4709 {
4710 	int ret;
4711 	u64 blkno;
4712 
4713 	mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4714 	     (unsigned long)xi->value_len, xi->name_index,
4715 	     (unsigned long long)bucket_blkno(xs->bucket));
4716 
4717 	if (!xs->bucket->bu_bhs[1]) {
4718 		blkno = bucket_blkno(xs->bucket);
4719 		ocfs2_xattr_bucket_relse(xs->bucket);
4720 		ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4721 		if (ret) {
4722 			mlog_errno(ret);
4723 			goto out;
4724 		}
4725 	}
4726 
4727 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4728 						OCFS2_JOURNAL_ACCESS_WRITE);
4729 	if (ret < 0) {
4730 		mlog_errno(ret);
4731 		goto out;
4732 	}
4733 
4734 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4735 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4736 
4737 out:
4738 	return ret;
4739 }
4740 
4741 /*
4742  * Truncate the specified xe_off entry in xattr bucket.
4743  * bucket is indicated by header_bh and len is the new length.
4744  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4745  *
4746  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4747  */
4748 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4749 					     struct ocfs2_xattr_bucket *bucket,
4750 					     int xe_off,
4751 					     int len,
4752 					     struct ocfs2_xattr_set_ctxt *ctxt)
4753 {
4754 	int ret, offset;
4755 	u64 value_blk;
4756 	struct ocfs2_xattr_entry *xe;
4757 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4758 	size_t blocksize = inode->i_sb->s_blocksize;
4759 	struct ocfs2_xattr_value_buf vb = {
4760 		.vb_access = ocfs2_journal_access,
4761 	};
4762 
4763 	xe = &xh->xh_entries[xe_off];
4764 
4765 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4766 
4767 	offset = le16_to_cpu(xe->xe_name_offset) +
4768 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4769 
4770 	value_blk = offset / blocksize;
4771 
4772 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
4773 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4774 
4775 	vb.vb_bh = bucket->bu_bhs[value_blk];
4776 	BUG_ON(!vb.vb_bh);
4777 
4778 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
4779 		(vb.vb_bh->b_data + offset % blocksize);
4780 
4781 	/*
4782 	 * From here on out we have to dirty the bucket.  The generic
4783 	 * value calls only modify one of the bucket's bhs, but we need
4784 	 * to send the bucket at once.  So if they error, they *could* have
4785 	 * modified something.  We have to assume they did, and dirty
4786 	 * the whole bucket.  This leaves us in a consistent state.
4787 	 */
4788 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4789 	     xe_off, (unsigned long long)bucket_blkno(bucket), len);
4790 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4791 	if (ret) {
4792 		mlog_errno(ret);
4793 		goto out;
4794 	}
4795 
4796 	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
4797 						OCFS2_JOURNAL_ACCESS_WRITE);
4798 	if (ret) {
4799 		mlog_errno(ret);
4800 		goto out;
4801 	}
4802 
4803 	xe->xe_value_size = cpu_to_le64(len);
4804 
4805 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
4806 
4807 out:
4808 	return ret;
4809 }
4810 
4811 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4812 					struct ocfs2_xattr_search *xs,
4813 					int len,
4814 					struct ocfs2_xattr_set_ctxt *ctxt)
4815 {
4816 	int ret, offset;
4817 	struct ocfs2_xattr_entry *xe = xs->here;
4818 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4819 
4820 	BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4821 
4822 	offset = xe - xh->xh_entries;
4823 	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
4824 						offset, len, ctxt);
4825 	if (ret)
4826 		mlog_errno(ret);
4827 
4828 	return ret;
4829 }
4830 
4831 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4832 						handle_t *handle,
4833 						struct ocfs2_xattr_search *xs,
4834 						char *val,
4835 						int value_len)
4836 {
4837 	int ret, offset, block_off;
4838 	struct ocfs2_xattr_value_root *xv;
4839 	struct ocfs2_xattr_entry *xe = xs->here;
4840 	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4841 	void *base;
4842 	struct ocfs2_xattr_value_buf vb = {
4843 		.vb_access = ocfs2_journal_access,
4844 	};
4845 
4846 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4847 
4848 	ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
4849 						xe - xh->xh_entries,
4850 						&block_off,
4851 						&offset);
4852 	if (ret) {
4853 		mlog_errno(ret);
4854 		goto out;
4855 	}
4856 
4857 	base = bucket_block(xs->bucket, block_off);
4858 	xv = (struct ocfs2_xattr_value_root *)(base + offset +
4859 		 OCFS2_XATTR_SIZE(xe->xe_name_len));
4860 
4861 	vb.vb_xv = xv;
4862 	vb.vb_bh = xs->bucket->bu_bhs[block_off];
4863 	ret = __ocfs2_xattr_set_value_outside(inode, handle,
4864 					      &vb, val, value_len);
4865 	if (ret)
4866 		mlog_errno(ret);
4867 out:
4868 	return ret;
4869 }
4870 
4871 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4872 				  struct buffer_head *root_bh,
4873 				  u64 blkno,
4874 				  u32 cpos,
4875 				  u32 len)
4876 {
4877 	int ret;
4878 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4879 	struct inode *tl_inode = osb->osb_tl_inode;
4880 	handle_t *handle;
4881 	struct ocfs2_xattr_block *xb =
4882 			(struct ocfs2_xattr_block *)root_bh->b_data;
4883 	struct ocfs2_alloc_context *meta_ac = NULL;
4884 	struct ocfs2_cached_dealloc_ctxt dealloc;
4885 	struct ocfs2_extent_tree et;
4886 
4887 	ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4888 
4889 	ocfs2_init_dealloc_ctxt(&dealloc);
4890 
4891 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4892 	     cpos, len, (unsigned long long)blkno);
4893 
4894 	ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
4895 					       len);
4896 
4897 	ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4898 	if (ret) {
4899 		mlog_errno(ret);
4900 		return ret;
4901 	}
4902 
4903 	mutex_lock(&tl_inode->i_mutex);
4904 
4905 	if (ocfs2_truncate_log_needs_flush(osb)) {
4906 		ret = __ocfs2_flush_truncate_log(osb);
4907 		if (ret < 0) {
4908 			mlog_errno(ret);
4909 			goto out;
4910 		}
4911 	}
4912 
4913 	handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
4914 	if (IS_ERR(handle)) {
4915 		ret = -ENOMEM;
4916 		mlog_errno(ret);
4917 		goto out;
4918 	}
4919 
4920 	ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4921 				      OCFS2_JOURNAL_ACCESS_WRITE);
4922 	if (ret) {
4923 		mlog_errno(ret);
4924 		goto out_commit;
4925 	}
4926 
4927 	ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
4928 				  &dealloc);
4929 	if (ret) {
4930 		mlog_errno(ret);
4931 		goto out_commit;
4932 	}
4933 
4934 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4935 
4936 	ret = ocfs2_journal_dirty(handle, root_bh);
4937 	if (ret) {
4938 		mlog_errno(ret);
4939 		goto out_commit;
4940 	}
4941 
4942 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4943 	if (ret)
4944 		mlog_errno(ret);
4945 
4946 out_commit:
4947 	ocfs2_commit_trans(osb, handle);
4948 out:
4949 	ocfs2_schedule_truncate_log_flush(osb, 1);
4950 
4951 	mutex_unlock(&tl_inode->i_mutex);
4952 
4953 	if (meta_ac)
4954 		ocfs2_free_alloc_context(meta_ac);
4955 
4956 	ocfs2_run_deallocs(osb, &dealloc);
4957 
4958 	return ret;
4959 }
4960 
4961 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4962 					 handle_t *handle,
4963 					 struct ocfs2_xattr_search *xs)
4964 {
4965 	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4966 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
4967 						le16_to_cpu(xh->xh_count) - 1];
4968 	int ret = 0;
4969 
4970 	ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4971 						OCFS2_JOURNAL_ACCESS_WRITE);
4972 	if (ret) {
4973 		mlog_errno(ret);
4974 		return;
4975 	}
4976 
4977 	/* Remove the old entry. */
4978 	memmove(xs->here, xs->here + 1,
4979 		(void *)last - (void *)xs->here);
4980 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4981 	le16_add_cpu(&xh->xh_count, -1);
4982 
4983 	ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4984 }
4985 
4986 /*
4987  * Set the xattr name/value in the bucket specified in xs.
4988  *
4989  * As the new value in xi may be stored in the bucket or in an outside cluster,
4990  * we divide the whole process into 3 steps:
4991  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4992  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4993  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4994  * 4. If the clusters for the new outside value can't be allocated, we need
4995  *    to free the xattr we allocated in set.
4996  */
4997 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4998 				     struct ocfs2_xattr_info *xi,
4999 				     struct ocfs2_xattr_search *xs,
5000 				     struct ocfs2_xattr_set_ctxt *ctxt)
5001 {
5002 	int ret, local = 1;
5003 	size_t value_len;
5004 	char *val = (char *)xi->value;
5005 	struct ocfs2_xattr_entry *xe = xs->here;
5006 	u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
5007 					      strlen(xi->name));
5008 
5009 	if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5010 		/*
5011 		 * We need to truncate the xattr storage first.
5012 		 *
5013 		 * If both the old and new value are stored to
5014 		 * outside block, we only need to truncate
5015 		 * the storage and then set the value outside.
5016 		 *
5017 		 * If the new value should be stored within block,
5018 		 * we should free all the outside block first and
5019 		 * the modification to the xattr block will be done
5020 		 * by following steps.
5021 		 */
5022 		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5023 			value_len = xi->value_len;
5024 		else
5025 			value_len = 0;
5026 
5027 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5028 							   value_len,
5029 							   ctxt);
5030 		if (ret)
5031 			goto out;
5032 
5033 		if (value_len)
5034 			goto set_value_outside;
5035 	}
5036 
5037 	value_len = xi->value_len;
5038 	/* So we have to handle the inside block change now. */
5039 	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5040 		/*
5041 		 * If the new value will be stored outside of block,
5042 		 * initalize a new empty value root and insert it first.
5043 		 */
5044 		local = 0;
5045 		xi->value = &def_xv;
5046 		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5047 	}
5048 
5049 	ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5050 					      name_hash, local);
5051 	if (ret) {
5052 		mlog_errno(ret);
5053 		goto out;
5054 	}
5055 
5056 	if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5057 		goto out;
5058 
5059 	/* allocate the space now for the outside block storage. */
5060 	ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5061 						   value_len, ctxt);
5062 	if (ret) {
5063 		mlog_errno(ret);
5064 
5065 		if (xs->not_found) {
5066 			/*
5067 			 * We can't allocate enough clusters for outside
5068 			 * storage and we have allocated xattr already,
5069 			 * so need to remove it.
5070 			 */
5071 			ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5072 		}
5073 		goto out;
5074 	}
5075 
5076 set_value_outside:
5077 	ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5078 						   xs, val, value_len);
5079 out:
5080 	return ret;
5081 }
5082 
5083 /*
5084  * check whether the xattr bucket is filled up with the same hash value.
5085  * If we want to insert the xattr with the same hash, return -ENOSPC.
5086  * If we want to insert a xattr with different hash value, go ahead
5087  * and ocfs2_divide_xattr_bucket will handle this.
5088  */
5089 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5090 					      struct ocfs2_xattr_bucket *bucket,
5091 					      const char *name)
5092 {
5093 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5094 	u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5095 
5096 	if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5097 		return 0;
5098 
5099 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5100 	    xh->xh_entries[0].xe_name_hash) {
5101 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5102 		     "hash = %u\n",
5103 		     (unsigned long long)bucket_blkno(bucket),
5104 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5105 		return -ENOSPC;
5106 	}
5107 
5108 	return 0;
5109 }
5110 
5111 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5112 					     struct ocfs2_xattr_info *xi,
5113 					     struct ocfs2_xattr_search *xs,
5114 					     struct ocfs2_xattr_set_ctxt *ctxt)
5115 {
5116 	struct ocfs2_xattr_header *xh;
5117 	struct ocfs2_xattr_entry *xe;
5118 	u16 count, header_size, xh_free_start;
5119 	int free, max_free, need, old;
5120 	size_t value_size = 0, name_len = strlen(xi->name);
5121 	size_t blocksize = inode->i_sb->s_blocksize;
5122 	int ret, allocation = 0;
5123 
5124 	mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5125 
5126 try_again:
5127 	xh = xs->header;
5128 	count = le16_to_cpu(xh->xh_count);
5129 	xh_free_start = le16_to_cpu(xh->xh_free_start);
5130 	header_size = sizeof(struct ocfs2_xattr_header) +
5131 			count * sizeof(struct ocfs2_xattr_entry);
5132 	max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5133 		le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5134 
5135 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5136 			"of %u which exceed block size\n",
5137 			(unsigned long long)bucket_blkno(xs->bucket),
5138 			header_size);
5139 
5140 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5141 		value_size = OCFS2_XATTR_ROOT_SIZE;
5142 	else if (xi->value)
5143 		value_size = OCFS2_XATTR_SIZE(xi->value_len);
5144 
5145 	if (xs->not_found)
5146 		need = sizeof(struct ocfs2_xattr_entry) +
5147 			OCFS2_XATTR_SIZE(name_len) + value_size;
5148 	else {
5149 		need = value_size + OCFS2_XATTR_SIZE(name_len);
5150 
5151 		/*
5152 		 * We only replace the old value if the new length is smaller
5153 		 * than the old one. Otherwise we will allocate new space in the
5154 		 * bucket to store it.
5155 		 */
5156 		xe = xs->here;
5157 		if (ocfs2_xattr_is_local(xe))
5158 			old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5159 		else
5160 			old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5161 
5162 		if (old >= value_size)
5163 			need = 0;
5164 	}
5165 
5166 	free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5167 	/*
5168 	 * We need to make sure the new name/value pair
5169 	 * can exist in the same block.
5170 	 */
5171 	if (xh_free_start % blocksize < need)
5172 		free -= xh_free_start % blocksize;
5173 
5174 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5175 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5176 	     " %u\n", xs->not_found,
5177 	     (unsigned long long)bucket_blkno(xs->bucket),
5178 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
5179 	     le16_to_cpu(xh->xh_name_value_len));
5180 
5181 	if (free < need ||
5182 	    (xs->not_found &&
5183 	     count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5184 		if (need <= max_free &&
5185 		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5186 			/*
5187 			 * We can create the space by defragment. Since only the
5188 			 * name/value will be moved, the xe shouldn't be changed
5189 			 * in xs.
5190 			 */
5191 			ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5192 							xs->bucket);
5193 			if (ret) {
5194 				mlog_errno(ret);
5195 				goto out;
5196 			}
5197 
5198 			xh_free_start = le16_to_cpu(xh->xh_free_start);
5199 			free = xh_free_start - header_size
5200 				- OCFS2_XATTR_HEADER_GAP;
5201 			if (xh_free_start % blocksize < need)
5202 				free -= xh_free_start % blocksize;
5203 
5204 			if (free >= need)
5205 				goto xattr_set;
5206 
5207 			mlog(0, "Can't get enough space for xattr insert by "
5208 			     "defragment. Need %u bytes, but we have %d, so "
5209 			     "allocate new bucket for it.\n", need, free);
5210 		}
5211 
5212 		/*
5213 		 * We have to add new buckets or clusters and one
5214 		 * allocation should leave us enough space for insert.
5215 		 */
5216 		BUG_ON(allocation);
5217 
5218 		/*
5219 		 * We do not allow for overlapping ranges between buckets. And
5220 		 * the maximum number of collisions we will allow for then is
5221 		 * one bucket's worth, so check it here whether we need to
5222 		 * add a new bucket for the insert.
5223 		 */
5224 		ret = ocfs2_check_xattr_bucket_collision(inode,
5225 							 xs->bucket,
5226 							 xi->name);
5227 		if (ret) {
5228 			mlog_errno(ret);
5229 			goto out;
5230 		}
5231 
5232 		ret = ocfs2_add_new_xattr_bucket(inode,
5233 						 xs->xattr_bh,
5234 						 xs->bucket,
5235 						 ctxt);
5236 		if (ret) {
5237 			mlog_errno(ret);
5238 			goto out;
5239 		}
5240 
5241 		/*
5242 		 * ocfs2_add_new_xattr_bucket() will have updated
5243 		 * xs->bucket if it moved, but it will not have updated
5244 		 * any of the other search fields.  Thus, we drop it and
5245 		 * re-search.  Everything should be cached, so it'll be
5246 		 * quick.
5247 		 */
5248 		ocfs2_xattr_bucket_relse(xs->bucket);
5249 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5250 						   xi->name_index,
5251 						   xi->name, xs);
5252 		if (ret && ret != -ENODATA)
5253 			goto out;
5254 		xs->not_found = ret;
5255 		allocation = 1;
5256 		goto try_again;
5257 	}
5258 
5259 xattr_set:
5260 	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5261 out:
5262 	mlog_exit(ret);
5263 	return ret;
5264 }
5265 
5266 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5267 					struct ocfs2_xattr_bucket *bucket,
5268 					void *para)
5269 {
5270 	int ret = 0;
5271 	struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5272 	u16 i;
5273 	struct ocfs2_xattr_entry *xe;
5274 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5275 	struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5276 	int credits = ocfs2_remove_extent_credits(osb->sb) +
5277 		ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5278 
5279 
5280 	ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5281 
5282 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5283 		xe = &xh->xh_entries[i];
5284 		if (ocfs2_xattr_is_local(xe))
5285 			continue;
5286 
5287 		ctxt.handle = ocfs2_start_trans(osb, credits);
5288 		if (IS_ERR(ctxt.handle)) {
5289 			ret = PTR_ERR(ctxt.handle);
5290 			mlog_errno(ret);
5291 			break;
5292 		}
5293 
5294 		ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5295 							i, 0, &ctxt);
5296 
5297 		ocfs2_commit_trans(osb, ctxt.handle);
5298 		if (ret) {
5299 			mlog_errno(ret);
5300 			break;
5301 		}
5302 	}
5303 
5304 	ocfs2_schedule_truncate_log_flush(osb, 1);
5305 	ocfs2_run_deallocs(osb, &ctxt.dealloc);
5306 	return ret;
5307 }
5308 
5309 static int ocfs2_delete_xattr_index_block(struct inode *inode,
5310 					  struct buffer_head *xb_bh)
5311 {
5312 	struct ocfs2_xattr_block *xb =
5313 			(struct ocfs2_xattr_block *)xb_bh->b_data;
5314 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5315 	int ret = 0;
5316 	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
5317 	u64 p_blkno;
5318 
5319 	if (le16_to_cpu(el->l_next_free_rec) == 0)
5320 		return 0;
5321 
5322 	while (name_hash > 0) {
5323 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
5324 					  &e_cpos, &num_clusters, el);
5325 		if (ret) {
5326 			mlog_errno(ret);
5327 			goto out;
5328 		}
5329 
5330 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
5331 						  ocfs2_delete_xattr_in_bucket,
5332 						  NULL);
5333 		if (ret) {
5334 			mlog_errno(ret);
5335 			goto out;
5336 		}
5337 
5338 		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
5339 					     p_blkno, e_cpos, num_clusters);
5340 		if (ret) {
5341 			mlog_errno(ret);
5342 			break;
5343 		}
5344 
5345 		if (e_cpos == 0)
5346 			break;
5347 
5348 		name_hash = e_cpos - 1;
5349 	}
5350 
5351 out:
5352 	return ret;
5353 }
5354 
5355 /*
5356  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5357  * or change the extent record flag), we need to recalculate
5358  * the metaecc for the whole bucket. So it is done here.
5359  *
5360  * Note:
5361  * We have to give the extra credits for the caller.
5362  */
5363 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5364 					    handle_t *handle,
5365 					    void *para)
5366 {
5367 	int ret;
5368 	struct ocfs2_xattr_bucket *bucket =
5369 			(struct ocfs2_xattr_bucket *)para;
5370 
5371 	ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5372 						OCFS2_JOURNAL_ACCESS_WRITE);
5373 	if (ret) {
5374 		mlog_errno(ret);
5375 		return ret;
5376 	}
5377 
5378 	ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5379 
5380 	return 0;
5381 }
5382 
5383 /*
5384  * Special action we need if the xattr value is refcounted.
5385  *
5386  * 1. If the xattr is refcounted, lock the tree.
5387  * 2. CoW the xattr if we are setting the new value and the value
5388  *    will be stored outside.
5389  * 3. In other case, decrease_refcount will work for us, so just
5390  *    lock the refcount tree, calculate the meta and credits is OK.
5391  *
5392  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5393  * currently CoW is a completed transaction, while this function
5394  * will also lock the allocators and let us deadlock. So we will
5395  * CoW the whole xattr value.
5396  */
5397 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5398 					struct ocfs2_dinode *di,
5399 					struct ocfs2_xattr_info *xi,
5400 					struct ocfs2_xattr_search *xis,
5401 					struct ocfs2_xattr_search *xbs,
5402 					struct ocfs2_refcount_tree **ref_tree,
5403 					int *meta_add,
5404 					int *credits)
5405 {
5406 	int ret = 0;
5407 	struct ocfs2_xattr_block *xb;
5408 	struct ocfs2_xattr_entry *xe;
5409 	char *base;
5410 	u32 p_cluster, num_clusters;
5411 	unsigned int ext_flags;
5412 	int name_offset, name_len;
5413 	struct ocfs2_xattr_value_buf vb;
5414 	struct ocfs2_xattr_bucket *bucket = NULL;
5415 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5416 	struct ocfs2_post_refcount refcount;
5417 	struct ocfs2_post_refcount *p = NULL;
5418 	struct buffer_head *ref_root_bh = NULL;
5419 
5420 	if (!xis->not_found) {
5421 		xe = xis->here;
5422 		name_offset = le16_to_cpu(xe->xe_name_offset);
5423 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5424 		base = xis->base;
5425 		vb.vb_bh = xis->inode_bh;
5426 		vb.vb_access = ocfs2_journal_access_di;
5427 	} else {
5428 		int i, block_off = 0;
5429 		xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5430 		xe = xbs->here;
5431 		name_offset = le16_to_cpu(xe->xe_name_offset);
5432 		name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5433 		i = xbs->here - xbs->header->xh_entries;
5434 
5435 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5436 			ret = ocfs2_xattr_bucket_get_name_value(inode,
5437 							bucket_xh(xbs->bucket),
5438 							i, &block_off,
5439 							&name_offset);
5440 			if (ret) {
5441 				mlog_errno(ret);
5442 				goto out;
5443 			}
5444 			base = bucket_block(xbs->bucket, block_off);
5445 			vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5446 			vb.vb_access = ocfs2_journal_access;
5447 
5448 			if (ocfs2_meta_ecc(osb)) {
5449 				/*create parameters for ocfs2_post_refcount. */
5450 				bucket = xbs->bucket;
5451 				refcount.credits = bucket->bu_blocks;
5452 				refcount.para = bucket;
5453 				refcount.func =
5454 					ocfs2_xattr_bucket_post_refcount;
5455 				p = &refcount;
5456 			}
5457 		} else {
5458 			base = xbs->base;
5459 			vb.vb_bh = xbs->xattr_bh;
5460 			vb.vb_access = ocfs2_journal_access_xb;
5461 		}
5462 	}
5463 
5464 	if (ocfs2_xattr_is_local(xe))
5465 		goto out;
5466 
5467 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
5468 				(base + name_offset + name_len);
5469 
5470 	ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5471 				       &num_clusters, &vb.vb_xv->xr_list,
5472 				       &ext_flags);
5473 	if (ret) {
5474 		mlog_errno(ret);
5475 		goto out;
5476 	}
5477 
5478 	/*
5479 	 * We just need to check the 1st extent record, since we always
5480 	 * CoW the whole xattr. So there shouldn't be a xattr with
5481 	 * some REFCOUNT extent recs after the 1st one.
5482 	 */
5483 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5484 		goto out;
5485 
5486 	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5487 				       1, ref_tree, &ref_root_bh);
5488 	if (ret) {
5489 		mlog_errno(ret);
5490 		goto out;
5491 	}
5492 
5493 	/*
5494 	 * If we are deleting the xattr or the new size will be stored inside,
5495 	 * cool, leave it there, the xattr truncate process will remove them
5496 	 * for us(it still needs the refcount tree lock and the meta, credits).
5497 	 * And the worse case is that every cluster truncate will split the
5498 	 * refcount tree, and make the original extent become 3. So we will need
5499 	 * 2 * cluster more extent recs at most.
5500 	 */
5501 	if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5502 
5503 		ret = ocfs2_refcounted_xattr_delete_need(inode,
5504 							 &(*ref_tree)->rf_ci,
5505 							 ref_root_bh, vb.vb_xv,
5506 							 meta_add, credits);
5507 		if (ret)
5508 			mlog_errno(ret);
5509 		goto out;
5510 	}
5511 
5512 	ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5513 				       *ref_tree, ref_root_bh, 0,
5514 				       le32_to_cpu(vb.vb_xv->xr_clusters), p);
5515 	if (ret)
5516 		mlog_errno(ret);
5517 
5518 out:
5519 	brelse(ref_root_bh);
5520 	return ret;
5521 }
5522 
5523 /*
5524  * 'security' attributes support
5525  */
5526 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
5527 					size_t list_size, const char *name,
5528 					size_t name_len)
5529 {
5530 	const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
5531 	const size_t total_len = prefix_len + name_len + 1;
5532 
5533 	if (list && total_len <= list_size) {
5534 		memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
5535 		memcpy(list + prefix_len, name, name_len);
5536 		list[prefix_len + name_len] = '\0';
5537 	}
5538 	return total_len;
5539 }
5540 
5541 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
5542 				    void *buffer, size_t size)
5543 {
5544 	if (strcmp(name, "") == 0)
5545 		return -EINVAL;
5546 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
5547 			       buffer, size);
5548 }
5549 
5550 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
5551 				    const void *value, size_t size, int flags)
5552 {
5553 	if (strcmp(name, "") == 0)
5554 		return -EINVAL;
5555 
5556 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
5557 			       size, flags);
5558 }
5559 
5560 int ocfs2_init_security_get(struct inode *inode,
5561 			    struct inode *dir,
5562 			    struct ocfs2_security_xattr_info *si)
5563 {
5564 	/* check whether ocfs2 support feature xattr */
5565 	if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
5566 		return -EOPNOTSUPP;
5567 	return security_inode_init_security(inode, dir, &si->name, &si->value,
5568 					    &si->value_len);
5569 }
5570 
5571 int ocfs2_init_security_set(handle_t *handle,
5572 			    struct inode *inode,
5573 			    struct buffer_head *di_bh,
5574 			    struct ocfs2_security_xattr_info *si,
5575 			    struct ocfs2_alloc_context *xattr_ac,
5576 			    struct ocfs2_alloc_context *data_ac)
5577 {
5578 	return ocfs2_xattr_set_handle(handle, inode, di_bh,
5579 				     OCFS2_XATTR_INDEX_SECURITY,
5580 				     si->name, si->value, si->value_len, 0,
5581 				     xattr_ac, data_ac);
5582 }
5583 
5584 struct xattr_handler ocfs2_xattr_security_handler = {
5585 	.prefix	= XATTR_SECURITY_PREFIX,
5586 	.list	= ocfs2_xattr_security_list,
5587 	.get	= ocfs2_xattr_security_get,
5588 	.set	= ocfs2_xattr_security_set,
5589 };
5590 
5591 /*
5592  * 'trusted' attributes support
5593  */
5594 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
5595 				       size_t list_size, const char *name,
5596 				       size_t name_len)
5597 {
5598 	const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
5599 	const size_t total_len = prefix_len + name_len + 1;
5600 
5601 	if (list && total_len <= list_size) {
5602 		memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
5603 		memcpy(list + prefix_len, name, name_len);
5604 		list[prefix_len + name_len] = '\0';
5605 	}
5606 	return total_len;
5607 }
5608 
5609 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
5610 				   void *buffer, size_t size)
5611 {
5612 	if (strcmp(name, "") == 0)
5613 		return -EINVAL;
5614 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
5615 			       buffer, size);
5616 }
5617 
5618 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
5619 				   const void *value, size_t size, int flags)
5620 {
5621 	if (strcmp(name, "") == 0)
5622 		return -EINVAL;
5623 
5624 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
5625 			       size, flags);
5626 }
5627 
5628 struct xattr_handler ocfs2_xattr_trusted_handler = {
5629 	.prefix	= XATTR_TRUSTED_PREFIX,
5630 	.list	= ocfs2_xattr_trusted_list,
5631 	.get	= ocfs2_xattr_trusted_get,
5632 	.set	= ocfs2_xattr_trusted_set,
5633 };
5634 
5635 /*
5636  * 'user' attributes support
5637  */
5638 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
5639 				    size_t list_size, const char *name,
5640 				    size_t name_len)
5641 {
5642 	const size_t prefix_len = XATTR_USER_PREFIX_LEN;
5643 	const size_t total_len = prefix_len + name_len + 1;
5644 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5645 
5646 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5647 		return 0;
5648 
5649 	if (list && total_len <= list_size) {
5650 		memcpy(list, XATTR_USER_PREFIX, prefix_len);
5651 		memcpy(list + prefix_len, name, name_len);
5652 		list[prefix_len + name_len] = '\0';
5653 	}
5654 	return total_len;
5655 }
5656 
5657 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
5658 				void *buffer, size_t size)
5659 {
5660 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5661 
5662 	if (strcmp(name, "") == 0)
5663 		return -EINVAL;
5664 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5665 		return -EOPNOTSUPP;
5666 	return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
5667 			       buffer, size);
5668 }
5669 
5670 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
5671 				const void *value, size_t size, int flags)
5672 {
5673 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5674 
5675 	if (strcmp(name, "") == 0)
5676 		return -EINVAL;
5677 	if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
5678 		return -EOPNOTSUPP;
5679 
5680 	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
5681 			       size, flags);
5682 }
5683 
5684 struct xattr_handler ocfs2_xattr_user_handler = {
5685 	.prefix	= XATTR_USER_PREFIX,
5686 	.list	= ocfs2_xattr_user_list,
5687 	.get	= ocfs2_xattr_user_get,
5688 	.set	= ocfs2_xattr_user_set,
5689 };
5690