xref: /openbmc/linux/fs/ocfs2/xattr.c (revision a3944256)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is taken from ext3.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 021110-1307, USA.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/types.h>
30 #include <linux/slab.h>
31 #include <linux/highmem.h>
32 #include <linux/pagemap.h>
33 #include <linux/uio.h>
34 #include <linux/sched.h>
35 #include <linux/splice.h>
36 #include <linux/mount.h>
37 #include <linux/writeback.h>
38 #include <linux/falloc.h>
39 #include <linux/sort.h>
40 
41 #define MLOG_MASK_PREFIX ML_XATTR
42 #include <cluster/masklog.h>
43 
44 #include "ocfs2.h"
45 #include "alloc.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 
59 
/*
 * An xattr value root immediately followed by one extent record.
 * OCFS2_XATTR_ROOT_SIZE is the size of this structure.
 */
struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};
64 
/*
 * Up to OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET buffer heads together with a
 * pointer to an xattr header.
 * NOTE(review): presumably xh points into the data of one of bhs[] -
 * confirm against the bucket code.
 */
struct ocfs2_xattr_bucket {
	struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
	struct ocfs2_xattr_header *xh;
};
69 
/* Bytes occupied by a value root plus the one extent record behind it. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Values of at most this many bytes are flagged as local to the entry. */
#define OCFS2_XATTR_INLINE_SIZE	80

/* A value root whose extent list is initialised with a record count of 1. */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
76 
/* NULL-terminated list of the user and trusted xattr handlers. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	NULL
};

/* Handler lookup table indexed by OCFS2_XATTR_INDEX_* value. */
static struct xattr_handler *ocfs2_xattr_handler_map[] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
};
87 
/* Description of the attribute a set/replace/remove operation works on. */
struct ocfs2_xattr_info {
	/* Attribute type; compared against and stored as the entry type. */
	int name_index;
	/* NUL-terminated attribute name (its length is taken with strlen()). */
	const char *name;
	/* New value bytes; NULL requests removal of the attribute. */
	const void *value;
	/* Number of bytes at value. */
	size_t value_len;
};
94 
/* State carried through the lookup (and later update) of one attribute. */
struct ocfs2_xattr_search {
	/* Buffer head whose data is the inode's dinode. */
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attributes; when they are stored in the inode, xattr_bh
	 * is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	/* Header of the entry array being searched. */
	struct ocfs2_xattr_header *header;
	/* Bucket buffers; zeroed before and released after a block lookup. */
	struct ocfs2_xattr_bucket bucket;
	/* Address that the entries' xe_name_offset values are relative to. */
	void *base;
	/* First byte past the region being searched. */
	void *end;
	/* Entry the search starts from; on return, where the search stopped. */
	struct ocfs2_xattr_entry *here;
	/* Non-zero while the name is not found; starts out as -ENODATA. */
	int not_found;
};
109 
/*
 * Prototypes for static helpers that are used before their definitions;
 * none of them is defined in the part of the file above this point.
 */
static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct ocfs2_xattr_tree_root *xt,
					char *buffer,
					size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs);

static int ocfs2_delete_xattr_index_block(struct inode *inode,
					  struct buffer_head *xb_bh);
136 
137 static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
138 {
139 	struct xattr_handler *handler = NULL;
140 
141 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
142 		handler = ocfs2_xattr_handler_map[name_index];
143 
144 	return handler;
145 }
146 
147 static inline u32 ocfs2_xattr_name_hash(struct inode *inode,
148 					char *prefix,
149 					int prefix_len,
150 					char *name,
151 					int name_len)
152 {
153 	/* Get hash value of uuid from super block */
154 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
155 	int i;
156 
157 	/* hash extended attribute prefix */
158 	for (i = 0; i < prefix_len; i++) {
159 		hash = (hash << OCFS2_HASH_SHIFT) ^
160 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
161 		       *prefix++;
162 	}
163 	/* hash extended attribute name */
164 	for (i = 0; i < name_len; i++) {
165 		hash = (hash << OCFS2_HASH_SHIFT) ^
166 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
167 		       *name++;
168 	}
169 
170 	return hash;
171 }
172 
173 /*
174  * ocfs2_xattr_hash_entry()
175  *
176  * Compute the hash of an extended attribute.
177  */
178 static void ocfs2_xattr_hash_entry(struct inode *inode,
179 				   struct ocfs2_xattr_header *header,
180 				   struct ocfs2_xattr_entry *entry)
181 {
182 	u32 hash = 0;
183 	struct xattr_handler *handler =
184 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
185 	char *prefix = handler->prefix;
186 	char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
187 	int prefix_len = strlen(handler->prefix);
188 
189 	hash = ocfs2_xattr_name_hash(inode, prefix, prefix_len, name,
190 				     entry->xe_name_len);
191 	entry->xe_name_hash = cpu_to_le32(hash);
192 
193 	return;
194 }
195 
/*
 * Add @clusters_to_add clusters to the extent list of the value root @xv,
 * which is stored in @xattr_bh.
 *
 * The allocators are locked and a transaction is started, then clusters
 * are added with ocfs2_add_clusters_in_btree().  If that helper asks for
 * RESTART_TRANS while clusters remain, the transaction is extended and the
 * insert is retried.  If it asks for RESTART_META, the transaction is
 * committed, the allocation contexts are freed and the whole sequence is
 * run again for the remaining clusters.
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct buffer_head *xattr_bh,
					 struct ocfs2_xattr_value_root *xv)
{
	int status = 0;
	int restart_func = 0;
	int credits = 0;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_list *root_el = &xv->xr_list;
	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);

	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);

restart_all:

	status = ocfs2_lock_allocators(inode, xattr_bh, root_el,
				       clusters_to_add, 0, &data_ac,
				       &meta_ac, OCFS2_XATTR_VALUE_EXTENT, xv);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* Keep the cleanup code from committing an error pointer. */
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	status = ocfs2_journal_access(handle, inode, xattr_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Remember the cluster count to work out how many were added. */
	prev_clusters = le32_to_cpu(xv->xr_clusters);
	status = ocfs2_add_clusters_in_btree(osb,
					     inode,
					     &logical_start,
					     clusters_to_add,
					     0,
					     xattr_bh,
					     root_el,
					     handle,
					     data_ac,
					     meta_ac,
					     &why,
					     OCFS2_XATTR_VALUE_EXTENT,
					     xv);
	/* -EAGAIN is not treated as a failure; -ENOSPC fails silently. */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, xattr_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    root_el,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				/* The real error is replaced by -ENOMEM. */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

leave:
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	/* Start over from scratch only if nothing failed in this pass. */
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}

	return status;
}
313 
/*
 * Remove @len clusters starting at logical cluster @cpos from the value
 * tree @xv stored in @root_bh, and append the physical range that starts
 * at cluster @phys_cpos to the truncate log.
 *
 * The work is done in its own transaction while holding the i_mutex of the
 * truncate log inode; the log is flushed first when
 * ocfs2_truncate_log_needs_flush() says so.  xv->xr_clusters is reduced by
 * @len once the extent has been removed.
 *
 * Returns 0 on success or a negative errno.
 */
static int __ocfs2_remove_xattr_range(struct inode *inode,
				      struct buffer_head *root_bh,
				      struct ocfs2_xattr_value_root *xv,
				      u32 cpos, u32 phys_cpos, u32 len,
				      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac = NULL;

	ret = ocfs2_lock_allocators(inode, root_bh, &xv->xr_list,
				    0, 1, NULL, &meta_ac,
				    OCFS2_XATTR_VALUE_EXTENT, xv);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		/* No transaction to commit: skip out_commit. */
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
				  dealloc, OCFS2_XATTR_VALUE_EXTENT, xv);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	le32_add_cpu(&xv->xr_clusters, -len);

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Record the freed physical clusters in the truncate log. */
	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
388 
389 static int ocfs2_xattr_shrink_size(struct inode *inode,
390 				   u32 old_clusters,
391 				   u32 new_clusters,
392 				   struct buffer_head *root_bh,
393 				   struct ocfs2_xattr_value_root *xv)
394 {
395 	int ret = 0;
396 	u32 trunc_len, cpos, phys_cpos, alloc_size;
397 	u64 block;
398 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
399 	struct ocfs2_cached_dealloc_ctxt dealloc;
400 
401 	ocfs2_init_dealloc_ctxt(&dealloc);
402 
403 	if (old_clusters <= new_clusters)
404 		return 0;
405 
406 	cpos = new_clusters;
407 	trunc_len = old_clusters - new_clusters;
408 	while (trunc_len) {
409 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
410 					       &alloc_size, &xv->xr_list);
411 		if (ret) {
412 			mlog_errno(ret);
413 			goto out;
414 		}
415 
416 		if (alloc_size > trunc_len)
417 			alloc_size = trunc_len;
418 
419 		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
420 						 phys_cpos, alloc_size,
421 						 &dealloc);
422 		if (ret) {
423 			mlog_errno(ret);
424 			goto out;
425 		}
426 
427 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
428 		ocfs2_remove_xattr_clusters_from_cache(inode, block,
429 						       alloc_size);
430 		cpos += alloc_size;
431 		trunc_len -= alloc_size;
432 	}
433 
434 out:
435 	ocfs2_schedule_truncate_log_flush(osb, 1);
436 	ocfs2_run_deallocs(osb, &dealloc);
437 
438 	return ret;
439 }
440 
441 static int ocfs2_xattr_value_truncate(struct inode *inode,
442 				      struct buffer_head *root_bh,
443 				      struct ocfs2_xattr_value_root *xv,
444 				      int len)
445 {
446 	int ret;
447 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
448 	u32 old_clusters = le32_to_cpu(xv->xr_clusters);
449 
450 	if (new_clusters == old_clusters)
451 		return 0;
452 
453 	if (new_clusters > old_clusters)
454 		ret = ocfs2_xattr_extend_allocation(inode,
455 						    new_clusters - old_clusters,
456 						    root_bh, xv);
457 	else
458 		ret = ocfs2_xattr_shrink_size(inode,
459 					      old_clusters, new_clusters,
460 					      root_bh, xv);
461 
462 	return ret;
463 }
464 
465 static int ocfs2_xattr_list_entries(struct inode *inode,
466 				    struct ocfs2_xattr_header *header,
467 				    char *buffer, size_t buffer_size)
468 {
469 	size_t rest = buffer_size;
470 	int i;
471 
472 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
473 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
474 		struct xattr_handler *handler =
475 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
476 
477 		if (handler) {
478 			size_t size = handler->list(inode, buffer, rest,
479 					((char *)header +
480 					le16_to_cpu(entry->xe_name_offset)),
481 					entry->xe_name_len);
482 			if (buffer) {
483 				if (size > rest)
484 					return -ERANGE;
485 				buffer += size;
486 			}
487 			rest -= size;
488 		}
489 	}
490 
491 	return buffer_size - rest;
492 }
493 
494 static int ocfs2_xattr_ibody_list(struct inode *inode,
495 				  struct ocfs2_dinode *di,
496 				  char *buffer,
497 				  size_t buffer_size)
498 {
499 	struct ocfs2_xattr_header *header = NULL;
500 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
501 	int ret = 0;
502 
503 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
504 		return ret;
505 
506 	header = (struct ocfs2_xattr_header *)
507 		 ((void *)di + inode->i_sb->s_blocksize -
508 		 le16_to_cpu(di->i_xattr_inline_size));
509 
510 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
511 
512 	return ret;
513 }
514 
515 static int ocfs2_xattr_block_list(struct inode *inode,
516 				  struct ocfs2_dinode *di,
517 				  char *buffer,
518 				  size_t buffer_size)
519 {
520 	struct buffer_head *blk_bh = NULL;
521 	struct ocfs2_xattr_block *xb;
522 	int ret = 0;
523 
524 	if (!di->i_xattr_loc)
525 		return ret;
526 
527 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
528 			       le64_to_cpu(di->i_xattr_loc),
529 			       &blk_bh, OCFS2_BH_CACHED, inode);
530 	if (ret < 0) {
531 		mlog_errno(ret);
532 		return ret;
533 	}
534 	/*Verify the signature of xattr block*/
535 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
536 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
537 		ret = -EFAULT;
538 		goto cleanup;
539 	}
540 
541 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
542 
543 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
544 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
545 		ret = ocfs2_xattr_list_entries(inode, header,
546 					       buffer, buffer_size);
547 	} else {
548 		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
549 		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
550 						   buffer, buffer_size);
551 	}
552 cleanup:
553 	brelse(blk_bh);
554 
555 	return ret;
556 }
557 
558 ssize_t ocfs2_listxattr(struct dentry *dentry,
559 			char *buffer,
560 			size_t size)
561 {
562 	int ret = 0, i_ret = 0, b_ret = 0;
563 	struct buffer_head *di_bh = NULL;
564 	struct ocfs2_dinode *di = NULL;
565 	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
566 
567 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
568 		return ret;
569 
570 	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
571 	if (ret < 0) {
572 		mlog_errno(ret);
573 		return ret;
574 	}
575 
576 	di = (struct ocfs2_dinode *)di_bh->b_data;
577 
578 	down_read(&oi->ip_xattr_sem);
579 	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
580 	if (i_ret < 0)
581 		b_ret = 0;
582 	else {
583 		if (buffer) {
584 			buffer += i_ret;
585 			size -= i_ret;
586 		}
587 		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
588 					       buffer, size);
589 		if (b_ret < 0)
590 			i_ret = 0;
591 	}
592 	up_read(&oi->ip_xattr_sem);
593 	ocfs2_inode_unlock(dentry->d_inode, 0);
594 
595 	brelse(di_bh);
596 
597 	return i_ret + b_ret;
598 }
599 
600 static int ocfs2_xattr_find_entry(int name_index,
601 				  const char *name,
602 				  struct ocfs2_xattr_search *xs)
603 {
604 	struct ocfs2_xattr_entry *entry;
605 	size_t name_len;
606 	int i, cmp = 1;
607 
608 	if (name == NULL)
609 		return -EINVAL;
610 
611 	name_len = strlen(name);
612 	entry = xs->here;
613 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
614 		cmp = name_index - ocfs2_xattr_get_type(entry);
615 		if (!cmp)
616 			cmp = name_len - entry->xe_name_len;
617 		if (!cmp)
618 			cmp = memcmp(name, (xs->base +
619 				     le16_to_cpu(entry->xe_name_offset)),
620 				     name_len);
621 		if (cmp == 0)
622 			break;
623 		entry += 1;
624 	}
625 	xs->here = entry;
626 
627 	return cmp ? -ENODATA : 0;
628 }
629 
630 static int ocfs2_xattr_get_value_outside(struct inode *inode,
631 					 struct ocfs2_xattr_value_root *xv,
632 					 void *buffer,
633 					 size_t len)
634 {
635 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
636 	u64 blkno;
637 	int i, ret = 0;
638 	size_t cplen, blocksize;
639 	struct buffer_head *bh = NULL;
640 	struct ocfs2_extent_list *el;
641 
642 	el = &xv->xr_list;
643 	clusters = le32_to_cpu(xv->xr_clusters);
644 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
645 	blocksize = inode->i_sb->s_blocksize;
646 
647 	cpos = 0;
648 	while (cpos < clusters) {
649 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
650 					       &num_clusters, el);
651 		if (ret) {
652 			mlog_errno(ret);
653 			goto out;
654 		}
655 
656 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
657 		/* Copy ocfs2_xattr_value */
658 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
659 			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
660 					       &bh, OCFS2_BH_CACHED, inode);
661 			if (ret) {
662 				mlog_errno(ret);
663 				goto out;
664 			}
665 
666 			cplen = len >= blocksize ? blocksize : len;
667 			memcpy(buffer, bh->b_data, cplen);
668 			len -= cplen;
669 			buffer += cplen;
670 
671 			brelse(bh);
672 			bh = NULL;
673 			if (len == 0)
674 				break;
675 		}
676 		cpos += num_clusters;
677 	}
678 out:
679 	return ret;
680 }
681 
682 static int ocfs2_xattr_ibody_get(struct inode *inode,
683 				 int name_index,
684 				 const char *name,
685 				 void *buffer,
686 				 size_t buffer_size,
687 				 struct ocfs2_xattr_search *xs)
688 {
689 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
690 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
691 	struct ocfs2_xattr_value_root *xv;
692 	size_t size;
693 	int ret = 0;
694 
695 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
696 		return -ENODATA;
697 
698 	xs->end = (void *)di + inode->i_sb->s_blocksize;
699 	xs->header = (struct ocfs2_xattr_header *)
700 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
701 	xs->base = (void *)xs->header;
702 	xs->here = xs->header->xh_entries;
703 
704 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
705 	if (ret)
706 		return ret;
707 	size = le64_to_cpu(xs->here->xe_value_size);
708 	if (buffer) {
709 		if (size > buffer_size)
710 			return -ERANGE;
711 		if (ocfs2_xattr_is_local(xs->here)) {
712 			memcpy(buffer, (void *)xs->base +
713 			       le16_to_cpu(xs->here->xe_name_offset) +
714 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
715 		} else {
716 			xv = (struct ocfs2_xattr_value_root *)
717 				(xs->base + le16_to_cpu(
718 				 xs->here->xe_name_offset) +
719 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
720 			ret = ocfs2_xattr_get_value_outside(inode, xv,
721 							    buffer, size);
722 			if (ret < 0) {
723 				mlog_errno(ret);
724 				return ret;
725 			}
726 		}
727 	}
728 
729 	return size;
730 }
731 
732 static int ocfs2_xattr_block_get(struct inode *inode,
733 				 int name_index,
734 				 const char *name,
735 				 void *buffer,
736 				 size_t buffer_size,
737 				 struct ocfs2_xattr_search *xs)
738 {
739 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
740 	struct buffer_head *blk_bh = NULL;
741 	struct ocfs2_xattr_block *xb;
742 	struct ocfs2_xattr_value_root *xv;
743 	size_t size;
744 	int ret = -ENODATA, name_offset, name_len, block_off, i;
745 
746 	if (!di->i_xattr_loc)
747 		return ret;
748 
749 	memset(&xs->bucket, 0, sizeof(xs->bucket));
750 
751 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
752 			       le64_to_cpu(di->i_xattr_loc),
753 			       &blk_bh, OCFS2_BH_CACHED, inode);
754 	if (ret < 0) {
755 		mlog_errno(ret);
756 		return ret;
757 	}
758 	/*Verify the signature of xattr block*/
759 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
760 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
761 		ret = -EFAULT;
762 		goto cleanup;
763 	}
764 
765 	xs->xattr_bh = blk_bh;
766 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
767 
768 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
769 		xs->header = &xb->xb_attrs.xb_header;
770 		xs->base = (void *)xs->header;
771 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
772 		xs->here = xs->header->xh_entries;
773 
774 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
775 	} else
776 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
777 						   name_index,
778 						   name, xs);
779 
780 	if (ret)
781 		goto cleanup;
782 	size = le64_to_cpu(xs->here->xe_value_size);
783 	if (buffer) {
784 		ret = -ERANGE;
785 		if (size > buffer_size)
786 			goto cleanup;
787 
788 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
789 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
790 		i = xs->here - xs->header->xh_entries;
791 
792 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
793 			ret = ocfs2_xattr_bucket_get_name_value(inode,
794 								xs->bucket.xh,
795 								i,
796 								&block_off,
797 								&name_offset);
798 			xs->base = xs->bucket.bhs[block_off]->b_data;
799 		}
800 		if (ocfs2_xattr_is_local(xs->here)) {
801 			memcpy(buffer, (void *)xs->base +
802 			       name_offset + name_len, size);
803 		} else {
804 			xv = (struct ocfs2_xattr_value_root *)
805 				(xs->base + name_offset + name_len);
806 			ret = ocfs2_xattr_get_value_outside(inode, xv,
807 							    buffer, size);
808 			if (ret < 0) {
809 				mlog_errno(ret);
810 				goto cleanup;
811 			}
812 		}
813 	}
814 	ret = size;
815 cleanup:
816 	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
817 		brelse(xs->bucket.bhs[i]);
818 	memset(&xs->bucket, 0, sizeof(xs->bucket));
819 
820 	brelse(blk_bh);
821 	return ret;
822 }
823 
824 /* ocfs2_xattr_get()
825  *
826  * Copy an extended attribute into the buffer provided.
827  * Buffer is NULL to compute the size of buffer required.
828  */
829 int ocfs2_xattr_get(struct inode *inode,
830 		    int name_index,
831 		    const char *name,
832 		    void *buffer,
833 		    size_t buffer_size)
834 {
835 	int ret;
836 	struct ocfs2_dinode *di = NULL;
837 	struct buffer_head *di_bh = NULL;
838 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
839 	struct ocfs2_xattr_search xis = {
840 		.not_found = -ENODATA,
841 	};
842 	struct ocfs2_xattr_search xbs = {
843 		.not_found = -ENODATA,
844 	};
845 
846 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
847 		ret = -ENODATA;
848 
849 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
850 	if (ret < 0) {
851 		mlog_errno(ret);
852 		return ret;
853 	}
854 	xis.inode_bh = xbs.inode_bh = di_bh;
855 	di = (struct ocfs2_dinode *)di_bh->b_data;
856 
857 	down_read(&oi->ip_xattr_sem);
858 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
859 				    buffer_size, &xis);
860 	if (ret == -ENODATA)
861 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
862 					    buffer_size, &xbs);
863 	up_read(&oi->ip_xattr_sem);
864 	ocfs2_inode_unlock(inode, 0);
865 
866 	brelse(di_bh);
867 
868 	return ret;
869 }
870 
/*
 * Write @value_len bytes from @value into the clusters already allocated
 * to the value tree @xv.
 *
 * All blocks are written inside a single transaction whose credit count is
 * the number of blocks in the clusters needed for @value_len.  Each block
 * is read, journaled, filled with up to one block of data and, when the
 * data ends inside it, zero-padded to the end of the block.  It is a BUG
 * for @xv to hold fewer clusters than @value_len requires.
 *
 * Returns 0 on success or a negative errno.
 */
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
					   struct ocfs2_xattr_value_root *xv,
					   const void *value,
					   int value_len)
{
	int ret = 0, i, cp_len, credits;
	u16 blocksize = inode->i_sb->s_blocksize;
	u32 p_cluster, num_clusters;
	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
	u64 blkno;
	struct buffer_head *bh = NULL;
	handle_t *handle;

	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));

	/* One credit per block that may be touched. */
	credits = clusters * bpc;
	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}

	while (cpos < clusters) {
		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
					       &num_clusters, &xv->xr_list);
		if (ret) {
			mlog_errno(ret);
			goto out_commit;
		}

		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
					       &bh, OCFS2_BH_CACHED, inode);
			if (ret) {
				mlog_errno(ret);
				goto out_commit;
			}

			ret = ocfs2_journal_access(handle,
						   inode,
						   bh,
						   OCFS2_JOURNAL_ACCESS_WRITE);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}

			cp_len = value_len > blocksize ? blocksize : value_len;
			memcpy(bh->b_data, value, cp_len);
			value_len -= cp_len;
			value += cp_len;
			/* Zero whatever is left of a partially filled block. */
			if (cp_len < blocksize)
				memset(bh->b_data + cp_len, 0,
				       blocksize - cp_len);

			ret = ocfs2_journal_dirty(handle, bh);
			if (ret < 0) {
				mlog_errno(ret);
				goto out_commit;
			}
			brelse(bh);
			bh = NULL;

			/*
			 * XXX: do we need to empty all the following
			 * blocks in this cluster?
			 */
			if (!value_len)
				break;
		}
		cpos += num_clusters;
	}
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	/* Releases the buffer an error path may still hold; NULL otherwise. */
	brelse(bh);

	return ret;
}
954 
/*
 * Wipe an entry and its name/value-root area from the xattr header.
 *
 * In one transaction on xs->xattr_bh the header's entry count is
 * decremented, the entry at xs->here is zeroed and the area at
 * xs->base + @offs, sized for the padded name of @xi plus a value root,
 * is zeroed.
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_xattr_cleanup(struct inode *inode,
			       struct ocfs2_xattr_info *xi,
			       struct ocfs2_xattr_search *xs,
			       size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}
	/* Decrease xattr count */
	le16_add_cpu(&xs->header->xh_count, -1);
	/* Remove the xattr entry and tree root which have already been set */
	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
	memset(val, 0, size);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
993 
/*
 * Update the entry at xs->here to describe the new value of @xi.
 *
 * In one transaction on xs->xattr_bh the entry gets @offs as its name
 * offset and xi->value_len as its value size; it is flagged local when the
 * value length does not exceed OCFS2_XATTR_INLINE_SIZE, and its name hash
 * is recomputed.
 *
 * Returns 0 on success or a negative errno.
 */
static int ocfs2_xattr_update_entry(struct inode *inode,
				    struct ocfs2_xattr_info *xi,
				    struct ocfs2_xattr_search *xs,
				    size_t offs)
{
	handle_t *handle = NULL;
	int ret = 0;

	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out;
	}
	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	xs->here->xe_name_offset = cpu_to_le16(offs);
	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
		ocfs2_xattr_set_local(xs->here, 1);
	else
		ocfs2_xattr_set_local(xs->here, 0);
	/* The hash reads the name through the offset that was just set. */
	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);

	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
	if (ret < 0)
		mlog_errno(ret);
out_commit:
	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
	return ret;
}
1032 
/*
 * ocfs2_xattr_set_value_outside()
 *
 * Set large size value in B tree.
 *
 * The area at xs->base + @offs is zeroed and filled with the name of @xi
 * followed by an empty value root (no clusters, depth 0, room for one
 * extent record).  The root is then grown to hold xi->value_len bytes, the
 * value is written into the allocated clusters and finally the entry at
 * xs->here is updated to point at @offs.
 *
 * Returns 0 on success or the negative errno of the first failing step.
 */
static int ocfs2_xattr_set_value_outside(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 size_t offs)
{
	size_t name_len = strlen(xi->name);
	void *val = xs->base + offs;
	struct ocfs2_xattr_value_root *xv = NULL;
	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
	int ret = 0;

	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	/* The value root sits right behind the padded name. */
	xv = (struct ocfs2_xattr_value_root *)
		(val + OCFS2_XATTR_SIZE(name_len));
	xv->xr_clusters = 0;
	xv->xr_last_eb_blk = 0;
	xv->xr_list.l_tree_depth = 0;
	xv->xr_list.l_count = cpu_to_le16(1);
	xv->xr_list.l_next_free_rec = 0;

	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
					 xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
					      xi->value_len);
	if (ret < 0) {
		mlog_errno(ret);
		return ret;
	}
	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
	if (ret < 0)
		mlog_errno(ret);

	return ret;
}
1077 
/*
 * ocfs2_xattr_set_entry_local()
 *
 * Set, replace or remove extended attribute in local.
 *
 * @xi:       attribute to store; a NULL xi->value requests removal.
 * @xs:       search state; xs->here is the entry operated on and
 *            xs->not_found tells whether the name already exists.
 * @last:     entry slot that is initialised when a new attribute is
 *            inserted.
 * @min_offs: offset from xs->base of the lowest name+value currently in
 *            use; a new name+value is placed directly below it.
 *
 * NOTE(review): the insert path fills in @last, while the final block
 * writes through xs->here; presumably the caller passes the same slot for
 * both in that case - confirm against the caller.
 */
static void ocfs2_xattr_set_entry_local(struct inode *inode,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xs,
					struct ocfs2_xattr_entry *last,
					size_t min_offs)
{
	size_t name_len = strlen(xi->name);
	int i;

	if (xi->value && xs->not_found) {
		/* Insert the new xattr entry. */
		le16_add_cpu(&xs->header->xh_count, 1);
		ocfs2_xattr_set_type(last, xi->name_index);
		ocfs2_xattr_set_local(last, 1);
		last->xe_name_len = name_len;
	} else {
		void *first_val;
		void *val;
		size_t offs, size;

		first_val = xs->base + min_offs;
		offs = le16_to_cpu(xs->here->xe_name_offset);
		val = xs->base + offs;

		/*
		 * Space taken by the old name+value: a value larger than
		 * OCFS2_XATTR_INLINE_SIZE is represented by a value root.
		 */
		if (le64_to_cpu(xs->here->xe_value_size) >
		    OCFS2_XATTR_INLINE_SIZE)
			size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_ROOT_SIZE;
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));

		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_SIZE(xi->value_len)) {
			/* The old and the new value have the
			   same size. Just replace the value. */
			ocfs2_xattr_set_local(xs->here, 1);
			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
			/* Clear value bytes. */
			memset(val + OCFS2_XATTR_SIZE(name_len),
			       0,
			       OCFS2_XATTR_SIZE(xi->value_len));
			memcpy(val + OCFS2_XATTR_SIZE(name_len),
			       xi->value,
			       xi->value_len);
			return;
		}
		/* Remove the old name+value. */
		/* Everything stored below it moves up by size bytes. */
		memmove(first_val + size, first_val, val - first_val);
		memset(first_val, 0, size);
		xs->here->xe_name_hash = 0;
		xs->here->xe_name_offset = 0;
		ocfs2_xattr_set_local(xs->here, 1);
		xs->here->xe_value_size = 0;

		min_offs += size;

		/* Adjust all value offsets. */
		last = xs->header->xh_entries;
		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
			size_t o = le16_to_cpu(last->xe_name_offset);

			/* Only data stored below the removed area has moved. */
			if (o < offs)
				last->xe_name_offset = cpu_to_le16(o + size);
			last += 1;
		}

		if (!xi->value) {
			/* Remove the old entry. */
			/* last is one past the final entry after the loop. */
			last -= 1;
			memmove(xs->here, xs->here + 1,
				(void *)last - (void *)xs->here);
			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
			le16_add_cpu(&xs->header->xh_count, -1);
		}
	}
	if (xi->value) {
		/* Insert the new name+value. */
		size_t size = OCFS2_XATTR_SIZE(name_len) +
				OCFS2_XATTR_SIZE(xi->value_len);
		void *val = xs->base + min_offs - size;

		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
		memset(val, 0, size);
		memcpy(val, xi->name, name_len);
		memcpy(val + OCFS2_XATTR_SIZE(name_len),
		       xi->value,
		       xi->value_len);
		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
		ocfs2_xattr_set_local(xs->here, 1);
		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
	}

	return;
}
1178 
1179 /*
1180  * ocfs2_xattr_set_entry()
1181  *
1182  * Set extended attribute entry into inode or block.
1183  *
1184  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1185  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1186  * then set value in B tree with set_value_outside().
1187  */
1188 static int ocfs2_xattr_set_entry(struct inode *inode,
1189 				 struct ocfs2_xattr_info *xi,
1190 				 struct ocfs2_xattr_search *xs,
1191 				 int flag)
1192 {
1193 	struct ocfs2_xattr_entry *last;
1194 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1195 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1196 	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1197 	size_t size_l = 0;
1198 	handle_t *handle = NULL;
1199 	int free, i, ret;
1200 	struct ocfs2_xattr_info xi_l = {
1201 		.name_index = xi->name_index,
1202 		.name = xi->name,
1203 		.value = xi->value,
1204 		.value_len = xi->value_len,
1205 	};
1206 
1207 	/* Compute min_offs, last and free space. */
1208 	last = xs->header->xh_entries;
1209 
1210 	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1211 		size_t offs = le16_to_cpu(last->xe_name_offset);
1212 		if (offs < min_offs)
1213 			min_offs = offs;
1214 		last += 1;
1215 	}
1216 
1217 	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1218 	if (free < 0)
1219 		return -EFAULT;
1220 
1221 	if (!xs->not_found) {
1222 		size_t size = 0;
1223 		if (ocfs2_xattr_is_local(xs->here))
1224 			size = OCFS2_XATTR_SIZE(name_len) +
1225 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1226 		else
1227 			size = OCFS2_XATTR_SIZE(name_len) +
1228 				OCFS2_XATTR_ROOT_SIZE;
1229 		free += (size + sizeof(struct ocfs2_xattr_entry));
1230 	}
1231 	/* Check free space in inode or block */
1232 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1233 		if (free < sizeof(struct ocfs2_xattr_entry) +
1234 			   OCFS2_XATTR_SIZE(name_len) +
1235 			   OCFS2_XATTR_ROOT_SIZE) {
1236 			ret = -ENOSPC;
1237 			goto out;
1238 		}
1239 		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1240 		xi_l.value = (void *)&def_xv;
1241 		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1242 	} else if (xi->value) {
1243 		if (free < sizeof(struct ocfs2_xattr_entry) +
1244 			   OCFS2_XATTR_SIZE(name_len) +
1245 			   OCFS2_XATTR_SIZE(xi->value_len)) {
1246 			ret = -ENOSPC;
1247 			goto out;
1248 		}
1249 	}
1250 
1251 	if (!xs->not_found) {
1252 		/* For existing extended attribute */
1253 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1254 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1255 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1256 		void *val = xs->base + offs;
1257 
1258 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1259 			/* Replace existing local xattr with tree root */
1260 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1261 							    offs);
1262 			if (ret < 0)
1263 				mlog_errno(ret);
1264 			goto out;
1265 		} else if (!ocfs2_xattr_is_local(xs->here)) {
1266 			/* For existing xattr which has value outside */
1267 			struct ocfs2_xattr_value_root *xv = NULL;
1268 			xv = (struct ocfs2_xattr_value_root *)(val +
1269 				OCFS2_XATTR_SIZE(name_len));
1270 
1271 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1272 				/*
1273 				 * If new value need set outside also,
1274 				 * first truncate old value to new value,
1275 				 * then set new value with set_value_outside().
1276 				 */
1277 				ret = ocfs2_xattr_value_truncate(inode,
1278 								 xs->xattr_bh,
1279 								 xv,
1280 								 xi->value_len);
1281 				if (ret < 0) {
1282 					mlog_errno(ret);
1283 					goto out;
1284 				}
1285 
1286 				ret = __ocfs2_xattr_set_value_outside(inode,
1287 								xv,
1288 								xi->value,
1289 								xi->value_len);
1290 				if (ret < 0) {
1291 					mlog_errno(ret);
1292 					goto out;
1293 				}
1294 
1295 				ret = ocfs2_xattr_update_entry(inode,
1296 							       xi,
1297 							       xs,
1298 							       offs);
1299 				if (ret < 0)
1300 					mlog_errno(ret);
1301 				goto out;
1302 			} else {
1303 				/*
1304 				 * If new value need set in local,
1305 				 * just trucate old value to zero.
1306 				 */
1307 				 ret = ocfs2_xattr_value_truncate(inode,
1308 								 xs->xattr_bh,
1309 								 xv,
1310 								 0);
1311 				if (ret < 0)
1312 					mlog_errno(ret);
1313 			}
1314 		}
1315 	}
1316 
1317 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1318 				   OCFS2_INODE_UPDATE_CREDITS);
1319 	if (IS_ERR(handle)) {
1320 		ret = PTR_ERR(handle);
1321 		mlog_errno(ret);
1322 		goto out;
1323 	}
1324 
1325 	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1326 				   OCFS2_JOURNAL_ACCESS_WRITE);
1327 	if (ret) {
1328 		mlog_errno(ret);
1329 		goto out_commit;
1330 	}
1331 
1332 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1333 		/*set extended attribue in external blcok*/
1334 		ret = ocfs2_extend_trans(handle,
1335 					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1336 		if (ret) {
1337 			mlog_errno(ret);
1338 			goto out_commit;
1339 		}
1340 		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1341 					   OCFS2_JOURNAL_ACCESS_WRITE);
1342 		if (ret) {
1343 			mlog_errno(ret);
1344 			goto out_commit;
1345 		}
1346 	}
1347 
1348 	/*
1349 	 * Set value in local, include set tree root in local.
1350 	 * This is the first step for value size >INLINE_SIZE.
1351 	 */
1352 	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1353 
1354 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1355 		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1356 		if (ret < 0) {
1357 			mlog_errno(ret);
1358 			goto out_commit;
1359 		}
1360 	}
1361 
1362 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1363 	    (flag & OCFS2_INLINE_XATTR_FL)) {
1364 		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1365 		unsigned int xattrsize = osb->s_xattr_inline_size;
1366 
1367 		/*
1368 		 * Adjust extent record count or inline data size
1369 		 * to reserve space for extended attribute.
1370 		 */
1371 		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1372 			struct ocfs2_inline_data *idata = &di->id2.i_data;
1373 			le16_add_cpu(&idata->id_count, -xattrsize);
1374 		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1375 			struct ocfs2_extent_list *el = &di->id2.i_list;
1376 			le16_add_cpu(&el->l_count, -(xattrsize /
1377 					sizeof(struct ocfs2_extent_rec)));
1378 		}
1379 		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1380 	}
1381 	/* Update xattr flag */
1382 	spin_lock(&oi->ip_lock);
1383 	oi->ip_dyn_features |= flag;
1384 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1385 	spin_unlock(&oi->ip_lock);
1386 	/* Update inode ctime */
1387 	inode->i_ctime = CURRENT_TIME;
1388 	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1389 	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1390 
1391 	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1392 	if (ret < 0)
1393 		mlog_errno(ret);
1394 
1395 out_commit:
1396 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1397 
1398 	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1399 		/*
1400 		 * Set value outside in B tree.
1401 		 * This is the second step for value size > INLINE_SIZE.
1402 		 */
1403 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1404 		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1405 		if (ret < 0) {
1406 			int ret2;
1407 
1408 			mlog_errno(ret);
1409 			/*
1410 			 * If set value outside failed, we have to clean
1411 			 * the junk tree root we have already set in local.
1412 			 */
1413 			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1414 			if (ret2 < 0)
1415 				mlog_errno(ret2);
1416 		}
1417 	}
1418 out:
1419 	return ret;
1420 
1421 }
1422 
1423 static int ocfs2_xattr_free_block(handle_t *handle,
1424 				  struct ocfs2_super *osb,
1425 				  struct ocfs2_xattr_block *xb)
1426 {
1427 	struct inode *xb_alloc_inode;
1428 	struct buffer_head *xb_alloc_bh = NULL;
1429 	u64 blk = le64_to_cpu(xb->xb_blkno);
1430 	u16 bit = le16_to_cpu(xb->xb_suballoc_bit);
1431 	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1432 	int ret = 0;
1433 
1434 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1435 				EXTENT_ALLOC_SYSTEM_INODE,
1436 				le16_to_cpu(xb->xb_suballoc_slot));
1437 	if (!xb_alloc_inode) {
1438 		ret = -ENOMEM;
1439 		mlog_errno(ret);
1440 		goto out;
1441 	}
1442 	mutex_lock(&xb_alloc_inode->i_mutex);
1443 
1444 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1445 	if (ret < 0) {
1446 		mlog_errno(ret);
1447 		goto out_mutex;
1448 	}
1449 	ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
1450 	if (ret < 0) {
1451 		mlog_errno(ret);
1452 		goto out_unlock;
1453 	}
1454 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1455 				       bit, bg_blkno, 1);
1456 	if (ret < 0)
1457 		mlog_errno(ret);
1458 out_unlock:
1459 	ocfs2_inode_unlock(xb_alloc_inode, 1);
1460 	brelse(xb_alloc_bh);
1461 out_mutex:
1462 	mutex_unlock(&xb_alloc_inode->i_mutex);
1463 	iput(xb_alloc_inode);
1464 out:
1465 	return ret;
1466 }
1467 
1468 static int ocfs2_remove_value_outside(struct inode*inode,
1469 				      struct buffer_head *bh,
1470 				      struct ocfs2_xattr_header *header)
1471 {
1472 	int ret = 0, i;
1473 
1474 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1475 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1476 
1477 		if (!ocfs2_xattr_is_local(entry)) {
1478 			struct ocfs2_xattr_value_root *xv;
1479 			void *val;
1480 
1481 			val = (void *)header +
1482 				le16_to_cpu(entry->xe_name_offset);
1483 			xv = (struct ocfs2_xattr_value_root *)
1484 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1485 			ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1486 			if (ret < 0) {
1487 				mlog_errno(ret);
1488 				return ret;
1489 			}
1490 		}
1491 	}
1492 
1493 	return ret;
1494 }
1495 
1496 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1497 				    struct buffer_head *di_bh)
1498 {
1499 
1500 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1501 	struct ocfs2_xattr_header *header;
1502 	int ret;
1503 
1504 	header = (struct ocfs2_xattr_header *)
1505 		 ((void *)di + inode->i_sb->s_blocksize -
1506 		 le16_to_cpu(di->i_xattr_inline_size));
1507 
1508 	ret = ocfs2_remove_value_outside(inode, di_bh, header);
1509 
1510 	return ret;
1511 }
1512 
1513 static int ocfs2_xattr_block_remove(struct inode *inode,
1514 				    struct buffer_head *blk_bh)
1515 {
1516 	struct ocfs2_xattr_block *xb;
1517 	int ret = 0;
1518 
1519 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1520 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1521 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1522 		ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1523 	} else
1524 		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1525 
1526 	return ret;
1527 }
1528 
1529 /*
1530  * ocfs2_xattr_remove()
1531  *
1532  * Free extended attribute resources associated with this inode.
1533  */
1534 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1535 {
1536 	struct ocfs2_xattr_block *xb;
1537 	struct buffer_head *blk_bh = NULL;
1538 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1539 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1540 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1541 	handle_t *handle;
1542 	int ret;
1543 
1544 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1545 		return 0;
1546 
1547 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1548 		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1549 		if (ret < 0) {
1550 			mlog_errno(ret);
1551 			goto out;
1552 		}
1553 	}
1554 	if (di->i_xattr_loc) {
1555 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1556 				       le64_to_cpu(di->i_xattr_loc),
1557 				       &blk_bh, OCFS2_BH_CACHED, inode);
1558 		if (ret < 0) {
1559 			mlog_errno(ret);
1560 			return ret;
1561 		}
1562 		/*Verify the signature of xattr block*/
1563 		if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1564 			   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1565 			ret = -EFAULT;
1566 			goto out;
1567 		}
1568 
1569 		ret = ocfs2_xattr_block_remove(inode, blk_bh);
1570 		if (ret < 0) {
1571 			mlog_errno(ret);
1572 			goto out;
1573 		}
1574 	}
1575 
1576 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1577 				   OCFS2_INODE_UPDATE_CREDITS);
1578 	if (IS_ERR(handle)) {
1579 		ret = PTR_ERR(handle);
1580 		mlog_errno(ret);
1581 		goto out;
1582 	}
1583 	ret = ocfs2_journal_access(handle, inode, di_bh,
1584 				   OCFS2_JOURNAL_ACCESS_WRITE);
1585 	if (ret) {
1586 		mlog_errno(ret);
1587 		goto out_commit;
1588 	}
1589 
1590 	if (di->i_xattr_loc) {
1591 		xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1592 		ocfs2_xattr_free_block(handle, osb, xb);
1593 		di->i_xattr_loc = cpu_to_le64(0);
1594 	}
1595 
1596 	spin_lock(&oi->ip_lock);
1597 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1598 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1599 	spin_unlock(&oi->ip_lock);
1600 
1601 	ret = ocfs2_journal_dirty(handle, di_bh);
1602 	if (ret < 0)
1603 		mlog_errno(ret);
1604 out_commit:
1605 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1606 out:
1607 	brelse(blk_bh);
1608 
1609 	return ret;
1610 }
1611 
1612 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1613 					struct ocfs2_dinode *di)
1614 {
1615 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1616 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1617 	int free;
1618 
1619 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1620 		return 0;
1621 
1622 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1623 		struct ocfs2_inline_data *idata = &di->id2.i_data;
1624 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1625 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
1626 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
1627 			le64_to_cpu(di->i_size);
1628 	} else {
1629 		struct ocfs2_extent_list *el = &di->id2.i_list;
1630 		free = (le16_to_cpu(el->l_count) -
1631 			le16_to_cpu(el->l_next_free_rec)) *
1632 			sizeof(struct ocfs2_extent_rec);
1633 	}
1634 	if (free >= xattrsize)
1635 		return 1;
1636 
1637 	return 0;
1638 }
1639 
1640 /*
1641  * ocfs2_xattr_ibody_find()
1642  *
1643  * Find extended attribute in inode block and
1644  * fill search info into struct ocfs2_xattr_search.
1645  */
1646 static int ocfs2_xattr_ibody_find(struct inode *inode,
1647 				  int name_index,
1648 				  const char *name,
1649 				  struct ocfs2_xattr_search *xs)
1650 {
1651 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1652 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1653 	int ret;
1654 	int has_space = 0;
1655 
1656 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1657 		return 0;
1658 
1659 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1660 		down_read(&oi->ip_alloc_sem);
1661 		has_space = ocfs2_xattr_has_space_inline(inode, di);
1662 		up_read(&oi->ip_alloc_sem);
1663 		if (!has_space)
1664 			return 0;
1665 	}
1666 
1667 	xs->xattr_bh = xs->inode_bh;
1668 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1669 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1670 		xs->header = (struct ocfs2_xattr_header *)
1671 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1672 	else
1673 		xs->header = (struct ocfs2_xattr_header *)
1674 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1675 	xs->base = (void *)xs->header;
1676 	xs->here = xs->header->xh_entries;
1677 
1678 	/* Find the named attribute. */
1679 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1680 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1681 		if (ret && ret != -ENODATA)
1682 			return ret;
1683 		xs->not_found = ret;
1684 	}
1685 
1686 	return 0;
1687 }
1688 
1689 /*
1690  * ocfs2_xattr_ibody_set()
1691  *
1692  * Set, replace or remove an extended attribute into inode block.
1693  *
1694  */
1695 static int ocfs2_xattr_ibody_set(struct inode *inode,
1696 				 struct ocfs2_xattr_info *xi,
1697 				 struct ocfs2_xattr_search *xs)
1698 {
1699 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1700 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1701 	int ret;
1702 
1703 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1704 		return -ENOSPC;
1705 
1706 	down_write(&oi->ip_alloc_sem);
1707 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1708 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
1709 			ret = -ENOSPC;
1710 			goto out;
1711 		}
1712 	}
1713 
1714 	ret = ocfs2_xattr_set_entry(inode, xi, xs,
1715 				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1716 out:
1717 	up_write(&oi->ip_alloc_sem);
1718 
1719 	return ret;
1720 }
1721 
1722 /*
1723  * ocfs2_xattr_block_find()
1724  *
1725  * Find extended attribute in external block and
1726  * fill search info into struct ocfs2_xattr_search.
1727  */
1728 static int ocfs2_xattr_block_find(struct inode *inode,
1729 				  int name_index,
1730 				  const char *name,
1731 				  struct ocfs2_xattr_search *xs)
1732 {
1733 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1734 	struct buffer_head *blk_bh = NULL;
1735 	struct ocfs2_xattr_block *xb;
1736 	int ret = 0;
1737 
1738 	if (!di->i_xattr_loc)
1739 		return ret;
1740 
1741 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1742 			       le64_to_cpu(di->i_xattr_loc),
1743 			       &blk_bh, OCFS2_BH_CACHED, inode);
1744 	if (ret < 0) {
1745 		mlog_errno(ret);
1746 		return ret;
1747 	}
1748 	/*Verify the signature of xattr block*/
1749 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1750 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1751 			ret = -EFAULT;
1752 			goto cleanup;
1753 	}
1754 
1755 	xs->xattr_bh = blk_bh;
1756 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1757 
1758 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1759 		xs->header = &xb->xb_attrs.xb_header;
1760 		xs->base = (void *)xs->header;
1761 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1762 		xs->here = xs->header->xh_entries;
1763 
1764 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1765 	} else
1766 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1767 						   name_index,
1768 						   name, xs);
1769 
1770 	if (ret && ret != -ENODATA) {
1771 		xs->xattr_bh = NULL;
1772 		goto cleanup;
1773 	}
1774 	xs->not_found = ret;
1775 	return 0;
1776 cleanup:
1777 	brelse(blk_bh);
1778 
1779 	return ret;
1780 }
1781 
1782 /*
1783  * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1784  * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1785  * re-initialized.
1786  */
1787 static int ocfs2_restore_xattr_block(struct inode *inode,
1788 				     struct ocfs2_xattr_search *xs)
1789 {
1790 	int ret;
1791 	handle_t *handle;
1792 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1793 	struct ocfs2_xattr_block *xb =
1794 		(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1795 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1796 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
1797 
1798 	BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1799 		le16_to_cpu(el->l_next_free_rec) != 0);
1800 
1801 	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1802 	if (IS_ERR(handle)) {
1803 		ret = PTR_ERR(handle);
1804 		handle = NULL;
1805 		goto out;
1806 	}
1807 
1808 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1809 				   OCFS2_JOURNAL_ACCESS_WRITE);
1810 	if (ret < 0) {
1811 		mlog_errno(ret);
1812 		goto out_commit;
1813 	}
1814 
1815 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1816 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
1817 
1818 	xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1819 
1820 	ocfs2_journal_dirty(handle, xs->xattr_bh);
1821 
1822 out_commit:
1823 	ocfs2_commit_trans(osb, handle);
1824 out:
1825 	return ret;
1826 }
1827 
1828 /*
1829  * ocfs2_xattr_block_set()
1830  *
1831  * Set, replace or remove an extended attribute into external block.
1832  *
1833  */
1834 static int ocfs2_xattr_block_set(struct inode *inode,
1835 				 struct ocfs2_xattr_info *xi,
1836 				 struct ocfs2_xattr_search *xs)
1837 {
1838 	struct buffer_head *new_bh = NULL;
1839 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1840 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1841 	struct ocfs2_alloc_context *meta_ac = NULL;
1842 	handle_t *handle = NULL;
1843 	struct ocfs2_xattr_block *xblk = NULL;
1844 	u16 suballoc_bit_start;
1845 	u32 num_got;
1846 	u64 first_blkno;
1847 	int ret;
1848 
1849 	if (!xs->xattr_bh) {
1850 		/*
1851 		 * Alloc one external block for extended attribute
1852 		 * outside of inode.
1853 		 */
1854 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1855 		if (ret < 0) {
1856 			mlog_errno(ret);
1857 			goto out;
1858 		}
1859 		handle = ocfs2_start_trans(osb,
1860 					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1861 		if (IS_ERR(handle)) {
1862 			ret = PTR_ERR(handle);
1863 			mlog_errno(ret);
1864 			goto out;
1865 		}
1866 		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1867 					   OCFS2_JOURNAL_ACCESS_CREATE);
1868 		if (ret < 0) {
1869 			mlog_errno(ret);
1870 			goto out_commit;
1871 		}
1872 
1873 		ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1874 					   &suballoc_bit_start, &num_got,
1875 					   &first_blkno);
1876 		if (ret < 0) {
1877 			mlog_errno(ret);
1878 			goto out_commit;
1879 		}
1880 
1881 		new_bh = sb_getblk(inode->i_sb, first_blkno);
1882 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
1883 
1884 		ret = ocfs2_journal_access(handle, inode, new_bh,
1885 					   OCFS2_JOURNAL_ACCESS_CREATE);
1886 		if (ret < 0) {
1887 			mlog_errno(ret);
1888 			goto out_commit;
1889 		}
1890 
1891 		/* Initialize ocfs2_xattr_block */
1892 		xs->xattr_bh = new_bh;
1893 		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1894 		memset(xblk, 0, inode->i_sb->s_blocksize);
1895 		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1896 		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1897 		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1898 		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1899 		xblk->xb_blkno = cpu_to_le64(first_blkno);
1900 
1901 		xs->header = &xblk->xb_attrs.xb_header;
1902 		xs->base = (void *)xs->header;
1903 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1904 		xs->here = xs->header->xh_entries;
1905 
1906 
1907 		ret = ocfs2_journal_dirty(handle, new_bh);
1908 		if (ret < 0) {
1909 			mlog_errno(ret);
1910 			goto out_commit;
1911 		}
1912 		di->i_xattr_loc = cpu_to_le64(first_blkno);
1913 		ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1914 		if (ret < 0)
1915 			mlog_errno(ret);
1916 out_commit:
1917 		ocfs2_commit_trans(osb, handle);
1918 out:
1919 		if (meta_ac)
1920 			ocfs2_free_alloc_context(meta_ac);
1921 		if (ret < 0)
1922 			return ret;
1923 	} else
1924 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1925 
1926 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1927 		/* Set extended attribute into external block */
1928 		ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1929 		if (!ret || ret != -ENOSPC)
1930 			goto end;
1931 
1932 		ret = ocfs2_xattr_create_index_block(inode, xs);
1933 		if (ret)
1934 			goto end;
1935 	}
1936 
1937 	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1938 	if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1939 		ret = ocfs2_restore_xattr_block(inode, xs);
1940 
1941 end:
1942 
1943 	return ret;
1944 }
1945 
1946 /*
1947  * ocfs2_xattr_set()
1948  *
1949  * Set, replace or remove an extended attribute for this inode.
1950  * value is NULL to remove an existing extended attribute, else either
1951  * create or replace an extended attribute.
1952  */
1953 int ocfs2_xattr_set(struct inode *inode,
1954 		    int name_index,
1955 		    const char *name,
1956 		    const void *value,
1957 		    size_t value_len,
1958 		    int flags)
1959 {
1960 	struct buffer_head *di_bh = NULL;
1961 	struct ocfs2_dinode *di;
1962 	int ret;
1963 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1964 
1965 	struct ocfs2_xattr_info xi = {
1966 		.name_index = name_index,
1967 		.name = name,
1968 		.value = value,
1969 		.value_len = value_len,
1970 	};
1971 
1972 	struct ocfs2_xattr_search xis = {
1973 		.not_found = -ENODATA,
1974 	};
1975 
1976 	struct ocfs2_xattr_search xbs = {
1977 		.not_found = -ENODATA,
1978 	};
1979 
1980 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
1981 	if (ret < 0) {
1982 		mlog_errno(ret);
1983 		return ret;
1984 	}
1985 	xis.inode_bh = xbs.inode_bh = di_bh;
1986 	di = (struct ocfs2_dinode *)di_bh->b_data;
1987 
1988 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
1989 	/*
1990 	 * Scan inode and external block to find the same name
1991 	 * extended attribute and collect search infomation.
1992 	 */
1993 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
1994 	if (ret)
1995 		goto cleanup;
1996 	if (xis.not_found) {
1997 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
1998 		if (ret)
1999 			goto cleanup;
2000 	}
2001 
2002 	if (xis.not_found && xbs.not_found) {
2003 		ret = -ENODATA;
2004 		if (flags & XATTR_REPLACE)
2005 			goto cleanup;
2006 		ret = 0;
2007 		if (!value)
2008 			goto cleanup;
2009 	} else {
2010 		ret = -EEXIST;
2011 		if (flags & XATTR_CREATE)
2012 			goto cleanup;
2013 	}
2014 
2015 	if (!value) {
2016 		/* Remove existing extended attribute */
2017 		if (!xis.not_found)
2018 			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2019 		else if (!xbs.not_found)
2020 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2021 	} else {
2022 		/* We always try to set extended attribute into inode first*/
2023 		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2024 		if (!ret && !xbs.not_found) {
2025 			/*
2026 			 * If succeed and that extended attribute existing in
2027 			 * external block, then we will remove it.
2028 			 */
2029 			xi.value = NULL;
2030 			xi.value_len = 0;
2031 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2032 		} else if (ret == -ENOSPC) {
2033 			if (di->i_xattr_loc && !xbs.xattr_bh) {
2034 				ret = ocfs2_xattr_block_find(inode, name_index,
2035 							     name, &xbs);
2036 				if (ret)
2037 					goto cleanup;
2038 			}
2039 			/*
2040 			 * If no space in inode, we will set extended attribute
2041 			 * into external block.
2042 			 */
2043 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2044 			if (ret)
2045 				goto cleanup;
2046 			if (!xis.not_found) {
2047 				/*
2048 				 * If succeed and that extended attribute
2049 				 * existing in inode, we will remove it.
2050 				 */
2051 				xi.value = NULL;
2052 				xi.value_len = 0;
2053 				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2054 			}
2055 		}
2056 	}
2057 cleanup:
2058 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
2059 	ocfs2_inode_unlock(inode, 1);
2060 	brelse(di_bh);
2061 	brelse(xbs.xattr_bh);
2062 	for (i = 0; i < blk_per_bucket; i++)
2063 		brelse(xbs.bucket.bhs[i]);
2064 
2065 	return ret;
2066 }
2067 
2068 static inline u32 ocfs2_xattr_hash_by_name(struct inode *inode,
2069 					   int name_index,
2070 					   const char *suffix_name)
2071 {
2072 	struct xattr_handler *handler = ocfs2_xattr_handler(name_index);
2073 	char *prefix = handler->prefix;
2074 	int prefix_len = strlen(handler->prefix);
2075 
2076 	return ocfs2_xattr_name_hash(inode, prefix, prefix_len,
2077 				     (char *)suffix_name, strlen(suffix_name));
2078 }
2079 
2080 /*
2081  * Find the xattr extent rec which may contains name_hash.
2082  * e_cpos will be the first name hash of the xattr rec.
2083  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2084  */
2085 static int ocfs2_xattr_get_rec(struct inode *inode,
2086 			       u32 name_hash,
2087 			       u64 *p_blkno,
2088 			       u32 *e_cpos,
2089 			       u32 *num_clusters,
2090 			       struct ocfs2_extent_list *el)
2091 {
2092 	int ret = 0, i;
2093 	struct buffer_head *eb_bh = NULL;
2094 	struct ocfs2_extent_block *eb;
2095 	struct ocfs2_extent_rec *rec = NULL;
2096 	u64 e_blkno = 0;
2097 
2098 	if (el->l_tree_depth) {
2099 		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2100 		if (ret) {
2101 			mlog_errno(ret);
2102 			goto out;
2103 		}
2104 
2105 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2106 		el = &eb->h_list;
2107 
2108 		if (el->l_tree_depth) {
2109 			ocfs2_error(inode->i_sb,
2110 				    "Inode %lu has non zero tree depth in "
2111 				    "xattr tree block %llu\n", inode->i_ino,
2112 				    (unsigned long long)eb_bh->b_blocknr);
2113 			ret = -EROFS;
2114 			goto out;
2115 		}
2116 	}
2117 
2118 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2119 		rec = &el->l_recs[i];
2120 
2121 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2122 			e_blkno = le64_to_cpu(rec->e_blkno);
2123 			break;
2124 		}
2125 	}
2126 
2127 	if (!e_blkno) {
2128 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2129 			    "record (%u, %u, 0) in xattr", inode->i_ino,
2130 			    le32_to_cpu(rec->e_cpos),
2131 			    ocfs2_rec_clusters(el, rec));
2132 		ret = -EROFS;
2133 		goto out;
2134 	}
2135 
2136 	*p_blkno = le64_to_cpu(rec->e_blkno);
2137 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2138 	if (e_cpos)
2139 		*e_cpos = le32_to_cpu(rec->e_cpos);
2140 out:
2141 	brelse(eb_bh);
2142 	return ret;
2143 }
2144 
/*
 * Callback type operating on one xattr bucket of @inode; @para is an
 * opaque argument passed through to the callback.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
2148 
2149 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2150 				   struct buffer_head *header_bh,
2151 				   int name_index,
2152 				   const char *name,
2153 				   u32 name_hash,
2154 				   u16 *xe_index,
2155 				   int *found)
2156 {
2157 	int i, ret = 0, cmp = 1, block_off, new_offset;
2158 	struct ocfs2_xattr_header *xh =
2159 			(struct ocfs2_xattr_header *)header_bh->b_data;
2160 	size_t name_len = strlen(name);
2161 	struct ocfs2_xattr_entry *xe = NULL;
2162 	struct buffer_head *name_bh = NULL;
2163 	char *xe_name;
2164 
2165 	/*
2166 	 * We don't use binary search in the bucket because there
2167 	 * may be multiple entries with the same name hash.
2168 	 */
2169 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2170 		xe = &xh->xh_entries[i];
2171 
2172 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
2173 			continue;
2174 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2175 			break;
2176 
2177 		cmp = name_index - ocfs2_xattr_get_type(xe);
2178 		if (!cmp)
2179 			cmp = name_len - xe->xe_name_len;
2180 		if (cmp)
2181 			continue;
2182 
2183 		ret = ocfs2_xattr_bucket_get_name_value(inode,
2184 							xh,
2185 							i,
2186 							&block_off,
2187 							&new_offset);
2188 		if (ret) {
2189 			mlog_errno(ret);
2190 			break;
2191 		}
2192 
2193 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
2194 				       header_bh->b_blocknr + block_off,
2195 				       &name_bh, OCFS2_BH_CACHED, inode);
2196 		if (ret) {
2197 			mlog_errno(ret);
2198 			break;
2199 		}
2200 		xe_name = name_bh->b_data + new_offset;
2201 
2202 		cmp = memcmp(name, xe_name, name_len);
2203 		brelse(name_bh);
2204 		name_bh = NULL;
2205 
2206 		if (cmp == 0) {
2207 			*xe_index = i;
2208 			*found = 1;
2209 			ret = 0;
2210 			break;
2211 		}
2212 	}
2213 
2214 	return ret;
2215 }
2216 
2217 /*
2218  * Find the specified xattr entry in a series of buckets.
2219  * This series start from p_blkno and last for num_clusters.
2220  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2221  * the num of the valid buckets.
2222  *
2223  * Return the buffer_head this xattr should reside in. And if the xattr's
2224  * hash is in the gap of 2 buckets, return the lower bucket.
2225  */
2226 static int ocfs2_xattr_bucket_find(struct inode *inode,
2227 				   int name_index,
2228 				   const char *name,
2229 				   u32 name_hash,
2230 				   u64 p_blkno,
2231 				   u32 first_hash,
2232 				   u32 num_clusters,
2233 				   struct ocfs2_xattr_search *xs)
2234 {
2235 	int ret, found = 0;
2236 	struct buffer_head *bh = NULL;
2237 	struct buffer_head *lower_bh = NULL;
2238 	struct ocfs2_xattr_header *xh = NULL;
2239 	struct ocfs2_xattr_entry *xe = NULL;
2240 	u16 index = 0;
2241 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2242 	int low_bucket = 0, bucket, high_bucket;
2243 	u32 last_hash;
2244 	u64 blkno;
2245 
2246 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
2247 			       &bh, OCFS2_BH_CACHED, inode);
2248 	if (ret) {
2249 		mlog_errno(ret);
2250 		goto out;
2251 	}
2252 
2253 	xh = (struct ocfs2_xattr_header *)bh->b_data;
2254 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2255 
2256 	while (low_bucket <= high_bucket) {
2257 		brelse(bh);
2258 		bh = NULL;
2259 		bucket = (low_bucket + high_bucket) / 2;
2260 
2261 		blkno = p_blkno + bucket * blk_per_bucket;
2262 
2263 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
2264 				       &bh, OCFS2_BH_CACHED, inode);
2265 		if (ret) {
2266 			mlog_errno(ret);
2267 			goto out;
2268 		}
2269 
2270 		xh = (struct ocfs2_xattr_header *)bh->b_data;
2271 		xe = &xh->xh_entries[0];
2272 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2273 			high_bucket = bucket - 1;
2274 			continue;
2275 		}
2276 
2277 		/*
2278 		 * Check whether the hash of the last entry in our
2279 		 * bucket is larger than the search one.
2280 		 */
2281 		xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2282 		last_hash = le32_to_cpu(xe->xe_name_hash);
2283 
2284 		/* record lower_bh which may be the insert place. */
2285 		brelse(lower_bh);
2286 		lower_bh = bh;
2287 		bh = NULL;
2288 
2289 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2290 			low_bucket = bucket + 1;
2291 			continue;
2292 		}
2293 
2294 		/* the searched xattr should reside in this bucket if exists. */
2295 		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2296 					      name_index, name, name_hash,
2297 					      &index, &found);
2298 		if (ret) {
2299 			mlog_errno(ret);
2300 			goto out;
2301 		}
2302 		break;
2303 	}
2304 
2305 	/*
2306 	 * Record the bucket we have found.
2307 	 * When the xattr's hash value is in the gap of 2 buckets, we will
2308 	 * always set it to the previous bucket.
2309 	 */
2310 	if (!lower_bh) {
2311 		/*
2312 		 * We can't find any bucket whose first name_hash is less
2313 		 * than the find name_hash.
2314 		 */
2315 		BUG_ON(bh->b_blocknr != p_blkno);
2316 		lower_bh = bh;
2317 		bh = NULL;
2318 	}
2319 	xs->bucket.bhs[0] = lower_bh;
2320 	xs->bucket.xh = (struct ocfs2_xattr_header *)
2321 					xs->bucket.bhs[0]->b_data;
2322 	lower_bh = NULL;
2323 
2324 	xs->header = xs->bucket.xh;
2325 	xs->base = xs->bucket.bhs[0]->b_data;
2326 	xs->end = xs->base + inode->i_sb->s_blocksize;
2327 
2328 	if (found) {
2329 		/*
2330 		 * If we have found the xattr enty, read all the blocks in
2331 		 * this bucket.
2332 		 */
2333 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2334 					xs->bucket.bhs[0]->b_blocknr + 1,
2335 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2336 					OCFS2_BH_CACHED, inode);
2337 		if (ret) {
2338 			mlog_errno(ret);
2339 			goto out;
2340 		}
2341 
2342 		xs->here = &xs->header->xh_entries[index];
2343 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2344 		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
2345 	} else
2346 		ret = -ENODATA;
2347 
2348 out:
2349 	brelse(bh);
2350 	brelse(lower_bh);
2351 	return ret;
2352 }
2353 
2354 static int ocfs2_xattr_index_block_find(struct inode *inode,
2355 					struct buffer_head *root_bh,
2356 					int name_index,
2357 					const char *name,
2358 					struct ocfs2_xattr_search *xs)
2359 {
2360 	int ret;
2361 	struct ocfs2_xattr_block *xb =
2362 			(struct ocfs2_xattr_block *)root_bh->b_data;
2363 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2364 	struct ocfs2_extent_list *el = &xb_root->xt_list;
2365 	u64 p_blkno = 0;
2366 	u32 first_hash, num_clusters = 0;
2367 	u32 name_hash = ocfs2_xattr_hash_by_name(inode, name_index, name);
2368 
2369 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2370 		return -ENODATA;
2371 
2372 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2373 	     name, name_hash, name_index);
2374 
2375 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2376 				  &num_clusters, el);
2377 	if (ret) {
2378 		mlog_errno(ret);
2379 		goto out;
2380 	}
2381 
2382 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2383 
2384 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2385 	     "in the rec is %u\n", num_clusters, p_blkno, first_hash);
2386 
2387 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2388 				      p_blkno, first_hash, num_clusters, xs);
2389 
2390 out:
2391 	return ret;
2392 }
2393 
2394 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2395 				       u64 blkno,
2396 				       u32 clusters,
2397 				       xattr_bucket_func *func,
2398 				       void *para)
2399 {
2400 	int i, j, ret = 0;
2401 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2402 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2403 	u32 num_buckets = clusters * bpc;
2404 	struct ocfs2_xattr_bucket bucket;
2405 
2406 	memset(&bucket, 0, sizeof(bucket));
2407 
2408 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2409 	     clusters, blkno);
2410 
2411 	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2412 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2413 					blkno, blk_per_bucket,
2414 					bucket.bhs, OCFS2_BH_CACHED, inode);
2415 		if (ret) {
2416 			mlog_errno(ret);
2417 			goto out;
2418 		}
2419 
2420 		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2421 		/*
2422 		 * The real bucket num in this series of blocks is stored
2423 		 * in the 1st bucket.
2424 		 */
2425 		if (i == 0)
2426 			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2427 
2428 		mlog(0, "iterating xattr bucket %llu\n", blkno);
2429 		if (func) {
2430 			ret = func(inode, &bucket, para);
2431 			if (ret) {
2432 				mlog_errno(ret);
2433 				break;
2434 			}
2435 		}
2436 
2437 		for (j = 0; j < blk_per_bucket; j++)
2438 			brelse(bucket.bhs[j]);
2439 		memset(&bucket, 0, sizeof(bucket));
2440 	}
2441 
2442 out:
2443 	for (j = 0; j < blk_per_bucket; j++)
2444 		brelse(bucket.bhs[j]);
2445 
2446 	return ret;
2447 }
2448 
/*
 * Output cursor handed (as "para") to ocfs2_list_xattr_bucket() while the
 * buckets of an indexed xattr tree are listed.
 */
struct ocfs2_xattr_tree_list {
	/* Current write position; advanced per listed name when non-NULL. */
	char *buffer;
	/* Remaining size; reduced by each listed name's size. */
	size_t buffer_size;
};
2453 
2454 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2455 					     struct ocfs2_xattr_header *xh,
2456 					     int index,
2457 					     int *block_off,
2458 					     int *new_offset)
2459 {
2460 	u16 name_offset;
2461 
2462 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2463 		return -EINVAL;
2464 
2465 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2466 
2467 	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2468 	*new_offset = name_offset % inode->i_sb->s_blocksize;
2469 
2470 	return 0;
2471 }
2472 
2473 static int ocfs2_list_xattr_bucket(struct inode *inode,
2474 				   struct ocfs2_xattr_bucket *bucket,
2475 				   void *para)
2476 {
2477 	int ret = 0;
2478 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2479 	size_t size;
2480 	int i, block_off, new_offset;
2481 
2482 	for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2483 		struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2484 		struct xattr_handler *handler =
2485 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
2486 
2487 		if (handler) {
2488 			ret = ocfs2_xattr_bucket_get_name_value(inode,
2489 								bucket->xh,
2490 								i,
2491 								&block_off,
2492 								&new_offset);
2493 			if (ret)
2494 				break;
2495 			size = handler->list(inode, xl->buffer, xl->buffer_size,
2496 					     bucket->bhs[block_off]->b_data +
2497 					     new_offset,
2498 					     entry->xe_name_len);
2499 			if (xl->buffer) {
2500 				if (size > xl->buffer_size)
2501 					return -ERANGE;
2502 				xl->buffer += size;
2503 			}
2504 			xl->buffer_size -= size;
2505 		}
2506 	}
2507 
2508 	return ret;
2509 }
2510 
2511 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2512 					     struct ocfs2_xattr_tree_root *xt,
2513 					     char *buffer,
2514 					     size_t buffer_size)
2515 {
2516 	struct ocfs2_extent_list *el = &xt->xt_list;
2517 	int ret = 0;
2518 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2519 	u64 p_blkno = 0;
2520 	struct ocfs2_xattr_tree_list xl = {
2521 		.buffer = buffer,
2522 		.buffer_size = buffer_size,
2523 	};
2524 
2525 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2526 		return 0;
2527 
2528 	while (name_hash > 0) {
2529 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2530 					  &e_cpos, &num_clusters, el);
2531 		if (ret) {
2532 			mlog_errno(ret);
2533 			goto out;
2534 		}
2535 
2536 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2537 						  ocfs2_list_xattr_bucket,
2538 						  &xl);
2539 		if (ret) {
2540 			mlog_errno(ret);
2541 			goto out;
2542 		}
2543 
2544 		if (e_cpos == 0)
2545 			break;
2546 
2547 		name_hash = e_cpos - 1;
2548 	}
2549 
2550 	ret = buffer_size - xl.buffer_size;
2551 out:
2552 	return ret;
2553 }
2554 
2555 static int cmp_xe(const void *a, const void *b)
2556 {
2557 	const struct ocfs2_xattr_entry *l = a, *r = b;
2558 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
2559 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
2560 
2561 	if (l_hash > r_hash)
2562 		return 1;
2563 	if (l_hash < r_hash)
2564 		return -1;
2565 	return 0;
2566 }
2567 
2568 static void swap_xe(void *a, void *b, int size)
2569 {
2570 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2571 
2572 	tmp = *l;
2573 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2574 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2575 }
2576 
2577 /*
2578  * When the ocfs2_xattr_block is filled up, new bucket will be created
2579  * and all the xattr entries will be moved to the new bucket.
2580  * Note: we need to sort the entries since they are not saved in order
2581  * in the ocfs2_xattr_block.
2582  */
2583 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2584 					   struct buffer_head *xb_bh,
2585 					   struct buffer_head *xh_bh,
2586 					   struct buffer_head *data_bh)
2587 {
2588 	int i, blocksize = inode->i_sb->s_blocksize;
2589 	u16 offset, size, off_change;
2590 	struct ocfs2_xattr_entry *xe;
2591 	struct ocfs2_xattr_block *xb =
2592 				(struct ocfs2_xattr_block *)xb_bh->b_data;
2593 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2594 	struct ocfs2_xattr_header *xh =
2595 				(struct ocfs2_xattr_header *)xh_bh->b_data;
2596 	u16 count = le16_to_cpu(xb_xh->xh_count);
2597 	char *target = xh_bh->b_data, *src = xb_bh->b_data;
2598 
2599 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
2600 	     (unsigned long long)xb_bh->b_blocknr,
2601 	     (unsigned long long)xh_bh->b_blocknr);
2602 
2603 	memset(xh_bh->b_data, 0, blocksize);
2604 	if (data_bh)
2605 		memset(data_bh->b_data, 0, blocksize);
2606 	/*
2607 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
2608 	 * there is a offset change corresponding to the change of
2609 	 * ocfs2_xattr_header's position.
2610 	 */
2611 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2612 	xe = &xb_xh->xh_entries[count - 1];
2613 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2614 	size = blocksize - offset;
2615 
2616 	/* copy all the names and values. */
2617 	if (data_bh)
2618 		target = data_bh->b_data;
2619 	memcpy(target + offset, src + offset, size);
2620 
2621 	/* Init new header now. */
2622 	xh->xh_count = xb_xh->xh_count;
2623 	xh->xh_num_buckets = cpu_to_le16(1);
2624 	xh->xh_name_value_len = cpu_to_le16(size);
2625 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2626 
2627 	/* copy all the entries. */
2628 	target = xh_bh->b_data;
2629 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2630 	size = count * sizeof(struct ocfs2_xattr_entry);
2631 	memcpy(target + offset, (char *)xb_xh + offset, size);
2632 
2633 	/* Change the xe offset for all the xe because of the move. */
2634 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2635 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2636 	for (i = 0; i < count; i++)
2637 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2638 
2639 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2640 	     offset, size, off_change);
2641 
2642 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2643 	     cmp_xe, swap_xe);
2644 }
2645 
2646 /*
2647  * After we move xattr from block to index btree, we have to
2648  * update ocfs2_xattr_search to the new xe and base.
2649  *
2650  * When the entry is in xattr block, xattr_bh indicates the storage place.
2651  * While if the entry is in index b-tree, "bucket" indicates the
2652  * real place of the xattr.
2653  */
2654 static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2655 					   struct ocfs2_xattr_search *xs,
2656 					   struct buffer_head *old_bh,
2657 					   struct buffer_head *new_bh)
2658 {
2659 	int ret = 0;
2660 	char *buf = old_bh->b_data;
2661 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2662 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2663 	int i, blocksize = inode->i_sb->s_blocksize;
2664 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2665 
2666 	xs->bucket.bhs[0] = new_bh;
2667 	get_bh(new_bh);
2668 	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2669 	xs->header = xs->bucket.xh;
2670 
2671 	xs->base = new_bh->b_data;
2672 	xs->end = xs->base + inode->i_sb->s_blocksize;
2673 
2674 	if (!xs->not_found) {
2675 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2676 			ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2677 					xs->bucket.bhs[0]->b_blocknr + 1,
2678 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2679 					OCFS2_BH_CACHED, inode);
2680 			if (ret) {
2681 				mlog_errno(ret);
2682 				return ret;
2683 			}
2684 
2685 			i = xs->here - old_xh->xh_entries;
2686 			xs->here = &xs->header->xh_entries[i];
2687 		}
2688 	}
2689 
2690 	return ret;
2691 }
2692 
2693 static int ocfs2_xattr_create_index_block(struct inode *inode,
2694 					  struct ocfs2_xattr_search *xs)
2695 {
2696 	int ret, credits = OCFS2_SUBALLOC_ALLOC;
2697 	u32 bit_off, len;
2698 	u64 blkno;
2699 	handle_t *handle;
2700 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2701 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2702 	struct ocfs2_alloc_context *data_ac;
2703 	struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2704 	struct buffer_head *xb_bh = xs->xattr_bh;
2705 	struct ocfs2_xattr_block *xb =
2706 			(struct ocfs2_xattr_block *)xb_bh->b_data;
2707 	struct ocfs2_xattr_tree_root *xr;
2708 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
2709 	u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2710 
2711 	mlog(0, "create xattr index block for %llu\n",
2712 	     (unsigned long long)xb_bh->b_blocknr);
2713 
2714 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2715 
2716 	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2717 	if (ret) {
2718 		mlog_errno(ret);
2719 		goto out;
2720 	}
2721 
2722 	/*
2723 	 * XXX:
2724 	 * We can use this lock for now, and maybe move to a dedicated mutex
2725 	 * if performance becomes a problem later.
2726 	 */
2727 	down_write(&oi->ip_alloc_sem);
2728 
2729 	/*
2730 	 * 3 more credits, one for xattr block update, one for the 1st block
2731 	 * of the new xattr bucket and one for the value/data.
2732 	 */
2733 	credits += 3;
2734 	handle = ocfs2_start_trans(osb, credits);
2735 	if (IS_ERR(handle)) {
2736 		ret = PTR_ERR(handle);
2737 		mlog_errno(ret);
2738 		goto out_sem;
2739 	}
2740 
2741 	ret = ocfs2_journal_access(handle, inode, xb_bh,
2742 				   OCFS2_JOURNAL_ACCESS_WRITE);
2743 	if (ret) {
2744 		mlog_errno(ret);
2745 		goto out_commit;
2746 	}
2747 
2748 	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2749 	if (ret) {
2750 		mlog_errno(ret);
2751 		goto out_commit;
2752 	}
2753 
2754 	/*
2755 	 * The bucket may spread in many blocks, and
2756 	 * we will only touch the 1st block and the last block
2757 	 * in the whole bucket(one for entry and one for data).
2758 	 */
2759 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2760 
2761 	mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2762 
2763 	xh_bh = sb_getblk(inode->i_sb, blkno);
2764 	if (!xh_bh) {
2765 		ret = -EIO;
2766 		mlog_errno(ret);
2767 		goto out_commit;
2768 	}
2769 
2770 	ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2771 
2772 	ret = ocfs2_journal_access(handle, inode, xh_bh,
2773 				   OCFS2_JOURNAL_ACCESS_CREATE);
2774 	if (ret) {
2775 		mlog_errno(ret);
2776 		goto out_commit;
2777 	}
2778 
2779 	if (bpb > 1) {
2780 		data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2781 		if (!data_bh) {
2782 			ret = -EIO;
2783 			mlog_errno(ret);
2784 			goto out_commit;
2785 		}
2786 
2787 		ocfs2_set_new_buffer_uptodate(inode, data_bh);
2788 
2789 		ret = ocfs2_journal_access(handle, inode, data_bh,
2790 					   OCFS2_JOURNAL_ACCESS_CREATE);
2791 		if (ret) {
2792 			mlog_errno(ret);
2793 			goto out_commit;
2794 		}
2795 	}
2796 
2797 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2798 
2799 	ocfs2_journal_dirty(handle, xh_bh);
2800 	if (data_bh)
2801 		ocfs2_journal_dirty(handle, data_bh);
2802 
2803 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2804 
2805 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2806 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2807 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
2808 
2809 	xr = &xb->xb_attrs.xb_root;
2810 	xr->xt_clusters = cpu_to_le32(1);
2811 	xr->xt_last_eb_blk = 0;
2812 	xr->xt_list.l_tree_depth = 0;
2813 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2814 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2815 
2816 	xr->xt_list.l_recs[0].e_cpos = 0;
2817 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2818 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2819 
2820 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2821 
2822 	ret = ocfs2_journal_dirty(handle, xb_bh);
2823 	if (ret) {
2824 		mlog_errno(ret);
2825 		goto out_commit;
2826 	}
2827 
2828 out_commit:
2829 	ocfs2_commit_trans(osb, handle);
2830 
2831 out_sem:
2832 	up_write(&oi->ip_alloc_sem);
2833 
2834 out:
2835 	if (data_ac)
2836 		ocfs2_free_alloc_context(data_ac);
2837 
2838 	brelse(xh_bh);
2839 	brelse(data_bh);
2840 
2841 	return ret;
2842 }
2843 
2844 static int cmp_xe_offset(const void *a, const void *b)
2845 {
2846 	const struct ocfs2_xattr_entry *l = a, *r = b;
2847 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2848 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2849 
2850 	if (l_name_offset < r_name_offset)
2851 		return 1;
2852 	if (l_name_offset > r_name_offset)
2853 		return -1;
2854 	return 0;
2855 }
2856 
2857 /*
2858  * defrag a xattr bucket if we find that the bucket has some
2859  * holes beteen name/value pairs.
2860  * We will move all the name/value pairs to the end of the bucket
2861  * so that we can spare some space for insertion.
2862  */
2863 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2864 				     struct ocfs2_xattr_bucket *bucket)
2865 {
2866 	int ret, i;
2867 	size_t end, offset, len, value_len;
2868 	struct ocfs2_xattr_header *xh;
2869 	char *entries, *buf, *bucket_buf = NULL;
2870 	u64 blkno = bucket->bhs[0]->b_blocknr;
2871 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2872 	u16 xh_free_start;
2873 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2874 	size_t blocksize = inode->i_sb->s_blocksize;
2875 	handle_t *handle;
2876 	struct buffer_head **bhs;
2877 	struct ocfs2_xattr_entry *xe;
2878 
2879 	bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2880 			GFP_NOFS);
2881 	if (!bhs)
2882 		return -ENOMEM;
2883 
2884 	ret = ocfs2_read_blocks(osb, blkno, blk_per_bucket, bhs,
2885 				OCFS2_BH_CACHED, inode);
2886 	if (ret)
2887 		goto out;
2888 
2889 	/*
2890 	 * In order to make the operation more efficient and generic,
2891 	 * we copy all the blocks into a contiguous memory and do the
2892 	 * defragment there, so if anything is error, we will not touch
2893 	 * the real block.
2894 	 */
2895 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2896 	if (!bucket_buf) {
2897 		ret = -EIO;
2898 		goto out;
2899 	}
2900 
2901 	buf = bucket_buf;
2902 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2903 		memcpy(buf, bhs[i]->b_data, blocksize);
2904 
2905 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2906 	if (IS_ERR(handle)) {
2907 		ret = PTR_ERR(handle);
2908 		handle = NULL;
2909 		mlog_errno(ret);
2910 		goto out;
2911 	}
2912 
2913 	for (i = 0; i < blk_per_bucket; i++) {
2914 		ret = ocfs2_journal_access(handle, inode, bhs[i],
2915 					   OCFS2_JOURNAL_ACCESS_WRITE);
2916 		if (ret < 0) {
2917 			mlog_errno(ret);
2918 			goto commit;
2919 		}
2920 	}
2921 
2922 	xh = (struct ocfs2_xattr_header *)bucket_buf;
2923 	entries = (char *)xh->xh_entries;
2924 	xh_free_start = le16_to_cpu(xh->xh_free_start);
2925 
2926 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
2927 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
2928 	     blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2929 	     le16_to_cpu(xh->xh_name_value_len));
2930 
2931 	/*
2932 	 * sort all the entries by their offset.
2933 	 * the largest will be the first, so that we can
2934 	 * move them to the end one by one.
2935 	 */
2936 	sort(entries, le16_to_cpu(xh->xh_count),
2937 	     sizeof(struct ocfs2_xattr_entry),
2938 	     cmp_xe_offset, swap_xe);
2939 
2940 	/* Move all name/values to the end of the bucket. */
2941 	xe = xh->xh_entries;
2942 	end = OCFS2_XATTR_BUCKET_SIZE;
2943 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2944 		offset = le16_to_cpu(xe->xe_name_offset);
2945 		if (ocfs2_xattr_is_local(xe))
2946 			value_len = OCFS2_XATTR_SIZE(
2947 					le64_to_cpu(xe->xe_value_size));
2948 		else
2949 			value_len = OCFS2_XATTR_ROOT_SIZE;
2950 		len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2951 
2952 		/*
2953 		 * We must make sure that the name/value pair
2954 		 * exist in the same block. So adjust end to
2955 		 * the previous block end if needed.
2956 		 */
2957 		if (((end - len) / blocksize !=
2958 			(end - 1) / blocksize))
2959 			end = end - end % blocksize;
2960 
2961 		if (end > offset + len) {
2962 			memmove(bucket_buf + end - len,
2963 				bucket_buf + offset, len);
2964 			xe->xe_name_offset = cpu_to_le16(end - len);
2965 		}
2966 
2967 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2968 				"bucket %llu\n", (unsigned long long)blkno);
2969 
2970 		end -= len;
2971 	}
2972 
2973 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2974 			"bucket %llu\n", (unsigned long long)blkno);
2975 
2976 	if (xh_free_start == end)
2977 		goto commit;
2978 
2979 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2980 	xh->xh_free_start = cpu_to_le16(end);
2981 
2982 	/* sort the entries by their name_hash. */
2983 	sort(entries, le16_to_cpu(xh->xh_count),
2984 	     sizeof(struct ocfs2_xattr_entry),
2985 	     cmp_xe, swap_xe);
2986 
2987 	buf = bucket_buf;
2988 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
2989 		memcpy(bhs[i]->b_data, buf, blocksize);
2990 		ocfs2_journal_dirty(handle, bhs[i]);
2991 	}
2992 
2993 commit:
2994 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2995 out:
2996 
2997 	if (bhs) {
2998 		for (i = 0; i < blk_per_bucket; i++)
2999 			brelse(bhs[i]);
3000 	}
3001 	kfree(bhs);
3002 
3003 	kfree(bucket_buf);
3004 	return ret;
3005 }
3006 
3007 /*
3008  * Move half nums of the xattr bucket in the previous cluster to this new
3009  * cluster. We only touch the last cluster of the previous extend record.
3010  *
3011  * first_bh is the first buffer_head of a series of bucket in the same
3012  * extent rec and header_bh is the header of one bucket in this cluster.
3013  * They will be updated if we move the data header_bh contains to the new
3014  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3015  */
3016 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3017 					       handle_t *handle,
3018 					       struct buffer_head **first_bh,
3019 					       struct buffer_head **header_bh,
3020 					       u64 new_blkno,
3021 					       u64 prev_blkno,
3022 					       u32 num_clusters,
3023 					       u32 *first_hash)
3024 {
3025 	int i, ret, credits;
3026 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3027 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3028 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3029 	int blocksize = inode->i_sb->s_blocksize;
3030 	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3031 	struct ocfs2_xattr_header *new_xh;
3032 	struct ocfs2_xattr_header *xh =
3033 			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
3034 
3035 	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3036 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3037 
3038 	prev_bh = *first_bh;
3039 	get_bh(prev_bh);
3040 	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3041 
3042 	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3043 
3044 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3045 	     prev_blkno, new_blkno);
3046 
3047 	/*
3048 	 * We need to update the 1st half of the new cluster and
3049 	 * 1 more for the update of the 1st bucket of the previous
3050 	 * extent record.
3051 	 */
3052 	credits = bpc / 2 + 1;
3053 	ret = ocfs2_extend_trans(handle, credits);
3054 	if (ret) {
3055 		mlog_errno(ret);
3056 		goto out;
3057 	}
3058 
3059 	ret = ocfs2_journal_access(handle, inode, prev_bh,
3060 				   OCFS2_JOURNAL_ACCESS_WRITE);
3061 	if (ret) {
3062 		mlog_errno(ret);
3063 		goto out;
3064 	}
3065 
3066 	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3067 		old_bh = new_bh = NULL;
3068 		new_bh = sb_getblk(inode->i_sb, new_blkno);
3069 		if (!new_bh) {
3070 			ret = -EIO;
3071 			mlog_errno(ret);
3072 			goto out;
3073 		}
3074 
3075 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
3076 
3077 		ret = ocfs2_journal_access(handle, inode, new_bh,
3078 					   OCFS2_JOURNAL_ACCESS_CREATE);
3079 		if (ret < 0) {
3080 			mlog_errno(ret);
3081 			brelse(new_bh);
3082 			goto out;
3083 		}
3084 
3085 		ret = ocfs2_read_block(osb, prev_blkno,
3086 				       &old_bh, OCFS2_BH_CACHED, inode);
3087 		if (ret < 0) {
3088 			mlog_errno(ret);
3089 			brelse(new_bh);
3090 			goto out;
3091 		}
3092 
3093 		memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3094 
3095 		if (i == 0) {
3096 			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3097 			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3098 
3099 			if (first_hash)
3100 				*first_hash = le32_to_cpu(
3101 					new_xh->xh_entries[0].xe_name_hash);
3102 			new_first_bh = new_bh;
3103 			get_bh(new_first_bh);
3104 		}
3105 
3106 		ocfs2_journal_dirty(handle, new_bh);
3107 
3108 		if (*header_bh == old_bh) {
3109 			brelse(*header_bh);
3110 			*header_bh = new_bh;
3111 			get_bh(*header_bh);
3112 
3113 			brelse(*first_bh);
3114 			*first_bh = new_first_bh;
3115 			get_bh(*first_bh);
3116 		}
3117 		brelse(new_bh);
3118 		brelse(old_bh);
3119 	}
3120 
3121 	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3122 
3123 	ocfs2_journal_dirty(handle, prev_bh);
3124 out:
3125 	brelse(prev_bh);
3126 	brelse(new_first_bh);
3127 	return ret;
3128 }
3129 
3130 static int ocfs2_read_xattr_bucket(struct inode *inode,
3131 				   u64 blkno,
3132 				   struct buffer_head **bhs,
3133 				   int new)
3134 {
3135 	int ret = 0;
3136 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3137 
3138 	if (!new)
3139 		return ocfs2_read_blocks(OCFS2_SB(inode->i_sb), blkno,
3140 					 blk_per_bucket, bhs,
3141 					 OCFS2_BH_CACHED, inode);
3142 
3143 	for (i = 0; i < blk_per_bucket; i++) {
3144 		bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3145 		if (bhs[i] == NULL) {
3146 			ret = -EIO;
3147 			mlog_errno(ret);
3148 			break;
3149 		}
3150 		ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3151 	}
3152 
3153 	return ret;
3154 }
3155 
3156 /*
3157  * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3158  * first_hash will record the 1st hash of the new bucket.
3159  */
3160 static int ocfs2_half_xattr_bucket(struct inode *inode,
3161 				   handle_t *handle,
3162 				   u64 blk,
3163 				   u64 new_blk,
3164 				   u32 *first_hash,
3165 				   int new_bucket_head)
3166 {
3167 	int ret, i;
3168 	u16 count, start, len, name_value_len, xe_len, name_offset;
3169 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3170 	struct buffer_head **s_bhs, **t_bhs = NULL;
3171 	struct ocfs2_xattr_header *xh;
3172 	struct ocfs2_xattr_entry *xe;
3173 	int blocksize = inode->i_sb->s_blocksize;
3174 
3175 	mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3176 	     blk, new_blk);
3177 
3178 	s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3179 	if (!s_bhs)
3180 		return -ENOMEM;
3181 
3182 	ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3183 	if (ret) {
3184 		mlog_errno(ret);
3185 		goto out;
3186 	}
3187 
3188 	ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3189 				   OCFS2_JOURNAL_ACCESS_WRITE);
3190 	if (ret) {
3191 		mlog_errno(ret);
3192 		goto out;
3193 	}
3194 
3195 	t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3196 	if (!t_bhs) {
3197 		ret = -ENOMEM;
3198 		goto out;
3199 	}
3200 
3201 	ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3202 	if (ret) {
3203 		mlog_errno(ret);
3204 		goto out;
3205 	}
3206 
3207 	for (i = 0; i < blk_per_bucket; i++) {
3208 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3209 					   OCFS2_JOURNAL_ACCESS_CREATE);
3210 		if (ret) {
3211 			mlog_errno(ret);
3212 			goto out;
3213 		}
3214 	}
3215 
3216 	/* copy the whole bucket to the new first. */
3217 	for (i = 0; i < blk_per_bucket; i++)
3218 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3219 
3220 	/* update the new bucket. */
3221 	xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3222 	count = le16_to_cpu(xh->xh_count);
3223 	start = count / 2;
3224 
3225 	/*
3226 	 * Calculate the total name/value len and xh_free_start for
3227 	 * the old bucket first.
3228 	 */
3229 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
3230 	name_value_len = 0;
3231 	for (i = 0; i < start; i++) {
3232 		xe = &xh->xh_entries[i];
3233 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3234 		if (ocfs2_xattr_is_local(xe))
3235 			xe_len +=
3236 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3237 		else
3238 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3239 		name_value_len += xe_len;
3240 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3241 			name_offset = le16_to_cpu(xe->xe_name_offset);
3242 	}
3243 
3244 	/*
3245 	 * Now begin the modification to the new bucket.
3246 	 *
3247 	 * In the new bucket, We just move the xattr entry to the beginning
3248 	 * and don't touch the name/value. So there will be some holes in the
3249 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3250 	 * called.
3251 	 */
3252 	xe = &xh->xh_entries[start];
3253 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3254 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3255 	     (char *)xe - (char *)xh, (char *)xh->xh_entries - (char *)xh);
3256 	memmove((char *)xh->xh_entries, (char *)xe, len);
3257 	xe = &xh->xh_entries[count - start];
3258 	len = sizeof(struct ocfs2_xattr_entry) * start;
3259 	memset((char *)xe, 0, len);
3260 
3261 	le16_add_cpu(&xh->xh_count, -start);
3262 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3263 
3264 	/* Calculate xh_free_start for the new bucket. */
3265 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3266 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3267 		xe = &xh->xh_entries[i];
3268 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3269 		if (ocfs2_xattr_is_local(xe))
3270 			xe_len +=
3271 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3272 		else
3273 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3274 		if (le16_to_cpu(xe->xe_name_offset) <
3275 		    le16_to_cpu(xh->xh_free_start))
3276 			xh->xh_free_start = xe->xe_name_offset;
3277 	}
3278 
3279 	/* set xh->xh_num_buckets for the new xh. */
3280 	if (new_bucket_head)
3281 		xh->xh_num_buckets = cpu_to_le16(1);
3282 	else
3283 		xh->xh_num_buckets = 0;
3284 
3285 	for (i = 0; i < blk_per_bucket; i++) {
3286 		ocfs2_journal_dirty(handle, t_bhs[i]);
3287 		if (ret)
3288 			mlog_errno(ret);
3289 	}
3290 
3291 	/* store the first_hash of the new bucket. */
3292 	if (first_hash)
3293 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3294 
3295 	/*
3296 	 * Now only update the 1st block of the old bucket.
3297 	 * Please note that the entry has been sorted already above.
3298 	 */
3299 	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3300 	memset(&xh->xh_entries[start], 0,
3301 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
3302 	xh->xh_count = cpu_to_le16(start);
3303 	xh->xh_free_start = cpu_to_le16(name_offset);
3304 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
3305 
3306 	ocfs2_journal_dirty(handle, s_bhs[0]);
3307 	if (ret)
3308 		mlog_errno(ret);
3309 
3310 out:
3311 	if (s_bhs) {
3312 		for (i = 0; i < blk_per_bucket; i++)
3313 			brelse(s_bhs[i]);
3314 	}
3315 	kfree(s_bhs);
3316 
3317 	if (t_bhs) {
3318 		for (i = 0; i < blk_per_bucket; i++)
3319 			brelse(t_bhs[i]);
3320 	}
3321 	kfree(t_bhs);
3322 
3323 	return ret;
3324 }
3325 
3326 /*
3327  * Copy xattr from one bucket to another bucket.
3328  *
3329  * The caller must make sure that the journal transaction
3330  * has enough space for journaling.
3331  */
3332 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3333 				 handle_t *handle,
3334 				 u64 s_blkno,
3335 				 u64 t_blkno,
3336 				 int t_is_new)
3337 {
3338 	int ret, i;
3339 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3340 	int blocksize = inode->i_sb->s_blocksize;
3341 	struct buffer_head **s_bhs, **t_bhs = NULL;
3342 
3343 	BUG_ON(s_blkno == t_blkno);
3344 
3345 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
3346 	     s_blkno, t_blkno, t_is_new);
3347 
3348 	s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3349 			GFP_NOFS);
3350 	if (!s_bhs)
3351 		return -ENOMEM;
3352 
3353 	ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3354 	if (ret)
3355 		goto out;
3356 
3357 	t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3358 			GFP_NOFS);
3359 	if (!t_bhs) {
3360 		ret = -ENOMEM;
3361 		goto out;
3362 	}
3363 
3364 	ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3365 	if (ret)
3366 		goto out;
3367 
3368 	for (i = 0; i < blk_per_bucket; i++) {
3369 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3370 					   OCFS2_JOURNAL_ACCESS_WRITE);
3371 		if (ret)
3372 			goto out;
3373 	}
3374 
3375 	for (i = 0; i < blk_per_bucket; i++) {
3376 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3377 		ocfs2_journal_dirty(handle, t_bhs[i]);
3378 	}
3379 
3380 out:
3381 	if (s_bhs) {
3382 		for (i = 0; i < blk_per_bucket; i++)
3383 			brelse(s_bhs[i]);
3384 	}
3385 	kfree(s_bhs);
3386 
3387 	if (t_bhs) {
3388 		for (i = 0; i < blk_per_bucket; i++)
3389 			brelse(t_bhs[i]);
3390 	}
3391 	kfree(t_bhs);
3392 
3393 	return ret;
3394 }
3395 
3396 /*
3397  * Copy one xattr cluster from src_blk to to_blk.
3398  * The to_blk will become the first bucket header of the cluster, so its
3399  * xh_num_buckets will be initialized as the bucket num in the cluster.
3400  */
3401 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3402 				  handle_t *handle,
3403 				  struct buffer_head *first_bh,
3404 				  u64 src_blk,
3405 				  u64 to_blk,
3406 				  u32 *first_hash)
3407 {
3408 	int i, ret, credits;
3409 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3410 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3411 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3412 	struct buffer_head *bh = NULL;
3413 	struct ocfs2_xattr_header *xh;
3414 	u64 to_blk_start = to_blk;
3415 
3416 	mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3417 
3418 	/*
3419 	 * We need to update the new cluster and 1 more for the update of
3420 	 * the 1st bucket of the previous extent rec.
3421 	 */
3422 	credits = bpc + 1;
3423 	ret = ocfs2_extend_trans(handle, credits);
3424 	if (ret) {
3425 		mlog_errno(ret);
3426 		goto out;
3427 	}
3428 
3429 	ret = ocfs2_journal_access(handle, inode, first_bh,
3430 				   OCFS2_JOURNAL_ACCESS_WRITE);
3431 	if (ret) {
3432 		mlog_errno(ret);
3433 		goto out;
3434 	}
3435 
3436 	for (i = 0; i < num_buckets; i++) {
3437 		ret = ocfs2_cp_xattr_bucket(inode, handle,
3438 					    src_blk, to_blk, 1);
3439 		if (ret) {
3440 			mlog_errno(ret);
3441 			goto out;
3442 		}
3443 
3444 		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3445 		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3446 	}
3447 
3448 	/* update the old bucket header. */
3449 	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3450 	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3451 
3452 	ocfs2_journal_dirty(handle, first_bh);
3453 
3454 	/* update the new bucket header. */
3455 	ret = ocfs2_read_block(osb, to_blk_start, &bh, OCFS2_BH_CACHED, inode);
3456 	if (ret < 0) {
3457 		mlog_errno(ret);
3458 		goto out;
3459 	}
3460 
3461 	ret = ocfs2_journal_access(handle, inode, bh,
3462 				   OCFS2_JOURNAL_ACCESS_WRITE);
3463 	if (ret) {
3464 		mlog_errno(ret);
3465 		goto out;
3466 	}
3467 
3468 	xh = (struct ocfs2_xattr_header *)bh->b_data;
3469 	xh->xh_num_buckets = cpu_to_le16(num_buckets);
3470 
3471 	ocfs2_journal_dirty(handle, bh);
3472 
3473 	if (first_hash)
3474 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3475 out:
3476 	brelse(bh);
3477 	return ret;
3478 }
3479 
3480 /*
3481  * Move half of the xattrs in this cluster to the new cluster.
3482  * This function should only be called when bucket size == cluster size.
3483  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3484  */
3485 static int ocfs2_half_xattr_cluster(struct inode *inode,
3486 				    handle_t *handle,
3487 				    u64 prev_blk,
3488 				    u64 new_blk,
3489 				    u32 *first_hash)
3490 {
3491 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3492 	int ret, credits = 2 * blk_per_bucket;
3493 
3494 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3495 
3496 	ret = ocfs2_extend_trans(handle, credits);
3497 	if (ret) {
3498 		mlog_errno(ret);
3499 		return ret;
3500 	}
3501 
3502 	/* Move half of the xattr in start_blk to the next bucket. */
3503 	return  ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3504 					new_blk, first_hash, 1);
3505 }
3506 
3507 /*
3508  * Move some xattrs from the old cluster to the new one since they are not
3509  * contiguous in ocfs2 xattr tree.
3510  *
3511  * new_blk starts a new separate cluster, and we will move some xattrs from
3512  * prev_blk to it. v_start will be set as the first name hash value in this
3513  * new cluster so that it can be used as e_cpos during tree insertion and
3514  * don't collide with our original b-tree operations. first_bh and header_bh
3515  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3516  * to extend the insert bucket.
3517  *
3518  * The problem is how much xattr should we move to the new one and when should
3519  * we update first_bh and header_bh?
3520  * 1. If cluster size > bucket size, that means the previous cluster has more
3521  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3522  *    update the first_bh and header_bh if the insert bucket has been moved
3523  *    to the new cluster.
3524  * 2. If cluster_size == bucket_size:
3525  *    a) If the previous extent rec has more than one cluster and the insert
3526  *       place isn't in the last cluster, copy the entire last cluster to the
3527  *       new one. This time, we don't need to upate the first_bh and header_bh
3528  *       since they will not be moved into the new cluster.
3529  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3530  *       the new one. And we set the extend flag to zero if the insert place is
3531  *       moved into the new allocated cluster since no extend is needed.
3532  */
3533 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3534 					    handle_t *handle,
3535 					    struct buffer_head **first_bh,
3536 					    struct buffer_head **header_bh,
3537 					    u64 new_blk,
3538 					    u64 prev_blk,
3539 					    u32 prev_clusters,
3540 					    u32 *v_start,
3541 					    int *extend)
3542 {
3543 	int ret = 0;
3544 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3545 
3546 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3547 	     prev_blk, prev_clusters, new_blk);
3548 
3549 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3550 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3551 							  handle,
3552 							  first_bh,
3553 							  header_bh,
3554 							  new_blk,
3555 							  prev_blk,
3556 							  prev_clusters,
3557 							  v_start);
3558 	else {
3559 		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3560 
3561 		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3562 			ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3563 						     last_blk, new_blk,
3564 						     v_start);
3565 		else {
3566 			ret = ocfs2_half_xattr_cluster(inode, handle,
3567 						       last_blk, new_blk,
3568 						       v_start);
3569 
3570 			if ((*header_bh)->b_blocknr == last_blk && extend)
3571 				*extend = 0;
3572 		}
3573 	}
3574 
3575 	return ret;
3576 }
3577 
3578 /*
3579  * Add a new cluster for xattr storage.
3580  *
3581  * If the new cluster is contiguous with the previous one, it will be
3582  * appended to the same extent record, and num_clusters will be updated.
3583  * If not, we will insert a new extent for it and move some xattrs in
3584  * the last cluster into the new allocated one.
3585  * We also need to limit the maximum size of a btree leaf, otherwise we'll
3586  * lose the benefits of hashing because we'll have to search large leaves.
3587  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3588  * if it's bigger).
3589  *
3590  * first_bh is the first block of the previous extent rec and header_bh
3591  * indicates the bucket we will insert the new xattrs. They will be updated
3592  * when the header_bh is moved into the new cluster.
3593  */
3594 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3595 				       struct buffer_head *root_bh,
3596 				       struct buffer_head **first_bh,
3597 				       struct buffer_head **header_bh,
3598 				       u32 *num_clusters,
3599 				       u32 prev_cpos,
3600 				       u64 prev_blkno,
3601 				       int *extend)
3602 {
3603 	int ret, credits;
3604 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3605 	u32 prev_clusters = *num_clusters;
3606 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3607 	u64 block;
3608 	handle_t *handle = NULL;
3609 	struct ocfs2_alloc_context *data_ac = NULL;
3610 	struct ocfs2_alloc_context *meta_ac = NULL;
3611 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3612 	struct ocfs2_xattr_block *xb =
3613 			(struct ocfs2_xattr_block *)root_bh->b_data;
3614 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3615 	struct ocfs2_extent_list *root_el = &xb_root->xt_list;
3616 	enum ocfs2_extent_tree_type type = OCFS2_XATTR_TREE_EXTENT;
3617 
3618 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3619 	     "previous xattr blkno = %llu\n",
3620 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
3621 	     prev_cpos, prev_blkno);
3622 
3623 	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
3624 				    clusters_to_add, 0, &data_ac,
3625 				    &meta_ac, type, NULL);
3626 	if (ret) {
3627 		mlog_errno(ret);
3628 		goto leave;
3629 	}
3630 
3631 	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
3632 	handle = ocfs2_start_trans(osb, credits);
3633 	if (IS_ERR(handle)) {
3634 		ret = PTR_ERR(handle);
3635 		handle = NULL;
3636 		mlog_errno(ret);
3637 		goto leave;
3638 	}
3639 
3640 	ret = ocfs2_journal_access(handle, inode, root_bh,
3641 				   OCFS2_JOURNAL_ACCESS_WRITE);
3642 	if (ret < 0) {
3643 		mlog_errno(ret);
3644 		goto leave;
3645 	}
3646 
3647 	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3648 				     clusters_to_add, &bit_off, &num_bits);
3649 	if (ret < 0) {
3650 		if (ret != -ENOSPC)
3651 			mlog_errno(ret);
3652 		goto leave;
3653 	}
3654 
3655 	BUG_ON(num_bits > clusters_to_add);
3656 
3657 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3658 	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3659 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3660 
3661 	if (prev_blkno + prev_clusters * bpc == block &&
3662 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3663 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3664 		/*
3665 		 * If this cluster is contiguous with the old one and
3666 		 * adding this new cluster, we don't surpass the limit of
3667 		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3668 		 * initialized and used like other buckets in the previous
3669 		 * cluster.
3670 		 * So add it as a contiguous one. The caller will handle
3671 		 * its init process.
3672 		 */
3673 		v_start = prev_cpos + prev_clusters;
3674 		*num_clusters = prev_clusters + num_bits;
3675 		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3676 		     num_bits);
3677 	} else {
3678 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
3679 						       handle,
3680 						       first_bh,
3681 						       header_bh,
3682 						       block,
3683 						       prev_blkno,
3684 						       prev_clusters,
3685 						       &v_start,
3686 						       extend);
3687 		if (ret) {
3688 			mlog_errno(ret);
3689 			goto leave;
3690 		}
3691 	}
3692 
3693 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3694 	     num_bits, block, v_start);
3695 	ret = ocfs2_xattr_tree_insert_extent(osb, handle, inode, root_bh,
3696 					     v_start, block, num_bits,
3697 					     0, meta_ac);
3698 	if (ret < 0) {
3699 		mlog_errno(ret);
3700 		goto leave;
3701 	}
3702 
3703 	ret = ocfs2_journal_dirty(handle, root_bh);
3704 	if (ret < 0) {
3705 		mlog_errno(ret);
3706 		goto leave;
3707 	}
3708 
3709 leave:
3710 	if (handle)
3711 		ocfs2_commit_trans(osb, handle);
3712 	if (data_ac)
3713 		ocfs2_free_alloc_context(data_ac);
3714 	if (meta_ac)
3715 		ocfs2_free_alloc_context(meta_ac);
3716 
3717 	return ret;
3718 }
3719 
3720 /*
3721  * Extend a new xattr bucket and move xattrs to the end one by one until
3722  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3723  */
3724 static int ocfs2_extend_xattr_bucket(struct inode *inode,
3725 				     struct buffer_head *first_bh,
3726 				     struct buffer_head *start_bh,
3727 				     u32 num_clusters)
3728 {
3729 	int ret, credits;
3730 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3731 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3732 	u64 start_blk = start_bh->b_blocknr, end_blk;
3733 	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3734 	handle_t *handle;
3735 	struct ocfs2_xattr_header *first_xh =
3736 				(struct ocfs2_xattr_header *)first_bh->b_data;
3737 	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3738 
3739 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3740 	     "from %llu, len = %u\n", start_blk,
3741 	     (unsigned long long)first_bh->b_blocknr, num_clusters);
3742 
3743 	BUG_ON(bucket >= num_buckets);
3744 
3745 	end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3746 
3747 	/*
3748 	 * We will touch all the buckets after the start_bh(include it).
3749 	 * Add one more bucket and modify the first_bh.
3750 	 */
3751 	credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3752 	handle = ocfs2_start_trans(osb, credits);
3753 	if (IS_ERR(handle)) {
3754 		ret = PTR_ERR(handle);
3755 		handle = NULL;
3756 		mlog_errno(ret);
3757 		goto out;
3758 	}
3759 
3760 	ret = ocfs2_journal_access(handle, inode, first_bh,
3761 				   OCFS2_JOURNAL_ACCESS_WRITE);
3762 	if (ret) {
3763 		mlog_errno(ret);
3764 		goto commit;
3765 	}
3766 
3767 	while (end_blk != start_blk) {
3768 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3769 					    end_blk + blk_per_bucket, 0);
3770 		if (ret)
3771 			goto commit;
3772 		end_blk -= blk_per_bucket;
3773 	}
3774 
3775 	/* Move half of the xattr in start_blk to the next bucket. */
3776 	ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3777 				      start_blk + blk_per_bucket, NULL, 0);
3778 
3779 	le16_add_cpu(&first_xh->xh_num_buckets, 1);
3780 	ocfs2_journal_dirty(handle, first_bh);
3781 
3782 commit:
3783 	ocfs2_commit_trans(osb, handle);
3784 out:
3785 	return ret;
3786 }
3787 
3788 /*
3789  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3790  * xb_bh is the ocfs2_xattr_block.
3791  * We will move all the buckets starting from header_bh to the next place. As
3792  * for this one, half num of its xattrs will be moved to the next one.
3793  *
3794  * We will allocate a new cluster if current cluster is full and adjust
3795  * header_bh and first_bh if the insert place is moved to the new cluster.
3796  */
3797 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3798 				      struct buffer_head *xb_bh,
3799 				      struct buffer_head *header_bh)
3800 {
3801 	struct ocfs2_xattr_header *first_xh = NULL;
3802 	struct buffer_head *first_bh = NULL;
3803 	struct ocfs2_xattr_block *xb =
3804 			(struct ocfs2_xattr_block *)xb_bh->b_data;
3805 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3806 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3807 	struct ocfs2_xattr_header *xh =
3808 			(struct ocfs2_xattr_header *)header_bh->b_data;
3809 	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3810 	struct super_block *sb = inode->i_sb;
3811 	struct ocfs2_super *osb = OCFS2_SB(sb);
3812 	int ret, num_buckets, extend = 1;
3813 	u64 p_blkno;
3814 	u32 e_cpos, num_clusters;
3815 
3816 	mlog(0, "Add new xattr bucket starting form %llu\n",
3817 	     (unsigned long long)header_bh->b_blocknr);
3818 
3819 	/*
3820 	 * Add refrence for header_bh here because it may be
3821 	 * changed in ocfs2_add_new_xattr_cluster and we need
3822 	 * to free it in the end.
3823 	 */
3824 	get_bh(header_bh);
3825 
3826 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3827 				  &num_clusters, el);
3828 	if (ret) {
3829 		mlog_errno(ret);
3830 		goto out;
3831 	}
3832 
3833 	ret = ocfs2_read_block(osb, p_blkno,
3834 			       &first_bh, OCFS2_BH_CACHED, inode);
3835 	if (ret) {
3836 		mlog_errno(ret);
3837 		goto out;
3838 	}
3839 
3840 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3841 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3842 
3843 	if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3844 		ret = ocfs2_add_new_xattr_cluster(inode,
3845 						  xb_bh,
3846 						  &first_bh,
3847 						  &header_bh,
3848 						  &num_clusters,
3849 						  e_cpos,
3850 						  p_blkno,
3851 						  &extend);
3852 		if (ret) {
3853 			mlog_errno(ret);
3854 			goto out;
3855 		}
3856 	}
3857 
3858 	if (extend)
3859 		ret = ocfs2_extend_xattr_bucket(inode,
3860 						first_bh,
3861 						header_bh,
3862 						num_clusters);
3863 	if (ret)
3864 		mlog_errno(ret);
3865 out:
3866 	brelse(first_bh);
3867 	brelse(header_bh);
3868 	return ret;
3869 }
3870 
3871 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3872 					struct ocfs2_xattr_bucket *bucket,
3873 					int offs)
3874 {
3875 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
3876 
3877 	offs = offs % inode->i_sb->s_blocksize;
3878 	return bucket->bhs[block_off]->b_data + offs;
3879 }
3880 
3881 /*
3882  * Handle the normal xattr set, including replace, delete and new.
3883  * When the bucket is empty, "is_empty" is set and the caller can
3884  * free this bucket.
3885  *
3886  * Note: "local" indicates the real data's locality. So we can't
3887  * just its bucket locality by its length.
3888  */
3889 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3890 					 struct ocfs2_xattr_info *xi,
3891 					 struct ocfs2_xattr_search *xs,
3892 					 u32 name_hash,
3893 					 int local,
3894 					 int *is_empty)
3895 {
3896 	struct ocfs2_xattr_entry *last, *xe;
3897 	int name_len = strlen(xi->name);
3898 	struct ocfs2_xattr_header *xh = xs->header;
3899 	u16 count = le16_to_cpu(xh->xh_count), start;
3900 	size_t blocksize = inode->i_sb->s_blocksize;
3901 	char *val;
3902 	size_t offs, size, new_size;
3903 
3904 	last = &xh->xh_entries[count];
3905 	if (!xs->not_found) {
3906 		xe = xs->here;
3907 		offs = le16_to_cpu(xe->xe_name_offset);
3908 		if (ocfs2_xattr_is_local(xe))
3909 			size = OCFS2_XATTR_SIZE(name_len) +
3910 			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3911 		else
3912 			size = OCFS2_XATTR_SIZE(name_len) +
3913 			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3914 
3915 		/*
3916 		 * If the new value will be stored outside, xi->value has been
3917 		 * initalized as an empty ocfs2_xattr_value_root, and the same
3918 		 * goes with xi->value_len, so we can set new_size safely here.
3919 		 * See ocfs2_xattr_set_in_bucket.
3920 		 */
3921 		new_size = OCFS2_XATTR_SIZE(name_len) +
3922 			   OCFS2_XATTR_SIZE(xi->value_len);
3923 
3924 		le16_add_cpu(&xh->xh_name_value_len, -size);
3925 		if (xi->value) {
3926 			if (new_size > size)
3927 				goto set_new_name_value;
3928 
3929 			/* Now replace the old value with new one. */
3930 			if (local)
3931 				xe->xe_value_size = cpu_to_le64(xi->value_len);
3932 			else
3933 				xe->xe_value_size = 0;
3934 
3935 			val = ocfs2_xattr_bucket_get_val(inode,
3936 							 &xs->bucket, offs);
3937 			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3938 			       size - OCFS2_XATTR_SIZE(name_len));
3939 			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3940 				memcpy(val + OCFS2_XATTR_SIZE(name_len),
3941 				       xi->value, xi->value_len);
3942 
3943 			le16_add_cpu(&xh->xh_name_value_len, new_size);
3944 			ocfs2_xattr_set_local(xe, local);
3945 			return;
3946 		} else {
3947 			/* Remove the old entry. */
3948 			last -= 1;
3949 			memmove(xe, xe + 1,
3950 				(void *)last - (void *)xe);
3951 			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
3952 			le16_add_cpu(&xh->xh_count, -1);
3953 			if (xh->xh_count == 0 && is_empty)
3954 				*is_empty = 1;
3955 			return;
3956 		}
3957 	} else {
3958 		/* find a new entry for insert. */
3959 		int low = 0, high = count - 1, tmp;
3960 		struct ocfs2_xattr_entry *tmp_xe;
3961 
3962 		while (low <= high) {
3963 			tmp = (low + high) / 2;
3964 			tmp_xe = &xh->xh_entries[tmp];
3965 
3966 			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
3967 				low = tmp + 1;
3968 			else if (name_hash <
3969 				 le32_to_cpu(tmp_xe->xe_name_hash))
3970 				high = tmp - 1;
3971 			else
3972 				break;
3973 		}
3974 
3975 		xe = &xh->xh_entries[low];
3976 		if (low != count)
3977 			memmove(xe + 1, xe, (void *)last - (void *)xe);
3978 
3979 		le16_add_cpu(&xh->xh_count, 1);
3980 		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
3981 		xe->xe_name_hash = cpu_to_le32(name_hash);
3982 		xe->xe_name_len = name_len;
3983 		ocfs2_xattr_set_type(xe, xi->name_index);
3984 	}
3985 
3986 set_new_name_value:
3987 	/* Insert the new name+value. */
3988 	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
3989 
3990 	/*
3991 	 * We must make sure that the name/value pair
3992 	 * exists in the same block.
3993 	 */
3994 	offs = le16_to_cpu(xh->xh_free_start);
3995 	start = offs - size;
3996 
3997 	if (start >> inode->i_sb->s_blocksize_bits !=
3998 	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
3999 		offs = offs - offs % blocksize;
4000 		xh->xh_free_start = cpu_to_le16(offs);
4001 	}
4002 
4003 	val = ocfs2_xattr_bucket_get_val(inode,
4004 					 &xs->bucket, offs - size);
4005 	xe->xe_name_offset = cpu_to_le16(offs - size);
4006 
4007 	memset(val, 0, size);
4008 	memcpy(val, xi->name, name_len);
4009 	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4010 
4011 	xe->xe_value_size = cpu_to_le64(xi->value_len);
4012 	ocfs2_xattr_set_local(xe, local);
4013 	xs->here = xe;
4014 	le16_add_cpu(&xh->xh_free_start, -size);
4015 	le16_add_cpu(&xh->xh_name_value_len, size);
4016 
4017 	return;
4018 }
4019 
4020 static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4021 					     handle_t *handle,
4022 					     struct ocfs2_xattr_search *xs,
4023 					     struct buffer_head **bhs,
4024 					     u16 bh_num)
4025 {
4026 	int ret = 0, off, block_off;
4027 	struct ocfs2_xattr_entry *xe = xs->here;
4028 
4029 	/*
4030 	 * First calculate all the blocks we should journal_access
4031 	 * and journal_dirty. The first block should always be touched.
4032 	 */
4033 	ret = ocfs2_journal_dirty(handle, bhs[0]);
4034 	if (ret)
4035 		mlog_errno(ret);
4036 
4037 	/* calc the data. */
4038 	off = le16_to_cpu(xe->xe_name_offset);
4039 	block_off = off >> inode->i_sb->s_blocksize_bits;
4040 	ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4041 	if (ret)
4042 		mlog_errno(ret);
4043 
4044 	return ret;
4045 }
4046 
4047 /*
4048  * Set the xattr entry in the specified bucket.
4049  * The bucket is indicated by xs->bucket and it should have the enough
4050  * space for the xattr insertion.
4051  */
4052 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4053 					   struct ocfs2_xattr_info *xi,
4054 					   struct ocfs2_xattr_search *xs,
4055 					   u32 name_hash,
4056 					   int local,
4057 					   int *bucket_empty)
4058 {
4059 	int i, ret;
4060 	handle_t *handle = NULL;
4061 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4062 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4063 
4064 	mlog(0, "Set xattr entry len = %d index = %d in bucket %llu\n",
4065 	     xi->value_len, xi->name_index,
4066 	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4067 
4068 	if (!xs->bucket.bhs[1]) {
4069 		ret = ocfs2_read_blocks(osb,
4070 					xs->bucket.bhs[0]->b_blocknr + 1,
4071 					blk_per_bucket - 1, &xs->bucket.bhs[1],
4072 					OCFS2_BH_CACHED, inode);
4073 		if (ret) {
4074 			mlog_errno(ret);
4075 			goto out;
4076 		}
4077 	}
4078 
4079 	handle = ocfs2_start_trans(osb, blk_per_bucket);
4080 	if (IS_ERR(handle)) {
4081 		ret = PTR_ERR(handle);
4082 		handle = NULL;
4083 		mlog_errno(ret);
4084 		goto out;
4085 	}
4086 
4087 	for (i = 0; i < blk_per_bucket; i++) {
4088 		ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4089 					   OCFS2_JOURNAL_ACCESS_WRITE);
4090 		if (ret < 0) {
4091 			mlog_errno(ret);
4092 			goto out;
4093 		}
4094 	}
4095 
4096 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash,
4097 				     local, bucket_empty);
4098 
4099 	/*Only dirty the blocks we have touched in set xattr. */
4100 	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4101 						xs->bucket.bhs, blk_per_bucket);
4102 	if (ret)
4103 		mlog_errno(ret);
4104 out:
4105 	ocfs2_commit_trans(osb, handle);
4106 
4107 	return ret;
4108 }
4109 
4110 static int ocfs2_xattr_value_update_size(struct inode *inode,
4111 					 struct buffer_head *xe_bh,
4112 					 struct ocfs2_xattr_entry *xe,
4113 					 u64 new_size)
4114 {
4115 	int ret;
4116 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4117 	handle_t *handle = NULL;
4118 
4119 	handle = ocfs2_start_trans(osb, 1);
4120 	if (handle == NULL) {
4121 		ret = -ENOMEM;
4122 		mlog_errno(ret);
4123 		goto out;
4124 	}
4125 
4126 	ret = ocfs2_journal_access(handle, inode, xe_bh,
4127 				   OCFS2_JOURNAL_ACCESS_WRITE);
4128 	if (ret < 0) {
4129 		mlog_errno(ret);
4130 		goto out_commit;
4131 	}
4132 
4133 	xe->xe_value_size = cpu_to_le64(new_size);
4134 
4135 	ret = ocfs2_journal_dirty(handle, xe_bh);
4136 	if (ret < 0)
4137 		mlog_errno(ret);
4138 
4139 out_commit:
4140 	ocfs2_commit_trans(osb, handle);
4141 out:
4142 	return ret;
4143 }
4144 
4145 /*
4146  * Truncate the specified xe_off entry in xattr bucket.
4147  * bucket is indicated by header_bh and len is the new length.
4148  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4149  *
4150  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4151  */
4152 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4153 					     struct buffer_head *header_bh,
4154 					     int xe_off,
4155 					     int len)
4156 {
4157 	int ret, offset;
4158 	u64 value_blk;
4159 	struct buffer_head *value_bh = NULL;
4160 	struct ocfs2_xattr_value_root *xv;
4161 	struct ocfs2_xattr_entry *xe;
4162 	struct ocfs2_xattr_header *xh =
4163 			(struct ocfs2_xattr_header *)header_bh->b_data;
4164 	size_t blocksize = inode->i_sb->s_blocksize;
4165 
4166 	xe = &xh->xh_entries[xe_off];
4167 
4168 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4169 
4170 	offset = le16_to_cpu(xe->xe_name_offset) +
4171 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4172 
4173 	value_blk = offset / blocksize;
4174 
4175 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
4176 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4177 	value_blk += header_bh->b_blocknr;
4178 
4179 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), value_blk,
4180 			       &value_bh, OCFS2_BH_CACHED, inode);
4181 	if (ret) {
4182 		mlog_errno(ret);
4183 		goto out;
4184 	}
4185 
4186 	xv = (struct ocfs2_xattr_value_root *)
4187 		(value_bh->b_data + offset % blocksize);
4188 
4189 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4190 	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
4191 	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4192 	if (ret) {
4193 		mlog_errno(ret);
4194 		goto out;
4195 	}
4196 
4197 	ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4198 	if (ret) {
4199 		mlog_errno(ret);
4200 		goto out;
4201 	}
4202 
4203 out:
4204 	brelse(value_bh);
4205 	return ret;
4206 }
4207 
4208 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4209 						struct ocfs2_xattr_search *xs,
4210 						int len)
4211 {
4212 	int ret, offset;
4213 	struct ocfs2_xattr_entry *xe = xs->here;
4214 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4215 
4216 	BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4217 
4218 	offset = xe - xh->xh_entries;
4219 	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4220 						offset, len);
4221 	if (ret)
4222 		mlog_errno(ret);
4223 
4224 	return ret;
4225 }
4226 
4227 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4228 						struct ocfs2_xattr_search *xs,
4229 						char *val,
4230 						int value_len)
4231 {
4232 	int offset;
4233 	struct ocfs2_xattr_value_root *xv;
4234 	struct ocfs2_xattr_entry *xe = xs->here;
4235 
4236 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4237 
4238 	offset = le16_to_cpu(xe->xe_name_offset) +
4239 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4240 
4241 	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4242 
4243 	return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4244 }
4245 
4246 /*
4247  * Remove the xattr bucket pointed by bucket_bh.
4248  * All the buckets after it in the same xattr extent rec will be
4249  * move forward one by one.
4250  */
4251 static int ocfs2_rm_xattr_bucket(struct inode *inode,
4252 				 struct buffer_head *first_bh,
4253 				 struct ocfs2_xattr_bucket *bucket)
4254 {
4255 	int ret = 0, credits;
4256 	struct ocfs2_xattr_header *xh =
4257 				(struct ocfs2_xattr_header *)first_bh->b_data;
4258 	u16 bucket_num = le16_to_cpu(xh->xh_num_buckets);
4259 	u64 end, start = bucket->bhs[0]->b_blocknr;
4260 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4261 	handle_t *handle;
4262 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4263 
4264 	end = first_bh->b_blocknr + (bucket_num - 1) * blk_per_bucket;
4265 
4266 	mlog(0, "rm xattr bucket %llu\n", start);
4267 	/*
4268 	 * We need to update the first xattr_header and all the buckets starting
4269 	 * from start in this xattr rec.
4270 	 *
4271 	 * XXX: Should we empty the old last bucket here?
4272 	 */
4273 	credits = 1 + end - start;
4274 	handle = ocfs2_start_trans(osb, credits);
4275 	if (IS_ERR(handle)) {
4276 		ret = PTR_ERR(handle);
4277 		mlog_errno(ret);
4278 		return ret;
4279 	}
4280 
4281 	ret = ocfs2_journal_access(handle, inode, first_bh,
4282 				   OCFS2_JOURNAL_ACCESS_WRITE);
4283 	if (ret) {
4284 		mlog_errno(ret);
4285 		goto out_commit;
4286 	}
4287 
4288 
4289 	while (start < end) {
4290 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4291 					    start + blk_per_bucket,
4292 					    start, 0);
4293 		if (ret) {
4294 			mlog_errno(ret);
4295 			goto out_commit;
4296 		}
4297 		start += blk_per_bucket;
4298 	}
4299 
4300 	/* update the first_bh. */
4301 	xh->xh_num_buckets = cpu_to_le16(bucket_num - 1);
4302 	ocfs2_journal_dirty(handle, first_bh);
4303 
4304 out_commit:
4305 	ocfs2_commit_trans(osb, handle);
4306 	return ret;
4307 }
4308 
4309 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4310 				  struct buffer_head *root_bh,
4311 				  u64 blkno,
4312 				  u32 cpos,
4313 				  u32 len)
4314 {
4315 	int ret;
4316 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4317 	struct inode *tl_inode = osb->osb_tl_inode;
4318 	handle_t *handle;
4319 	struct ocfs2_xattr_block *xb =
4320 			(struct ocfs2_xattr_block *)root_bh->b_data;
4321 	struct ocfs2_extent_list *root_el = &xb->xb_attrs.xb_root.xt_list;
4322 	struct ocfs2_alloc_context *meta_ac = NULL;
4323 	struct ocfs2_cached_dealloc_ctxt dealloc;
4324 
4325 	ocfs2_init_dealloc_ctxt(&dealloc);
4326 
4327 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4328 	     cpos, len, (unsigned long long)blkno);
4329 
4330 	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4331 
4332 	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
4333 				    0, 1, NULL, &meta_ac,
4334 				    OCFS2_XATTR_TREE_EXTENT, NULL);
4335 	if (ret) {
4336 		mlog_errno(ret);
4337 		return ret;
4338 	}
4339 
4340 	mutex_lock(&tl_inode->i_mutex);
4341 
4342 	if (ocfs2_truncate_log_needs_flush(osb)) {
4343 		ret = __ocfs2_flush_truncate_log(osb);
4344 		if (ret < 0) {
4345 			mlog_errno(ret);
4346 			goto out;
4347 		}
4348 	}
4349 
4350 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4351 	if (handle == NULL) {
4352 		ret = -ENOMEM;
4353 		mlog_errno(ret);
4354 		goto out;
4355 	}
4356 
4357 	ret = ocfs2_journal_access(handle, inode, root_bh,
4358 				   OCFS2_JOURNAL_ACCESS_WRITE);
4359 	if (ret) {
4360 		mlog_errno(ret);
4361 		goto out_commit;
4362 	}
4363 
4364 	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
4365 				  &dealloc, OCFS2_XATTR_TREE_EXTENT, NULL);
4366 	if (ret) {
4367 		mlog_errno(ret);
4368 		goto out_commit;
4369 	}
4370 
4371 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4372 
4373 	ret = ocfs2_journal_dirty(handle, root_bh);
4374 	if (ret) {
4375 		mlog_errno(ret);
4376 		goto out_commit;
4377 	}
4378 
4379 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4380 	if (ret)
4381 		mlog_errno(ret);
4382 
4383 out_commit:
4384 	ocfs2_commit_trans(osb, handle);
4385 out:
4386 	ocfs2_schedule_truncate_log_flush(osb, 1);
4387 
4388 	mutex_unlock(&tl_inode->i_mutex);
4389 
4390 	if (meta_ac)
4391 		ocfs2_free_alloc_context(meta_ac);
4392 
4393 	ocfs2_run_deallocs(osb, &dealloc);
4394 
4395 	return ret;
4396 }
4397 
4398 /*
4399  * Free the xattr bucket indicated by xs->bucket and if all the buckets
4400  * in the clusters is free, free the clusters also.
4401  */
4402 static int ocfs2_xattr_bucket_shrink(struct inode *inode,
4403 				     struct ocfs2_xattr_info *xi,
4404 				     struct ocfs2_xattr_search *xs,
4405 				     u32 name_hash)
4406 {
4407 	int ret;
4408 	u32 e_cpos, num_clusters;
4409 	u64 p_blkno;
4410 	struct buffer_head *first_bh = NULL;
4411 	struct ocfs2_xattr_header *first_xh;
4412 	struct ocfs2_xattr_block *xb =
4413 			(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
4414 
4415 	BUG_ON(xs->header->xh_count != 0);
4416 
4417 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4418 				  &e_cpos, &num_clusters,
4419 				  &xb->xb_attrs.xb_root.xt_list);
4420 	if (ret) {
4421 		mlog_errno(ret);
4422 		return ret;
4423 	}
4424 
4425 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
4426 			       &first_bh, OCFS2_BH_CACHED, inode);
4427 	if (ret) {
4428 		mlog_errno(ret);
4429 		return ret;
4430 	}
4431 
4432 	ret = ocfs2_rm_xattr_bucket(inode, first_bh, &xs->bucket);
4433 	if (ret) {
4434 		mlog_errno(ret);
4435 		goto out;
4436 	}
4437 
4438 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4439 	if (first_xh->xh_num_buckets == 0)
4440 		ret = ocfs2_rm_xattr_cluster(inode, xs->xattr_bh,
4441 					     p_blkno, e_cpos,
4442 					     num_clusters);
4443 
4444 out:
4445 	brelse(first_bh);
4446 	return ret;
4447 }
4448 
4449 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4450 					 struct ocfs2_xattr_search *xs)
4451 {
4452 	handle_t *handle = NULL;
4453 	struct ocfs2_xattr_header *xh = xs->bucket.xh;
4454 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
4455 						le16_to_cpu(xh->xh_count) - 1];
4456 	int ret = 0;
4457 
4458 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4459 	if (IS_ERR(handle)) {
4460 		ret = PTR_ERR(handle);
4461 		mlog_errno(ret);
4462 		return;
4463 	}
4464 
4465 	ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4466 				   OCFS2_JOURNAL_ACCESS_WRITE);
4467 	if (ret) {
4468 		mlog_errno(ret);
4469 		goto out_commit;
4470 	}
4471 
4472 	/* Remove the old entry. */
4473 	memmove(xs->here, xs->here + 1,
4474 		(void *)last - (void *)xs->here);
4475 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4476 	le16_add_cpu(&xh->xh_count, -1);
4477 
4478 	ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4479 	if (ret < 0)
4480 		mlog_errno(ret);
4481 out_commit:
4482 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4483 }
4484 
4485 /*
4486  * Set the xattr name/value in the bucket specified in xs.
4487  *
4488  * As the new value in xi may be stored in the bucket or in an outside cluster,
4489  * we divide the whole process into 3 steps:
4490  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4491  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4492  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4493  * 4. If the clusters for the new outside value can't be allocated, we need
4494  *    to free the xattr we allocated in set.
4495  */
4496 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4497 				     struct ocfs2_xattr_info *xi,
4498 				     struct ocfs2_xattr_search *xs)
4499 {
4500 	int ret, local = 1, bucket_empty = 0;
4501 	size_t value_len;
4502 	char *val = (char *)xi->value;
4503 	struct ocfs2_xattr_entry *xe = xs->here;
4504 	u32 name_hash = ocfs2_xattr_hash_by_name(inode,
4505 						 xi->name_index, xi->name);
4506 
4507 	if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4508 		/*
4509 		 * We need to truncate the xattr storage first.
4510 		 *
4511 		 * If both the old and new value are stored to
4512 		 * outside block, we only need to truncate
4513 		 * the storage and then set the value outside.
4514 		 *
4515 		 * If the new value should be stored within block,
4516 		 * we should free all the outside block first and
4517 		 * the modification to the xattr block will be done
4518 		 * by following steps.
4519 		 */
4520 		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4521 			value_len = xi->value_len;
4522 		else
4523 			value_len = 0;
4524 
4525 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4526 							   value_len);
4527 		if (ret)
4528 			goto out;
4529 
4530 		if (value_len)
4531 			goto set_value_outside;
4532 	}
4533 
4534 	value_len = xi->value_len;
4535 	/* So we have to handle the inside block change now. */
4536 	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4537 		/*
4538 		 * If the new value will be stored outside of block,
4539 		 * initalize a new empty value root and insert it first.
4540 		 */
4541 		local = 0;
4542 		xi->value = &def_xv;
4543 		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4544 	}
4545 
4546 	ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash,
4547 					      local, &bucket_empty);
4548 	if (ret) {
4549 		mlog_errno(ret);
4550 		goto out;
4551 	}
4552 
4553 	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4554 		/* allocate the space now for the outside block storage. */
4555 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4556 							   value_len);
4557 		if (ret) {
4558 			mlog_errno(ret);
4559 
4560 			if (xs->not_found) {
4561 				/*
4562 				 * We can't allocate enough clusters for outside
4563 				 * storage and we have allocated xattr already,
4564 				 * so need to remove it.
4565 				 */
4566 				ocfs2_xattr_bucket_remove_xs(inode, xs);
4567 			}
4568 			goto out;
4569 		}
4570 	} else {
4571 		if (bucket_empty)
4572 			ret = ocfs2_xattr_bucket_shrink(inode, xi,
4573 							xs, name_hash);
4574 		goto out;
4575 	}
4576 
4577 set_value_outside:
4578 	ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4579 out:
4580 	return ret;
4581 }
4582 
4583 /* check whether the xattr bucket is filled up with the same hash value. */
4584 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4585 					      struct ocfs2_xattr_bucket *bucket)
4586 {
4587 	struct ocfs2_xattr_header *xh = bucket->xh;
4588 
4589 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4590 	    xh->xh_entries[0].xe_name_hash) {
4591 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4592 		     "hash = %u\n",
4593 		     (unsigned long long)bucket->bhs[0]->b_blocknr,
4594 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4595 		return -ENOSPC;
4596 	}
4597 
4598 	return 0;
4599 }
4600 
4601 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4602 					     struct ocfs2_xattr_info *xi,
4603 					     struct ocfs2_xattr_search *xs)
4604 {
4605 	struct ocfs2_xattr_header *xh;
4606 	struct ocfs2_xattr_entry *xe;
4607 	u16 count, header_size, xh_free_start;
4608 	int i, free, max_free, need, old;
4609 	size_t value_size = 0, name_len = strlen(xi->name);
4610 	size_t blocksize = inode->i_sb->s_blocksize;
4611 	int ret, allocation = 0;
4612 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4613 
4614 	mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4615 
4616 try_again:
4617 	xh = xs->header;
4618 	count = le16_to_cpu(xh->xh_count);
4619 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4620 	header_size = sizeof(struct ocfs2_xattr_header) +
4621 			count * sizeof(struct ocfs2_xattr_entry);
4622 	max_free = OCFS2_XATTR_BUCKET_SIZE -
4623 		le16_to_cpu(xh->xh_name_value_len) - header_size;
4624 
4625 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4626 			"of %u which exceed block size\n",
4627 			(unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4628 			header_size);
4629 
4630 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4631 		value_size = OCFS2_XATTR_ROOT_SIZE;
4632 	else if (xi->value)
4633 		value_size = OCFS2_XATTR_SIZE(xi->value_len);
4634 
4635 	if (xs->not_found)
4636 		need = sizeof(struct ocfs2_xattr_entry) +
4637 			OCFS2_XATTR_SIZE(name_len) + value_size;
4638 	else {
4639 		need = value_size + OCFS2_XATTR_SIZE(name_len);
4640 
4641 		/*
4642 		 * We only replace the old value if the new length is smaller
4643 		 * than the old one. Otherwise we will allocate new space in the
4644 		 * bucket to store it.
4645 		 */
4646 		xe = xs->here;
4647 		if (ocfs2_xattr_is_local(xe))
4648 			old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4649 		else
4650 			old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4651 
4652 		if (old >= value_size)
4653 			need = 0;
4654 	}
4655 
4656 	free = xh_free_start - header_size;
4657 	/*
4658 	 * We need to make sure the new name/value pair
4659 	 * can exist in the same block.
4660 	 */
4661 	if (xh_free_start % blocksize < need)
4662 		free -= xh_free_start % blocksize;
4663 
4664 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4665 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4666 	     " %u\n", xs->not_found,
4667 	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4668 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
4669 	     le16_to_cpu(xh->xh_name_value_len));
4670 
4671 	if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4672 		if (need <= max_free &&
4673 		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4674 			/*
4675 			 * We can create the space by defragment. Since only the
4676 			 * name/value will be moved, the xe shouldn't be changed
4677 			 * in xs.
4678 			 */
4679 			ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4680 			if (ret) {
4681 				mlog_errno(ret);
4682 				goto out;
4683 			}
4684 
4685 			xh_free_start = le16_to_cpu(xh->xh_free_start);
4686 			free = xh_free_start - header_size;
4687 			if (xh_free_start % blocksize < need)
4688 				free -= xh_free_start % blocksize;
4689 
4690 			if (free >= need)
4691 				goto xattr_set;
4692 
4693 			mlog(0, "Can't get enough space for xattr insert by "
4694 			     "defragment. Need %u bytes, but we have %d, so "
4695 			     "allocate new bucket for it.\n", need, free);
4696 		}
4697 
4698 		/*
4699 		 * We have to add new buckets or clusters and one
4700 		 * allocation should leave us enough space for insert.
4701 		 */
4702 		BUG_ON(allocation);
4703 
4704 		/*
4705 		 * We do not allow for overlapping ranges between buckets. And
4706 		 * the maximum number of collisions we will allow for then is
4707 		 * one bucket's worth, so check it here whether we need to
4708 		 * add a new bucket for the insert.
4709 		 */
4710 		ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
4711 		if (ret) {
4712 			mlog_errno(ret);
4713 			goto out;
4714 		}
4715 
4716 		ret = ocfs2_add_new_xattr_bucket(inode,
4717 						 xs->xattr_bh,
4718 						 xs->bucket.bhs[0]);
4719 		if (ret) {
4720 			mlog_errno(ret);
4721 			goto out;
4722 		}
4723 
4724 		for (i = 0; i < blk_per_bucket; i++)
4725 			brelse(xs->bucket.bhs[i]);
4726 
4727 		memset(&xs->bucket, 0, sizeof(xs->bucket));
4728 
4729 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4730 						   xi->name_index,
4731 						   xi->name, xs);
4732 		if (ret && ret != -ENODATA)
4733 			goto out;
4734 		xs->not_found = ret;
4735 		allocation = 1;
4736 		goto try_again;
4737 	}
4738 
4739 xattr_set:
4740 	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4741 out:
4742 	mlog_exit(ret);
4743 	return ret;
4744 }
4745 
4746 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4747 					struct ocfs2_xattr_bucket *bucket,
4748 					void *para)
4749 {
4750 	int ret = 0;
4751 	struct ocfs2_xattr_header *xh = bucket->xh;
4752 	u16 i;
4753 	struct ocfs2_xattr_entry *xe;
4754 
4755 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4756 		xe = &xh->xh_entries[i];
4757 		if (ocfs2_xattr_is_local(xe))
4758 			continue;
4759 
4760 		ret = ocfs2_xattr_bucket_value_truncate(inode,
4761 							bucket->bhs[0],
4762 							i, 0);
4763 		if (ret) {
4764 			mlog_errno(ret);
4765 			break;
4766 		}
4767 	}
4768 
4769 	return ret;
4770 }
4771 
4772 static int ocfs2_delete_xattr_index_block(struct inode *inode,
4773 					  struct buffer_head *xb_bh)
4774 {
4775 	struct ocfs2_xattr_block *xb =
4776 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4777 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4778 	int ret = 0;
4779 	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4780 	u64 p_blkno;
4781 
4782 	if (le16_to_cpu(el->l_next_free_rec) == 0)
4783 		return 0;
4784 
4785 	while (name_hash > 0) {
4786 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4787 					  &e_cpos, &num_clusters, el);
4788 		if (ret) {
4789 			mlog_errno(ret);
4790 			goto out;
4791 		}
4792 
4793 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4794 						  ocfs2_delete_xattr_in_bucket,
4795 						  NULL);
4796 		if (ret) {
4797 			mlog_errno(ret);
4798 			goto out;
4799 		}
4800 
4801 		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4802 					     p_blkno, e_cpos, num_clusters);
4803 		if (ret) {
4804 			mlog_errno(ret);
4805 			break;
4806 		}
4807 
4808 		if (e_cpos == 0)
4809 			break;
4810 
4811 		name_hash = e_cpos - 1;
4812 	}
4813 
4814 out:
4815 	return ret;
4816 }
4817