xref: /openbmc/linux/fs/ocfs2/xattr.c (revision 8154da3d)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is taken from ext3.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/types.h>
30 #include <linux/slab.h>
31 #include <linux/highmem.h>
32 #include <linux/pagemap.h>
33 #include <linux/uio.h>
34 #include <linux/sched.h>
35 #include <linux/splice.h>
36 #include <linux/mount.h>
37 #include <linux/writeback.h>
38 #include <linux/falloc.h>
39 #include <linux/sort.h>
40 
41 #define MLOG_MASK_PREFIX ML_XATTR
42 #include <cluster/masklog.h>
43 
44 #include "ocfs2.h"
45 #include "alloc.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 
59 
/*
 * An xattr value root immediately followed by a single extent record.
 * OCFS2_XATTR_ROOT_SIZE is the size of this combined layout.
 */
struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};
64 
/*
 * In-memory view of one xattr bucket: the buffer heads of its blocks
 * (at most OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET of them) and a pointer to
 * the bucket's xattr header.
 */
struct ocfs2_xattr_bucket {
	struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
	struct ocfs2_xattr_header *xh;
};
69 
/* Bytes taken by a value root plus its one embedded extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values of at most this many bytes are flagged "local" and stored next
 * to the entry name; larger values are stored outside, behind a value root.
 */
#define OCFS2_XATTR_INLINE_SIZE	80

/*
 * Template value root whose extent list has l_count set to 1.
 * NOTE(review): presumably copied when a new outside value is created;
 * no user is visible in this part of the file - confirm.
 */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
76 
/* NULL-terminated list of the xattr handlers provided by ocfs2. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_trusted_handler,
	NULL
};

/*
 * Handler lookup table indexed by the OCFS2_XATTR_INDEX_* name index;
 * consulted by ocfs2_xattr_handler().  Indexes without an initializer
 * are NULL.
 */
static struct xattr_handler *ocfs2_xattr_handler_map[] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
};
87 
/* Describes the extended attribute a set/remove operation works on. */
struct ocfs2_xattr_info {
	int name_index;		/* OCFS2_XATTR_INDEX_* type of the attribute */
	const char *name;	/* NUL-terminated name; callers strlen() it */
	const void *value;	/* new value; NULL makes the set path remove */
	size_t value_len;	/* length of value in bytes */
};
94 
/* State of a lookup for one extended attribute. */
struct ocfs2_xattr_search {
	/* Buffer head of the inode block. */
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attributes.  When the attributes live in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	/* Header of the entry array being searched. */
	struct ocfs2_xattr_header *header;
	/* Bucket buffers; used when the xattr block is indexed. */
	struct ocfs2_xattr_bucket bucket;
	/* Address that xe_name_offset values are relative to. */
	void *base;
	/* End of the storage area being searched. */
	void *end;
	/* Entry the search stopped at (the match, when one was found). */
	struct ocfs2_xattr_entry *here;
	/* Non-zero (initialised to -ENODATA) while no entry has been found. */
	int not_found;
};
109 
/*
 * Forward declarations of static helpers for indexed (bucket based)
 * xattr blocks.  They are used below before their definitions, which
 * are not in this part of the file.
 */
static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct ocfs2_xattr_tree_root *xt,
					char *buffer,
					size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs);

static int ocfs2_delete_xattr_index_block(struct inode *inode,
					  struct buffer_head *xb_bh);
136 
137 static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
138 {
139 	struct xattr_handler *handler = NULL;
140 
141 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
142 		handler = ocfs2_xattr_handler_map[name_index];
143 
144 	return handler;
145 }
146 
147 static inline u32 ocfs2_xattr_name_hash(struct inode *inode,
148 					char *prefix,
149 					int prefix_len,
150 					char *name,
151 					int name_len)
152 {
153 	/* Get hash value of uuid from super block */
154 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
155 	int i;
156 
157 	/* hash extended attribute prefix */
158 	for (i = 0; i < prefix_len; i++) {
159 		hash = (hash << OCFS2_HASH_SHIFT) ^
160 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
161 		       *prefix++;
162 	}
163 	/* hash extended attribute name */
164 	for (i = 0; i < name_len; i++) {
165 		hash = (hash << OCFS2_HASH_SHIFT) ^
166 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
167 		       *name++;
168 	}
169 
170 	return hash;
171 }
172 
173 /*
174  * ocfs2_xattr_hash_entry()
175  *
176  * Compute the hash of an extended attribute.
177  */
178 static void ocfs2_xattr_hash_entry(struct inode *inode,
179 				   struct ocfs2_xattr_header *header,
180 				   struct ocfs2_xattr_entry *entry)
181 {
182 	u32 hash = 0;
183 	struct xattr_handler *handler =
184 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
185 	char *prefix = handler->prefix;
186 	char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
187 	int prefix_len = strlen(handler->prefix);
188 
189 	hash = ocfs2_xattr_name_hash(inode, prefix, prefix_len, name,
190 				     entry->xe_name_len);
191 	entry->xe_name_hash = cpu_to_le32(hash);
192 
193 	return;
194 }
195 
/*
 * Grow the cluster allocation of an outside xattr value by
 * clusters_to_add clusters.
 *
 * @xattr_bh is the buffer holding the value root @xv; it is journalled
 * around every insertion attempt.
 *
 * A single attempt may add fewer clusters than requested.  The reason
 * reported by ocfs2_add_clusters_in_btree() then selects the retry:
 *  - RESTART_TRANS: extend the current transaction and retry from
 *    restarted_transaction;
 *  - RESTART_META: commit, free the allocation contexts and start over
 *    from restart_all.
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_xattr_extend_allocation(struct inode *inode,
					 u32 clusters_to_add,
					 struct buffer_head *xattr_bh,
					 struct ocfs2_xattr_value_root *xv)
{
	int status = 0;
	int restart_func = 0;
	int credits = 0;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	enum ocfs2_alloc_restarted why;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_list *root_el = &xv->xr_list;
	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);

	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);

restart_all:

	/* Reserve data/metadata allocators for the clusters still missing. */
	status = ocfs2_lock_allocators(inode, xattr_bh, root_el,
				       clusters_to_add, 0, &data_ac,
				       &meta_ac, OCFS2_XATTR_VALUE_EXTENT, xv);
	if (status) {
		mlog_errno(status);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		/* Clear handle so the leave path does not commit it. */
		handle = NULL;
		mlog_errno(status);
		goto leave;
	}

restarted_transaction:
	status = ocfs2_journal_access(handle, inode, xattr_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	/* Remember the old count to learn how many clusters were added. */
	prev_clusters = le32_to_cpu(xv->xr_clusters);
	status = ocfs2_add_clusters_in_btree(osb,
					     inode,
					     &logical_start,
					     clusters_to_add,
					     0,
					     xattr_bh,
					     root_el,
					     handle,
					     data_ac,
					     meta_ac,
					     &why,
					     OCFS2_XATTR_VALUE_EXTENT,
					     xv);
	/* -EAGAIN is not fatal here; the restart reason is examined below. */
	if ((status < 0) && (status != -EAGAIN)) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, xattr_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;

	if (why != RESTART_NONE && clusters_to_add) {
		if (why == RESTART_META) {
			mlog(0, "restarting function.\n");
			/* Acted upon in the leave path, after cleanup. */
			restart_func = 1;
		} else {
			BUG_ON(why != RESTART_TRANS);

			mlog(0, "restarting transaction.\n");
			/* TODO: This can be more intelligent. */
			credits = ocfs2_calc_extend_credits(osb->sb,
							    root_el,
							    clusters_to_add);
			status = ocfs2_extend_trans(handle, credits);
			if (status < 0) {
				/* handle still has to be committed at
				 * this point. */
				/*
				 * NOTE(review): the error returned by
				 * ocfs2_extend_trans() is replaced by
				 * -ENOMEM here.
				 */
				status = -ENOMEM;
				mlog_errno(status);
				goto leave;
			}
			goto restarted_transaction;
		}
	}

leave:
	/* Release everything acquired so far; also runs before a restart. */
	if (handle) {
		ocfs2_commit_trans(osb, handle);
		handle = NULL;
	}
	if (data_ac) {
		ocfs2_free_alloc_context(data_ac);
		data_ac = NULL;
	}
	if (meta_ac) {
		ocfs2_free_alloc_context(meta_ac);
		meta_ac = NULL;
	}
	if ((!status) && restart_func) {
		restart_func = 0;
		goto restart_all;
	}

	return status;
}
313 
314 static int __ocfs2_remove_xattr_range(struct inode *inode,
315 				      struct buffer_head *root_bh,
316 				      struct ocfs2_xattr_value_root *xv,
317 				      u32 cpos, u32 phys_cpos, u32 len,
318 				      struct ocfs2_cached_dealloc_ctxt *dealloc)
319 {
320 	int ret;
321 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
322 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
323 	struct inode *tl_inode = osb->osb_tl_inode;
324 	handle_t *handle;
325 	struct ocfs2_alloc_context *meta_ac = NULL;
326 
327 	ret = ocfs2_lock_allocators(inode, root_bh, &xv->xr_list,
328 				    0, 1, NULL, &meta_ac,
329 				    OCFS2_XATTR_VALUE_EXTENT, xv);
330 	if (ret) {
331 		mlog_errno(ret);
332 		return ret;
333 	}
334 
335 	mutex_lock(&tl_inode->i_mutex);
336 
337 	if (ocfs2_truncate_log_needs_flush(osb)) {
338 		ret = __ocfs2_flush_truncate_log(osb);
339 		if (ret < 0) {
340 			mlog_errno(ret);
341 			goto out;
342 		}
343 	}
344 
345 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
346 	if (IS_ERR(handle)) {
347 		ret = PTR_ERR(handle);
348 		mlog_errno(ret);
349 		goto out;
350 	}
351 
352 	ret = ocfs2_journal_access(handle, inode, root_bh,
353 				   OCFS2_JOURNAL_ACCESS_WRITE);
354 	if (ret) {
355 		mlog_errno(ret);
356 		goto out_commit;
357 	}
358 
359 	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
360 				  dealloc, OCFS2_XATTR_VALUE_EXTENT, xv);
361 	if (ret) {
362 		mlog_errno(ret);
363 		goto out_commit;
364 	}
365 
366 	le32_add_cpu(&xv->xr_clusters, -len);
367 
368 	ret = ocfs2_journal_dirty(handle, root_bh);
369 	if (ret) {
370 		mlog_errno(ret);
371 		goto out_commit;
372 	}
373 
374 	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
375 	if (ret)
376 		mlog_errno(ret);
377 
378 out_commit:
379 	ocfs2_commit_trans(osb, handle);
380 out:
381 	mutex_unlock(&tl_inode->i_mutex);
382 
383 	if (meta_ac)
384 		ocfs2_free_alloc_context(meta_ac);
385 
386 	return ret;
387 }
388 
389 static int ocfs2_xattr_shrink_size(struct inode *inode,
390 				   u32 old_clusters,
391 				   u32 new_clusters,
392 				   struct buffer_head *root_bh,
393 				   struct ocfs2_xattr_value_root *xv)
394 {
395 	int ret = 0;
396 	u32 trunc_len, cpos, phys_cpos, alloc_size;
397 	u64 block;
398 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
399 	struct ocfs2_cached_dealloc_ctxt dealloc;
400 
401 	ocfs2_init_dealloc_ctxt(&dealloc);
402 
403 	if (old_clusters <= new_clusters)
404 		return 0;
405 
406 	cpos = new_clusters;
407 	trunc_len = old_clusters - new_clusters;
408 	while (trunc_len) {
409 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
410 					       &alloc_size, &xv->xr_list);
411 		if (ret) {
412 			mlog_errno(ret);
413 			goto out;
414 		}
415 
416 		if (alloc_size > trunc_len)
417 			alloc_size = trunc_len;
418 
419 		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
420 						 phys_cpos, alloc_size,
421 						 &dealloc);
422 		if (ret) {
423 			mlog_errno(ret);
424 			goto out;
425 		}
426 
427 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
428 		ocfs2_remove_xattr_clusters_from_cache(inode, block,
429 						       alloc_size);
430 		cpos += alloc_size;
431 		trunc_len -= alloc_size;
432 	}
433 
434 out:
435 	ocfs2_schedule_truncate_log_flush(osb, 1);
436 	ocfs2_run_deallocs(osb, &dealloc);
437 
438 	return ret;
439 }
440 
441 static int ocfs2_xattr_value_truncate(struct inode *inode,
442 				      struct buffer_head *root_bh,
443 				      struct ocfs2_xattr_value_root *xv,
444 				      int len)
445 {
446 	int ret;
447 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
448 	u32 old_clusters = le32_to_cpu(xv->xr_clusters);
449 
450 	if (new_clusters == old_clusters)
451 		return 0;
452 
453 	if (new_clusters > old_clusters)
454 		ret = ocfs2_xattr_extend_allocation(inode,
455 						    new_clusters - old_clusters,
456 						    root_bh, xv);
457 	else
458 		ret = ocfs2_xattr_shrink_size(inode,
459 					      old_clusters, new_clusters,
460 					      root_bh, xv);
461 
462 	return ret;
463 }
464 
465 static int ocfs2_xattr_list_entries(struct inode *inode,
466 				    struct ocfs2_xattr_header *header,
467 				    char *buffer, size_t buffer_size)
468 {
469 	size_t rest = buffer_size;
470 	int i;
471 
472 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
473 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
474 		struct xattr_handler *handler =
475 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
476 
477 		if (handler) {
478 			size_t size = handler->list(inode, buffer, rest,
479 					((char *)header +
480 					le16_to_cpu(entry->xe_name_offset)),
481 					entry->xe_name_len);
482 			if (buffer) {
483 				if (size > rest)
484 					return -ERANGE;
485 				buffer += size;
486 			}
487 			rest -= size;
488 		}
489 	}
490 
491 	return buffer_size - rest;
492 }
493 
494 static int ocfs2_xattr_ibody_list(struct inode *inode,
495 				  struct ocfs2_dinode *di,
496 				  char *buffer,
497 				  size_t buffer_size)
498 {
499 	struct ocfs2_xattr_header *header = NULL;
500 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
501 	int ret = 0;
502 
503 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
504 		return ret;
505 
506 	header = (struct ocfs2_xattr_header *)
507 		 ((void *)di + inode->i_sb->s_blocksize -
508 		 le16_to_cpu(di->i_xattr_inline_size));
509 
510 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
511 
512 	return ret;
513 }
514 
515 static int ocfs2_xattr_block_list(struct inode *inode,
516 				  struct ocfs2_dinode *di,
517 				  char *buffer,
518 				  size_t buffer_size)
519 {
520 	struct buffer_head *blk_bh = NULL;
521 	struct ocfs2_xattr_block *xb;
522 	int ret = 0;
523 
524 	if (!di->i_xattr_loc)
525 		return ret;
526 
527 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
528 			       le64_to_cpu(di->i_xattr_loc),
529 			       &blk_bh, OCFS2_BH_CACHED, inode);
530 	if (ret < 0) {
531 		mlog_errno(ret);
532 		return ret;
533 	}
534 	/*Verify the signature of xattr block*/
535 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
536 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
537 		ret = -EFAULT;
538 		goto cleanup;
539 	}
540 
541 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
542 
543 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
544 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
545 		ret = ocfs2_xattr_list_entries(inode, header,
546 					       buffer, buffer_size);
547 	} else {
548 		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
549 		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
550 						   buffer, buffer_size);
551 	}
552 cleanup:
553 	brelse(blk_bh);
554 
555 	return ret;
556 }
557 
558 ssize_t ocfs2_listxattr(struct dentry *dentry,
559 			char *buffer,
560 			size_t size)
561 {
562 	int ret = 0, i_ret = 0, b_ret = 0;
563 	struct buffer_head *di_bh = NULL;
564 	struct ocfs2_dinode *di = NULL;
565 	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
566 
567 	if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
568 		return -EOPNOTSUPP;
569 
570 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
571 		return ret;
572 
573 	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
574 	if (ret < 0) {
575 		mlog_errno(ret);
576 		return ret;
577 	}
578 
579 	di = (struct ocfs2_dinode *)di_bh->b_data;
580 
581 	down_read(&oi->ip_xattr_sem);
582 	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
583 	if (i_ret < 0)
584 		b_ret = 0;
585 	else {
586 		if (buffer) {
587 			buffer += i_ret;
588 			size -= i_ret;
589 		}
590 		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
591 					       buffer, size);
592 		if (b_ret < 0)
593 			i_ret = 0;
594 	}
595 	up_read(&oi->ip_xattr_sem);
596 	ocfs2_inode_unlock(dentry->d_inode, 0);
597 
598 	brelse(di_bh);
599 
600 	return i_ret + b_ret;
601 }
602 
603 static int ocfs2_xattr_find_entry(int name_index,
604 				  const char *name,
605 				  struct ocfs2_xattr_search *xs)
606 {
607 	struct ocfs2_xattr_entry *entry;
608 	size_t name_len;
609 	int i, cmp = 1;
610 
611 	if (name == NULL)
612 		return -EINVAL;
613 
614 	name_len = strlen(name);
615 	entry = xs->here;
616 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
617 		cmp = name_index - ocfs2_xattr_get_type(entry);
618 		if (!cmp)
619 			cmp = name_len - entry->xe_name_len;
620 		if (!cmp)
621 			cmp = memcmp(name, (xs->base +
622 				     le16_to_cpu(entry->xe_name_offset)),
623 				     name_len);
624 		if (cmp == 0)
625 			break;
626 		entry += 1;
627 	}
628 	xs->here = entry;
629 
630 	return cmp ? -ENODATA : 0;
631 }
632 
633 static int ocfs2_xattr_get_value_outside(struct inode *inode,
634 					 struct ocfs2_xattr_value_root *xv,
635 					 void *buffer,
636 					 size_t len)
637 {
638 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
639 	u64 blkno;
640 	int i, ret = 0;
641 	size_t cplen, blocksize;
642 	struct buffer_head *bh = NULL;
643 	struct ocfs2_extent_list *el;
644 
645 	el = &xv->xr_list;
646 	clusters = le32_to_cpu(xv->xr_clusters);
647 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
648 	blocksize = inode->i_sb->s_blocksize;
649 
650 	cpos = 0;
651 	while (cpos < clusters) {
652 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
653 					       &num_clusters, el);
654 		if (ret) {
655 			mlog_errno(ret);
656 			goto out;
657 		}
658 
659 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
660 		/* Copy ocfs2_xattr_value */
661 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
662 			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
663 					       &bh, OCFS2_BH_CACHED, inode);
664 			if (ret) {
665 				mlog_errno(ret);
666 				goto out;
667 			}
668 
669 			cplen = len >= blocksize ? blocksize : len;
670 			memcpy(buffer, bh->b_data, cplen);
671 			len -= cplen;
672 			buffer += cplen;
673 
674 			brelse(bh);
675 			bh = NULL;
676 			if (len == 0)
677 				break;
678 		}
679 		cpos += num_clusters;
680 	}
681 out:
682 	return ret;
683 }
684 
685 static int ocfs2_xattr_ibody_get(struct inode *inode,
686 				 int name_index,
687 				 const char *name,
688 				 void *buffer,
689 				 size_t buffer_size,
690 				 struct ocfs2_xattr_search *xs)
691 {
692 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
693 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
694 	struct ocfs2_xattr_value_root *xv;
695 	size_t size;
696 	int ret = 0;
697 
698 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
699 		return -ENODATA;
700 
701 	xs->end = (void *)di + inode->i_sb->s_blocksize;
702 	xs->header = (struct ocfs2_xattr_header *)
703 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
704 	xs->base = (void *)xs->header;
705 	xs->here = xs->header->xh_entries;
706 
707 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
708 	if (ret)
709 		return ret;
710 	size = le64_to_cpu(xs->here->xe_value_size);
711 	if (buffer) {
712 		if (size > buffer_size)
713 			return -ERANGE;
714 		if (ocfs2_xattr_is_local(xs->here)) {
715 			memcpy(buffer, (void *)xs->base +
716 			       le16_to_cpu(xs->here->xe_name_offset) +
717 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
718 		} else {
719 			xv = (struct ocfs2_xattr_value_root *)
720 				(xs->base + le16_to_cpu(
721 				 xs->here->xe_name_offset) +
722 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
723 			ret = ocfs2_xattr_get_value_outside(inode, xv,
724 							    buffer, size);
725 			if (ret < 0) {
726 				mlog_errno(ret);
727 				return ret;
728 			}
729 		}
730 	}
731 
732 	return size;
733 }
734 
735 static int ocfs2_xattr_block_get(struct inode *inode,
736 				 int name_index,
737 				 const char *name,
738 				 void *buffer,
739 				 size_t buffer_size,
740 				 struct ocfs2_xattr_search *xs)
741 {
742 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
743 	struct buffer_head *blk_bh = NULL;
744 	struct ocfs2_xattr_block *xb;
745 	struct ocfs2_xattr_value_root *xv;
746 	size_t size;
747 	int ret = -ENODATA, name_offset, name_len, block_off, i;
748 
749 	if (!di->i_xattr_loc)
750 		return ret;
751 
752 	memset(&xs->bucket, 0, sizeof(xs->bucket));
753 
754 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
755 			       le64_to_cpu(di->i_xattr_loc),
756 			       &blk_bh, OCFS2_BH_CACHED, inode);
757 	if (ret < 0) {
758 		mlog_errno(ret);
759 		return ret;
760 	}
761 	/*Verify the signature of xattr block*/
762 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
763 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
764 		ret = -EFAULT;
765 		goto cleanup;
766 	}
767 
768 	xs->xattr_bh = blk_bh;
769 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
770 
771 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
772 		xs->header = &xb->xb_attrs.xb_header;
773 		xs->base = (void *)xs->header;
774 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
775 		xs->here = xs->header->xh_entries;
776 
777 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
778 	} else
779 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
780 						   name_index,
781 						   name, xs);
782 
783 	if (ret)
784 		goto cleanup;
785 	size = le64_to_cpu(xs->here->xe_value_size);
786 	if (buffer) {
787 		ret = -ERANGE;
788 		if (size > buffer_size)
789 			goto cleanup;
790 
791 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
792 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
793 		i = xs->here - xs->header->xh_entries;
794 
795 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
796 			ret = ocfs2_xattr_bucket_get_name_value(inode,
797 								xs->bucket.xh,
798 								i,
799 								&block_off,
800 								&name_offset);
801 			xs->base = xs->bucket.bhs[block_off]->b_data;
802 		}
803 		if (ocfs2_xattr_is_local(xs->here)) {
804 			memcpy(buffer, (void *)xs->base +
805 			       name_offset + name_len, size);
806 		} else {
807 			xv = (struct ocfs2_xattr_value_root *)
808 				(xs->base + name_offset + name_len);
809 			ret = ocfs2_xattr_get_value_outside(inode, xv,
810 							    buffer, size);
811 			if (ret < 0) {
812 				mlog_errno(ret);
813 				goto cleanup;
814 			}
815 		}
816 	}
817 	ret = size;
818 cleanup:
819 	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
820 		brelse(xs->bucket.bhs[i]);
821 	memset(&xs->bucket, 0, sizeof(xs->bucket));
822 
823 	brelse(blk_bh);
824 	return ret;
825 }
826 
827 /* ocfs2_xattr_get()
828  *
829  * Copy an extended attribute into the buffer provided.
830  * Buffer is NULL to compute the size of buffer required.
831  */
832 int ocfs2_xattr_get(struct inode *inode,
833 		    int name_index,
834 		    const char *name,
835 		    void *buffer,
836 		    size_t buffer_size)
837 {
838 	int ret;
839 	struct ocfs2_dinode *di = NULL;
840 	struct buffer_head *di_bh = NULL;
841 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
842 	struct ocfs2_xattr_search xis = {
843 		.not_found = -ENODATA,
844 	};
845 	struct ocfs2_xattr_search xbs = {
846 		.not_found = -ENODATA,
847 	};
848 
849 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
850 		return -EOPNOTSUPP;
851 
852 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
853 		ret = -ENODATA;
854 
855 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
856 	if (ret < 0) {
857 		mlog_errno(ret);
858 		return ret;
859 	}
860 	xis.inode_bh = xbs.inode_bh = di_bh;
861 	di = (struct ocfs2_dinode *)di_bh->b_data;
862 
863 	down_read(&oi->ip_xattr_sem);
864 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
865 				    buffer_size, &xis);
866 	if (ret == -ENODATA)
867 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
868 					    buffer_size, &xbs);
869 	up_read(&oi->ip_xattr_sem);
870 	ocfs2_inode_unlock(inode, 0);
871 
872 	brelse(di_bh);
873 
874 	return ret;
875 }
876 
877 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
878 					   struct ocfs2_xattr_value_root *xv,
879 					   const void *value,
880 					   int value_len)
881 {
882 	int ret = 0, i, cp_len, credits;
883 	u16 blocksize = inode->i_sb->s_blocksize;
884 	u32 p_cluster, num_clusters;
885 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
886 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
887 	u64 blkno;
888 	struct buffer_head *bh = NULL;
889 	handle_t *handle;
890 
891 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
892 
893 	credits = clusters * bpc;
894 	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
895 	if (IS_ERR(handle)) {
896 		ret = PTR_ERR(handle);
897 		mlog_errno(ret);
898 		goto out;
899 	}
900 
901 	while (cpos < clusters) {
902 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
903 					       &num_clusters, &xv->xr_list);
904 		if (ret) {
905 			mlog_errno(ret);
906 			goto out_commit;
907 		}
908 
909 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
910 
911 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
912 			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
913 					       &bh, OCFS2_BH_CACHED, inode);
914 			if (ret) {
915 				mlog_errno(ret);
916 				goto out_commit;
917 			}
918 
919 			ret = ocfs2_journal_access(handle,
920 						   inode,
921 						   bh,
922 						   OCFS2_JOURNAL_ACCESS_WRITE);
923 			if (ret < 0) {
924 				mlog_errno(ret);
925 				goto out_commit;
926 			}
927 
928 			cp_len = value_len > blocksize ? blocksize : value_len;
929 			memcpy(bh->b_data, value, cp_len);
930 			value_len -= cp_len;
931 			value += cp_len;
932 			if (cp_len < blocksize)
933 				memset(bh->b_data + cp_len, 0,
934 				       blocksize - cp_len);
935 
936 			ret = ocfs2_journal_dirty(handle, bh);
937 			if (ret < 0) {
938 				mlog_errno(ret);
939 				goto out_commit;
940 			}
941 			brelse(bh);
942 			bh = NULL;
943 
944 			/*
945 			 * XXX: do we need to empty all the following
946 			 * blocks in this cluster?
947 			 */
948 			if (!value_len)
949 				break;
950 		}
951 		cpos += num_clusters;
952 	}
953 out_commit:
954 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
955 out:
956 	brelse(bh);
957 
958 	return ret;
959 }
960 
961 static int ocfs2_xattr_cleanup(struct inode *inode,
962 			       struct ocfs2_xattr_info *xi,
963 			       struct ocfs2_xattr_search *xs,
964 			       size_t offs)
965 {
966 	handle_t *handle = NULL;
967 	int ret = 0;
968 	size_t name_len = strlen(xi->name);
969 	void *val = xs->base + offs;
970 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
971 
972 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
973 				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
974 	if (IS_ERR(handle)) {
975 		ret = PTR_ERR(handle);
976 		mlog_errno(ret);
977 		goto out;
978 	}
979 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
980 				   OCFS2_JOURNAL_ACCESS_WRITE);
981 	if (ret) {
982 		mlog_errno(ret);
983 		goto out_commit;
984 	}
985 	/* Decrease xattr count */
986 	le16_add_cpu(&xs->header->xh_count, -1);
987 	/* Remove the xattr entry and tree root which has already be set*/
988 	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
989 	memset(val, 0, size);
990 
991 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
992 	if (ret < 0)
993 		mlog_errno(ret);
994 out_commit:
995 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
996 out:
997 	return ret;
998 }
999 
1000 static int ocfs2_xattr_update_entry(struct inode *inode,
1001 				    struct ocfs2_xattr_info *xi,
1002 				    struct ocfs2_xattr_search *xs,
1003 				    size_t offs)
1004 {
1005 	handle_t *handle = NULL;
1006 	int ret = 0;
1007 
1008 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1009 				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1010 	if (IS_ERR(handle)) {
1011 		ret = PTR_ERR(handle);
1012 		mlog_errno(ret);
1013 		goto out;
1014 	}
1015 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1016 				   OCFS2_JOURNAL_ACCESS_WRITE);
1017 	if (ret) {
1018 		mlog_errno(ret);
1019 		goto out_commit;
1020 	}
1021 
1022 	xs->here->xe_name_offset = cpu_to_le16(offs);
1023 	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1024 	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1025 		ocfs2_xattr_set_local(xs->here, 1);
1026 	else
1027 		ocfs2_xattr_set_local(xs->here, 0);
1028 	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1029 
1030 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1031 	if (ret < 0)
1032 		mlog_errno(ret);
1033 out_commit:
1034 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1035 out:
1036 	return ret;
1037 }
1038 
1039 /*
1040  * ocfs2_xattr_set_value_outside()
1041  *
1042  * Set large size value in B tree.
1043  */
1044 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1045 					 struct ocfs2_xattr_info *xi,
1046 					 struct ocfs2_xattr_search *xs,
1047 					 size_t offs)
1048 {
1049 	size_t name_len = strlen(xi->name);
1050 	void *val = xs->base + offs;
1051 	struct ocfs2_xattr_value_root *xv = NULL;
1052 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1053 	int ret = 0;
1054 
1055 	memset(val, 0, size);
1056 	memcpy(val, xi->name, name_len);
1057 	xv = (struct ocfs2_xattr_value_root *)
1058 		(val + OCFS2_XATTR_SIZE(name_len));
1059 	xv->xr_clusters = 0;
1060 	xv->xr_last_eb_blk = 0;
1061 	xv->xr_list.l_tree_depth = 0;
1062 	xv->xr_list.l_count = cpu_to_le16(1);
1063 	xv->xr_list.l_next_free_rec = 0;
1064 
1065 	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1066 					 xi->value_len);
1067 	if (ret < 0) {
1068 		mlog_errno(ret);
1069 		return ret;
1070 	}
1071 	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1072 					      xi->value_len);
1073 	if (ret < 0) {
1074 		mlog_errno(ret);
1075 		return ret;
1076 	}
1077 	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1078 	if (ret < 0)
1079 		mlog_errno(ret);
1080 
1081 	return ret;
1082 }
1083 
1084 /*
1085  * ocfs2_xattr_set_entry_local()
1086  *
1087  * Set, replace or remove extended attribute in local.
1088  */
1089 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1090 					struct ocfs2_xattr_info *xi,
1091 					struct ocfs2_xattr_search *xs,
1092 					struct ocfs2_xattr_entry *last,
1093 					size_t min_offs)
1094 {
1095 	size_t name_len = strlen(xi->name);
1096 	int i;
1097 
1098 	if (xi->value && xs->not_found) {
1099 		/* Insert the new xattr entry. */
1100 		le16_add_cpu(&xs->header->xh_count, 1);
1101 		ocfs2_xattr_set_type(last, xi->name_index);
1102 		ocfs2_xattr_set_local(last, 1);
1103 		last->xe_name_len = name_len;
1104 	} else {
1105 		void *first_val;
1106 		void *val;
1107 		size_t offs, size;
1108 
1109 		first_val = xs->base + min_offs;
1110 		offs = le16_to_cpu(xs->here->xe_name_offset);
1111 		val = xs->base + offs;
1112 
1113 		if (le64_to_cpu(xs->here->xe_value_size) >
1114 		    OCFS2_XATTR_INLINE_SIZE)
1115 			size = OCFS2_XATTR_SIZE(name_len) +
1116 				OCFS2_XATTR_ROOT_SIZE;
1117 		else
1118 			size = OCFS2_XATTR_SIZE(name_len) +
1119 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1120 
1121 		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1122 				OCFS2_XATTR_SIZE(xi->value_len)) {
1123 			/* The old and the new value have the
1124 			   same size. Just replace the value. */
1125 			ocfs2_xattr_set_local(xs->here, 1);
1126 			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1127 			/* Clear value bytes. */
1128 			memset(val + OCFS2_XATTR_SIZE(name_len),
1129 			       0,
1130 			       OCFS2_XATTR_SIZE(xi->value_len));
1131 			memcpy(val + OCFS2_XATTR_SIZE(name_len),
1132 			       xi->value,
1133 			       xi->value_len);
1134 			return;
1135 		}
1136 		/* Remove the old name+value. */
1137 		memmove(first_val + size, first_val, val - first_val);
1138 		memset(first_val, 0, size);
1139 		xs->here->xe_name_hash = 0;
1140 		xs->here->xe_name_offset = 0;
1141 		ocfs2_xattr_set_local(xs->here, 1);
1142 		xs->here->xe_value_size = 0;
1143 
1144 		min_offs += size;
1145 
1146 		/* Adjust all value offsets. */
1147 		last = xs->header->xh_entries;
1148 		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1149 			size_t o = le16_to_cpu(last->xe_name_offset);
1150 
1151 			if (o < offs)
1152 				last->xe_name_offset = cpu_to_le16(o + size);
1153 			last += 1;
1154 		}
1155 
1156 		if (!xi->value) {
1157 			/* Remove the old entry. */
1158 			last -= 1;
1159 			memmove(xs->here, xs->here + 1,
1160 				(void *)last - (void *)xs->here);
1161 			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1162 			le16_add_cpu(&xs->header->xh_count, -1);
1163 		}
1164 	}
1165 	if (xi->value) {
1166 		/* Insert the new name+value. */
1167 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1168 				OCFS2_XATTR_SIZE(xi->value_len);
1169 		void *val = xs->base + min_offs - size;
1170 
1171 		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1172 		memset(val, 0, size);
1173 		memcpy(val, xi->name, name_len);
1174 		memcpy(val + OCFS2_XATTR_SIZE(name_len),
1175 		       xi->value,
1176 		       xi->value_len);
1177 		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1178 		ocfs2_xattr_set_local(xs->here, 1);
1179 		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1180 	}
1181 
1182 	return;
1183 }
1184 
1185 /*
1186  * ocfs2_xattr_set_entry()
1187  *
1188  * Set extended attribute entry into inode or block.
1189  *
1190  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1191  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1192  * then set value in B tree with set_value_outside().
1193  */
1194 static int ocfs2_xattr_set_entry(struct inode *inode,
1195 				 struct ocfs2_xattr_info *xi,
1196 				 struct ocfs2_xattr_search *xs,
1197 				 int flag)
1198 {
1199 	struct ocfs2_xattr_entry *last;
1200 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1201 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1202 	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1203 	size_t size_l = 0;
1204 	handle_t *handle = NULL;
1205 	int free, i, ret;
1206 	struct ocfs2_xattr_info xi_l = {
1207 		.name_index = xi->name_index,
1208 		.name = xi->name,
1209 		.value = xi->value,
1210 		.value_len = xi->value_len,
1211 	};
1212 
1213 	/* Compute min_offs, last and free space. */
1214 	last = xs->header->xh_entries;
1215 
1216 	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1217 		size_t offs = le16_to_cpu(last->xe_name_offset);
1218 		if (offs < min_offs)
1219 			min_offs = offs;
1220 		last += 1;
1221 	}
1222 
1223 	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1224 	if (free < 0)
1225 		return -EFAULT;
1226 
1227 	if (!xs->not_found) {
1228 		size_t size = 0;
1229 		if (ocfs2_xattr_is_local(xs->here))
1230 			size = OCFS2_XATTR_SIZE(name_len) +
1231 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1232 		else
1233 			size = OCFS2_XATTR_SIZE(name_len) +
1234 				OCFS2_XATTR_ROOT_SIZE;
1235 		free += (size + sizeof(struct ocfs2_xattr_entry));
1236 	}
1237 	/* Check free space in inode or block */
1238 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1239 		if (free < sizeof(struct ocfs2_xattr_entry) +
1240 			   OCFS2_XATTR_SIZE(name_len) +
1241 			   OCFS2_XATTR_ROOT_SIZE) {
1242 			ret = -ENOSPC;
1243 			goto out;
1244 		}
1245 		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1246 		xi_l.value = (void *)&def_xv;
1247 		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1248 	} else if (xi->value) {
1249 		if (free < sizeof(struct ocfs2_xattr_entry) +
1250 			   OCFS2_XATTR_SIZE(name_len) +
1251 			   OCFS2_XATTR_SIZE(xi->value_len)) {
1252 			ret = -ENOSPC;
1253 			goto out;
1254 		}
1255 	}
1256 
1257 	if (!xs->not_found) {
1258 		/* For existing extended attribute */
1259 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1260 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1261 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1262 		void *val = xs->base + offs;
1263 
1264 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1265 			/* Replace existing local xattr with tree root */
1266 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1267 							    offs);
1268 			if (ret < 0)
1269 				mlog_errno(ret);
1270 			goto out;
1271 		} else if (!ocfs2_xattr_is_local(xs->here)) {
1272 			/* For existing xattr which has value outside */
1273 			struct ocfs2_xattr_value_root *xv = NULL;
1274 			xv = (struct ocfs2_xattr_value_root *)(val +
1275 				OCFS2_XATTR_SIZE(name_len));
1276 
1277 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1278 				/*
1279 				 * If new value need set outside also,
1280 				 * first truncate old value to new value,
1281 				 * then set new value with set_value_outside().
1282 				 */
1283 				ret = ocfs2_xattr_value_truncate(inode,
1284 								 xs->xattr_bh,
1285 								 xv,
1286 								 xi->value_len);
1287 				if (ret < 0) {
1288 					mlog_errno(ret);
1289 					goto out;
1290 				}
1291 
1292 				ret = __ocfs2_xattr_set_value_outside(inode,
1293 								xv,
1294 								xi->value,
1295 								xi->value_len);
1296 				if (ret < 0) {
1297 					mlog_errno(ret);
1298 					goto out;
1299 				}
1300 
1301 				ret = ocfs2_xattr_update_entry(inode,
1302 							       xi,
1303 							       xs,
1304 							       offs);
1305 				if (ret < 0)
1306 					mlog_errno(ret);
1307 				goto out;
1308 			} else {
1309 				/*
1310 				 * If new value need set in local,
1311 				 * just trucate old value to zero.
1312 				 */
1313 				 ret = ocfs2_xattr_value_truncate(inode,
1314 								 xs->xattr_bh,
1315 								 xv,
1316 								 0);
1317 				if (ret < 0)
1318 					mlog_errno(ret);
1319 			}
1320 		}
1321 	}
1322 
1323 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1324 				   OCFS2_INODE_UPDATE_CREDITS);
1325 	if (IS_ERR(handle)) {
1326 		ret = PTR_ERR(handle);
1327 		mlog_errno(ret);
1328 		goto out;
1329 	}
1330 
1331 	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1332 				   OCFS2_JOURNAL_ACCESS_WRITE);
1333 	if (ret) {
1334 		mlog_errno(ret);
1335 		goto out_commit;
1336 	}
1337 
1338 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1339 		/*set extended attribue in external blcok*/
1340 		ret = ocfs2_extend_trans(handle,
1341 					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1342 		if (ret) {
1343 			mlog_errno(ret);
1344 			goto out_commit;
1345 		}
1346 		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1347 					   OCFS2_JOURNAL_ACCESS_WRITE);
1348 		if (ret) {
1349 			mlog_errno(ret);
1350 			goto out_commit;
1351 		}
1352 	}
1353 
1354 	/*
1355 	 * Set value in local, include set tree root in local.
1356 	 * This is the first step for value size >INLINE_SIZE.
1357 	 */
1358 	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1359 
1360 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1361 		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1362 		if (ret < 0) {
1363 			mlog_errno(ret);
1364 			goto out_commit;
1365 		}
1366 	}
1367 
1368 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1369 	    (flag & OCFS2_INLINE_XATTR_FL)) {
1370 		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1371 		unsigned int xattrsize = osb->s_xattr_inline_size;
1372 
1373 		/*
1374 		 * Adjust extent record count or inline data size
1375 		 * to reserve space for extended attribute.
1376 		 */
1377 		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1378 			struct ocfs2_inline_data *idata = &di->id2.i_data;
1379 			le16_add_cpu(&idata->id_count, -xattrsize);
1380 		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1381 			struct ocfs2_extent_list *el = &di->id2.i_list;
1382 			le16_add_cpu(&el->l_count, -(xattrsize /
1383 					sizeof(struct ocfs2_extent_rec)));
1384 		}
1385 		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1386 	}
1387 	/* Update xattr flag */
1388 	spin_lock(&oi->ip_lock);
1389 	oi->ip_dyn_features |= flag;
1390 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1391 	spin_unlock(&oi->ip_lock);
1392 	/* Update inode ctime */
1393 	inode->i_ctime = CURRENT_TIME;
1394 	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1395 	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1396 
1397 	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1398 	if (ret < 0)
1399 		mlog_errno(ret);
1400 
1401 out_commit:
1402 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1403 
1404 	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1405 		/*
1406 		 * Set value outside in B tree.
1407 		 * This is the second step for value size > INLINE_SIZE.
1408 		 */
1409 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1410 		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1411 		if (ret < 0) {
1412 			int ret2;
1413 
1414 			mlog_errno(ret);
1415 			/*
1416 			 * If set value outside failed, we have to clean
1417 			 * the junk tree root we have already set in local.
1418 			 */
1419 			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1420 			if (ret2 < 0)
1421 				mlog_errno(ret2);
1422 		}
1423 	}
1424 out:
1425 	return ret;
1426 
1427 }
1428 
1429 static int ocfs2_xattr_free_block(handle_t *handle,
1430 				  struct ocfs2_super *osb,
1431 				  struct ocfs2_xattr_block *xb)
1432 {
1433 	struct inode *xb_alloc_inode;
1434 	struct buffer_head *xb_alloc_bh = NULL;
1435 	u64 blk = le64_to_cpu(xb->xb_blkno);
1436 	u16 bit = le16_to_cpu(xb->xb_suballoc_bit);
1437 	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1438 	int ret = 0;
1439 
1440 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1441 				EXTENT_ALLOC_SYSTEM_INODE,
1442 				le16_to_cpu(xb->xb_suballoc_slot));
1443 	if (!xb_alloc_inode) {
1444 		ret = -ENOMEM;
1445 		mlog_errno(ret);
1446 		goto out;
1447 	}
1448 	mutex_lock(&xb_alloc_inode->i_mutex);
1449 
1450 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1451 	if (ret < 0) {
1452 		mlog_errno(ret);
1453 		goto out_mutex;
1454 	}
1455 	ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
1456 	if (ret < 0) {
1457 		mlog_errno(ret);
1458 		goto out_unlock;
1459 	}
1460 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1461 				       bit, bg_blkno, 1);
1462 	if (ret < 0)
1463 		mlog_errno(ret);
1464 out_unlock:
1465 	ocfs2_inode_unlock(xb_alloc_inode, 1);
1466 	brelse(xb_alloc_bh);
1467 out_mutex:
1468 	mutex_unlock(&xb_alloc_inode->i_mutex);
1469 	iput(xb_alloc_inode);
1470 out:
1471 	return ret;
1472 }
1473 
1474 static int ocfs2_remove_value_outside(struct inode*inode,
1475 				      struct buffer_head *bh,
1476 				      struct ocfs2_xattr_header *header)
1477 {
1478 	int ret = 0, i;
1479 
1480 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1481 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1482 
1483 		if (!ocfs2_xattr_is_local(entry)) {
1484 			struct ocfs2_xattr_value_root *xv;
1485 			void *val;
1486 
1487 			val = (void *)header +
1488 				le16_to_cpu(entry->xe_name_offset);
1489 			xv = (struct ocfs2_xattr_value_root *)
1490 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1491 			ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1492 			if (ret < 0) {
1493 				mlog_errno(ret);
1494 				return ret;
1495 			}
1496 		}
1497 	}
1498 
1499 	return ret;
1500 }
1501 
1502 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1503 				    struct buffer_head *di_bh)
1504 {
1505 
1506 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1507 	struct ocfs2_xattr_header *header;
1508 	int ret;
1509 
1510 	header = (struct ocfs2_xattr_header *)
1511 		 ((void *)di + inode->i_sb->s_blocksize -
1512 		 le16_to_cpu(di->i_xattr_inline_size));
1513 
1514 	ret = ocfs2_remove_value_outside(inode, di_bh, header);
1515 
1516 	return ret;
1517 }
1518 
1519 static int ocfs2_xattr_block_remove(struct inode *inode,
1520 				    struct buffer_head *blk_bh)
1521 {
1522 	struct ocfs2_xattr_block *xb;
1523 	int ret = 0;
1524 
1525 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1526 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1527 		struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1528 		ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1529 	} else
1530 		ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1531 
1532 	return ret;
1533 }
1534 
1535 /*
1536  * ocfs2_xattr_remove()
1537  *
1538  * Free extended attribute resources associated with this inode.
1539  */
1540 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1541 {
1542 	struct ocfs2_xattr_block *xb;
1543 	struct buffer_head *blk_bh = NULL;
1544 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1545 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1546 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1547 	handle_t *handle;
1548 	int ret;
1549 
1550 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1551 		return 0;
1552 
1553 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1554 		return 0;
1555 
1556 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1557 		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1558 		if (ret < 0) {
1559 			mlog_errno(ret);
1560 			goto out;
1561 		}
1562 	}
1563 	if (di->i_xattr_loc) {
1564 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1565 				       le64_to_cpu(di->i_xattr_loc),
1566 				       &blk_bh, OCFS2_BH_CACHED, inode);
1567 		if (ret < 0) {
1568 			mlog_errno(ret);
1569 			return ret;
1570 		}
1571 		/*Verify the signature of xattr block*/
1572 		if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1573 			   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1574 			ret = -EFAULT;
1575 			goto out;
1576 		}
1577 
1578 		ret = ocfs2_xattr_block_remove(inode, blk_bh);
1579 		if (ret < 0) {
1580 			mlog_errno(ret);
1581 			goto out;
1582 		}
1583 	}
1584 
1585 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1586 				   OCFS2_INODE_UPDATE_CREDITS);
1587 	if (IS_ERR(handle)) {
1588 		ret = PTR_ERR(handle);
1589 		mlog_errno(ret);
1590 		goto out;
1591 	}
1592 	ret = ocfs2_journal_access(handle, inode, di_bh,
1593 				   OCFS2_JOURNAL_ACCESS_WRITE);
1594 	if (ret) {
1595 		mlog_errno(ret);
1596 		goto out_commit;
1597 	}
1598 
1599 	if (di->i_xattr_loc) {
1600 		xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1601 		ocfs2_xattr_free_block(handle, osb, xb);
1602 		di->i_xattr_loc = cpu_to_le64(0);
1603 	}
1604 
1605 	spin_lock(&oi->ip_lock);
1606 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1607 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1608 	spin_unlock(&oi->ip_lock);
1609 
1610 	ret = ocfs2_journal_dirty(handle, di_bh);
1611 	if (ret < 0)
1612 		mlog_errno(ret);
1613 out_commit:
1614 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1615 out:
1616 	brelse(blk_bh);
1617 
1618 	return ret;
1619 }
1620 
1621 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1622 					struct ocfs2_dinode *di)
1623 {
1624 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1625 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1626 	int free;
1627 
1628 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1629 		return 0;
1630 
1631 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1632 		struct ocfs2_inline_data *idata = &di->id2.i_data;
1633 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1634 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
1635 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
1636 			le64_to_cpu(di->i_size);
1637 	} else {
1638 		struct ocfs2_extent_list *el = &di->id2.i_list;
1639 		free = (le16_to_cpu(el->l_count) -
1640 			le16_to_cpu(el->l_next_free_rec)) *
1641 			sizeof(struct ocfs2_extent_rec);
1642 	}
1643 	if (free >= xattrsize)
1644 		return 1;
1645 
1646 	return 0;
1647 }
1648 
1649 /*
1650  * ocfs2_xattr_ibody_find()
1651  *
1652  * Find extended attribute in inode block and
1653  * fill search info into struct ocfs2_xattr_search.
1654  */
1655 static int ocfs2_xattr_ibody_find(struct inode *inode,
1656 				  int name_index,
1657 				  const char *name,
1658 				  struct ocfs2_xattr_search *xs)
1659 {
1660 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1661 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1662 	int ret;
1663 	int has_space = 0;
1664 
1665 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1666 		return 0;
1667 
1668 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1669 		down_read(&oi->ip_alloc_sem);
1670 		has_space = ocfs2_xattr_has_space_inline(inode, di);
1671 		up_read(&oi->ip_alloc_sem);
1672 		if (!has_space)
1673 			return 0;
1674 	}
1675 
1676 	xs->xattr_bh = xs->inode_bh;
1677 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1678 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1679 		xs->header = (struct ocfs2_xattr_header *)
1680 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1681 	else
1682 		xs->header = (struct ocfs2_xattr_header *)
1683 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1684 	xs->base = (void *)xs->header;
1685 	xs->here = xs->header->xh_entries;
1686 
1687 	/* Find the named attribute. */
1688 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1689 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1690 		if (ret && ret != -ENODATA)
1691 			return ret;
1692 		xs->not_found = ret;
1693 	}
1694 
1695 	return 0;
1696 }
1697 
1698 /*
1699  * ocfs2_xattr_ibody_set()
1700  *
1701  * Set, replace or remove an extended attribute into inode block.
1702  *
1703  */
1704 static int ocfs2_xattr_ibody_set(struct inode *inode,
1705 				 struct ocfs2_xattr_info *xi,
1706 				 struct ocfs2_xattr_search *xs)
1707 {
1708 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1709 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1710 	int ret;
1711 
1712 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1713 		return -ENOSPC;
1714 
1715 	down_write(&oi->ip_alloc_sem);
1716 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1717 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
1718 			ret = -ENOSPC;
1719 			goto out;
1720 		}
1721 	}
1722 
1723 	ret = ocfs2_xattr_set_entry(inode, xi, xs,
1724 				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1725 out:
1726 	up_write(&oi->ip_alloc_sem);
1727 
1728 	return ret;
1729 }
1730 
1731 /*
1732  * ocfs2_xattr_block_find()
1733  *
1734  * Find extended attribute in external block and
1735  * fill search info into struct ocfs2_xattr_search.
1736  */
1737 static int ocfs2_xattr_block_find(struct inode *inode,
1738 				  int name_index,
1739 				  const char *name,
1740 				  struct ocfs2_xattr_search *xs)
1741 {
1742 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1743 	struct buffer_head *blk_bh = NULL;
1744 	struct ocfs2_xattr_block *xb;
1745 	int ret = 0;
1746 
1747 	if (!di->i_xattr_loc)
1748 		return ret;
1749 
1750 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1751 			       le64_to_cpu(di->i_xattr_loc),
1752 			       &blk_bh, OCFS2_BH_CACHED, inode);
1753 	if (ret < 0) {
1754 		mlog_errno(ret);
1755 		return ret;
1756 	}
1757 	/*Verify the signature of xattr block*/
1758 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1759 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1760 			ret = -EFAULT;
1761 			goto cleanup;
1762 	}
1763 
1764 	xs->xattr_bh = blk_bh;
1765 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1766 
1767 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1768 		xs->header = &xb->xb_attrs.xb_header;
1769 		xs->base = (void *)xs->header;
1770 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1771 		xs->here = xs->header->xh_entries;
1772 
1773 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1774 	} else
1775 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1776 						   name_index,
1777 						   name, xs);
1778 
1779 	if (ret && ret != -ENODATA) {
1780 		xs->xattr_bh = NULL;
1781 		goto cleanup;
1782 	}
1783 	xs->not_found = ret;
1784 	return 0;
1785 cleanup:
1786 	brelse(blk_bh);
1787 
1788 	return ret;
1789 }
1790 
1791 /*
1792  * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1793  * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1794  * re-initialized.
1795  */
1796 static int ocfs2_restore_xattr_block(struct inode *inode,
1797 				     struct ocfs2_xattr_search *xs)
1798 {
1799 	int ret;
1800 	handle_t *handle;
1801 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1802 	struct ocfs2_xattr_block *xb =
1803 		(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1804 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1805 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
1806 
1807 	BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1808 		le16_to_cpu(el->l_next_free_rec) != 0);
1809 
1810 	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1811 	if (IS_ERR(handle)) {
1812 		ret = PTR_ERR(handle);
1813 		handle = NULL;
1814 		goto out;
1815 	}
1816 
1817 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1818 				   OCFS2_JOURNAL_ACCESS_WRITE);
1819 	if (ret < 0) {
1820 		mlog_errno(ret);
1821 		goto out_commit;
1822 	}
1823 
1824 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1825 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
1826 
1827 	xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1828 
1829 	ocfs2_journal_dirty(handle, xs->xattr_bh);
1830 
1831 out_commit:
1832 	ocfs2_commit_trans(osb, handle);
1833 out:
1834 	return ret;
1835 }
1836 
1837 /*
1838  * ocfs2_xattr_block_set()
1839  *
1840  * Set, replace or remove an extended attribute into external block.
1841  *
1842  */
1843 static int ocfs2_xattr_block_set(struct inode *inode,
1844 				 struct ocfs2_xattr_info *xi,
1845 				 struct ocfs2_xattr_search *xs)
1846 {
1847 	struct buffer_head *new_bh = NULL;
1848 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1849 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1850 	struct ocfs2_alloc_context *meta_ac = NULL;
1851 	handle_t *handle = NULL;
1852 	struct ocfs2_xattr_block *xblk = NULL;
1853 	u16 suballoc_bit_start;
1854 	u32 num_got;
1855 	u64 first_blkno;
1856 	int ret;
1857 
1858 	if (!xs->xattr_bh) {
1859 		/*
1860 		 * Alloc one external block for extended attribute
1861 		 * outside of inode.
1862 		 */
1863 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1864 		if (ret < 0) {
1865 			mlog_errno(ret);
1866 			goto out;
1867 		}
1868 		handle = ocfs2_start_trans(osb,
1869 					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1870 		if (IS_ERR(handle)) {
1871 			ret = PTR_ERR(handle);
1872 			mlog_errno(ret);
1873 			goto out;
1874 		}
1875 		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1876 					   OCFS2_JOURNAL_ACCESS_CREATE);
1877 		if (ret < 0) {
1878 			mlog_errno(ret);
1879 			goto out_commit;
1880 		}
1881 
1882 		ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1883 					   &suballoc_bit_start, &num_got,
1884 					   &first_blkno);
1885 		if (ret < 0) {
1886 			mlog_errno(ret);
1887 			goto out_commit;
1888 		}
1889 
1890 		new_bh = sb_getblk(inode->i_sb, first_blkno);
1891 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
1892 
1893 		ret = ocfs2_journal_access(handle, inode, new_bh,
1894 					   OCFS2_JOURNAL_ACCESS_CREATE);
1895 		if (ret < 0) {
1896 			mlog_errno(ret);
1897 			goto out_commit;
1898 		}
1899 
1900 		/* Initialize ocfs2_xattr_block */
1901 		xs->xattr_bh = new_bh;
1902 		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1903 		memset(xblk, 0, inode->i_sb->s_blocksize);
1904 		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1905 		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1906 		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1907 		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1908 		xblk->xb_blkno = cpu_to_le64(first_blkno);
1909 
1910 		xs->header = &xblk->xb_attrs.xb_header;
1911 		xs->base = (void *)xs->header;
1912 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1913 		xs->here = xs->header->xh_entries;
1914 
1915 
1916 		ret = ocfs2_journal_dirty(handle, new_bh);
1917 		if (ret < 0) {
1918 			mlog_errno(ret);
1919 			goto out_commit;
1920 		}
1921 		di->i_xattr_loc = cpu_to_le64(first_blkno);
1922 		ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1923 		if (ret < 0)
1924 			mlog_errno(ret);
1925 out_commit:
1926 		ocfs2_commit_trans(osb, handle);
1927 out:
1928 		if (meta_ac)
1929 			ocfs2_free_alloc_context(meta_ac);
1930 		if (ret < 0)
1931 			return ret;
1932 	} else
1933 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1934 
1935 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1936 		/* Set extended attribute into external block */
1937 		ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1938 		if (!ret || ret != -ENOSPC)
1939 			goto end;
1940 
1941 		ret = ocfs2_xattr_create_index_block(inode, xs);
1942 		if (ret)
1943 			goto end;
1944 	}
1945 
1946 	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1947 	if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1948 		ret = ocfs2_restore_xattr_block(inode, xs);
1949 
1950 end:
1951 
1952 	return ret;
1953 }
1954 
1955 /*
1956  * ocfs2_xattr_set()
1957  *
1958  * Set, replace or remove an extended attribute for this inode.
1959  * value is NULL to remove an existing extended attribute, else either
1960  * create or replace an extended attribute.
1961  */
1962 int ocfs2_xattr_set(struct inode *inode,
1963 		    int name_index,
1964 		    const char *name,
1965 		    const void *value,
1966 		    size_t value_len,
1967 		    int flags)
1968 {
1969 	struct buffer_head *di_bh = NULL;
1970 	struct ocfs2_dinode *di;
1971 	int ret;
1972 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1973 
1974 	struct ocfs2_xattr_info xi = {
1975 		.name_index = name_index,
1976 		.name = name,
1977 		.value = value,
1978 		.value_len = value_len,
1979 	};
1980 
1981 	struct ocfs2_xattr_search xis = {
1982 		.not_found = -ENODATA,
1983 	};
1984 
1985 	struct ocfs2_xattr_search xbs = {
1986 		.not_found = -ENODATA,
1987 	};
1988 
1989 	if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1990 		return -EOPNOTSUPP;
1991 
1992 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
1993 	if (ret < 0) {
1994 		mlog_errno(ret);
1995 		return ret;
1996 	}
1997 	xis.inode_bh = xbs.inode_bh = di_bh;
1998 	di = (struct ocfs2_dinode *)di_bh->b_data;
1999 
2000 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
2001 	/*
2002 	 * Scan inode and external block to find the same name
2003 	 * extended attribute and collect search infomation.
2004 	 */
2005 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2006 	if (ret)
2007 		goto cleanup;
2008 	if (xis.not_found) {
2009 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2010 		if (ret)
2011 			goto cleanup;
2012 	}
2013 
2014 	if (xis.not_found && xbs.not_found) {
2015 		ret = -ENODATA;
2016 		if (flags & XATTR_REPLACE)
2017 			goto cleanup;
2018 		ret = 0;
2019 		if (!value)
2020 			goto cleanup;
2021 	} else {
2022 		ret = -EEXIST;
2023 		if (flags & XATTR_CREATE)
2024 			goto cleanup;
2025 	}
2026 
2027 	if (!value) {
2028 		/* Remove existing extended attribute */
2029 		if (!xis.not_found)
2030 			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2031 		else if (!xbs.not_found)
2032 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2033 	} else {
2034 		/* We always try to set extended attribute into inode first*/
2035 		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2036 		if (!ret && !xbs.not_found) {
2037 			/*
2038 			 * If succeed and that extended attribute existing in
2039 			 * external block, then we will remove it.
2040 			 */
2041 			xi.value = NULL;
2042 			xi.value_len = 0;
2043 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2044 		} else if (ret == -ENOSPC) {
2045 			if (di->i_xattr_loc && !xbs.xattr_bh) {
2046 				ret = ocfs2_xattr_block_find(inode, name_index,
2047 							     name, &xbs);
2048 				if (ret)
2049 					goto cleanup;
2050 			}
2051 			/*
2052 			 * If no space in inode, we will set extended attribute
2053 			 * into external block.
2054 			 */
2055 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2056 			if (ret)
2057 				goto cleanup;
2058 			if (!xis.not_found) {
2059 				/*
2060 				 * If succeed and that extended attribute
2061 				 * existing in inode, we will remove it.
2062 				 */
2063 				xi.value = NULL;
2064 				xi.value_len = 0;
2065 				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2066 			}
2067 		}
2068 	}
2069 cleanup:
2070 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
2071 	ocfs2_inode_unlock(inode, 1);
2072 	brelse(di_bh);
2073 	brelse(xbs.xattr_bh);
2074 	for (i = 0; i < blk_per_bucket; i++)
2075 		brelse(xbs.bucket.bhs[i]);
2076 
2077 	return ret;
2078 }
2079 
2080 static inline u32 ocfs2_xattr_hash_by_name(struct inode *inode,
2081 					   int name_index,
2082 					   const char *suffix_name)
2083 {
2084 	struct xattr_handler *handler = ocfs2_xattr_handler(name_index);
2085 	char *prefix = handler->prefix;
2086 	int prefix_len = strlen(handler->prefix);
2087 
2088 	return ocfs2_xattr_name_hash(inode, prefix, prefix_len,
2089 				     (char *)suffix_name, strlen(suffix_name));
2090 }
2091 
2092 /*
2093  * Find the xattr extent rec which may contains name_hash.
2094  * e_cpos will be the first name hash of the xattr rec.
2095  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2096  */
2097 static int ocfs2_xattr_get_rec(struct inode *inode,
2098 			       u32 name_hash,
2099 			       u64 *p_blkno,
2100 			       u32 *e_cpos,
2101 			       u32 *num_clusters,
2102 			       struct ocfs2_extent_list *el)
2103 {
2104 	int ret = 0, i;
2105 	struct buffer_head *eb_bh = NULL;
2106 	struct ocfs2_extent_block *eb;
2107 	struct ocfs2_extent_rec *rec = NULL;
2108 	u64 e_blkno = 0;
2109 
2110 	if (el->l_tree_depth) {
2111 		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2112 		if (ret) {
2113 			mlog_errno(ret);
2114 			goto out;
2115 		}
2116 
2117 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2118 		el = &eb->h_list;
2119 
2120 		if (el->l_tree_depth) {
2121 			ocfs2_error(inode->i_sb,
2122 				    "Inode %lu has non zero tree depth in "
2123 				    "xattr tree block %llu\n", inode->i_ino,
2124 				    (unsigned long long)eb_bh->b_blocknr);
2125 			ret = -EROFS;
2126 			goto out;
2127 		}
2128 	}
2129 
2130 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2131 		rec = &el->l_recs[i];
2132 
2133 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2134 			e_blkno = le64_to_cpu(rec->e_blkno);
2135 			break;
2136 		}
2137 	}
2138 
2139 	if (!e_blkno) {
2140 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2141 			    "record (%u, %u, 0) in xattr", inode->i_ino,
2142 			    le32_to_cpu(rec->e_cpos),
2143 			    ocfs2_rec_clusters(el, rec));
2144 		ret = -EROFS;
2145 		goto out;
2146 	}
2147 
2148 	*p_blkno = le64_to_cpu(rec->e_blkno);
2149 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2150 	if (e_cpos)
2151 		*e_cpos = le32_to_cpu(rec->e_cpos);
2152 out:
2153 	brelse(eb_bh);
2154 	return ret;
2155 }
2156 
/*
 * Function type taking an inode, one xattr bucket and an opaque
 * argument, and returning an int status.
 * NOTE(review): presumably a per-bucket callback for a bucket iterator
 * defined elsewhere in this file -- confirm against its users.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket, void *para);
2160 
2161 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2162 				   struct buffer_head *header_bh,
2163 				   int name_index,
2164 				   const char *name,
2165 				   u32 name_hash,
2166 				   u16 *xe_index,
2167 				   int *found)
2168 {
2169 	int i, ret = 0, cmp = 1, block_off, new_offset;
2170 	struct ocfs2_xattr_header *xh =
2171 			(struct ocfs2_xattr_header *)header_bh->b_data;
2172 	size_t name_len = strlen(name);
2173 	struct ocfs2_xattr_entry *xe = NULL;
2174 	struct buffer_head *name_bh = NULL;
2175 	char *xe_name;
2176 
2177 	/*
2178 	 * We don't use binary search in the bucket because there
2179 	 * may be multiple entries with the same name hash.
2180 	 */
2181 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2182 		xe = &xh->xh_entries[i];
2183 
2184 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
2185 			continue;
2186 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2187 			break;
2188 
2189 		cmp = name_index - ocfs2_xattr_get_type(xe);
2190 		if (!cmp)
2191 			cmp = name_len - xe->xe_name_len;
2192 		if (cmp)
2193 			continue;
2194 
2195 		ret = ocfs2_xattr_bucket_get_name_value(inode,
2196 							xh,
2197 							i,
2198 							&block_off,
2199 							&new_offset);
2200 		if (ret) {
2201 			mlog_errno(ret);
2202 			break;
2203 		}
2204 
2205 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
2206 				       header_bh->b_blocknr + block_off,
2207 				       &name_bh, OCFS2_BH_CACHED, inode);
2208 		if (ret) {
2209 			mlog_errno(ret);
2210 			break;
2211 		}
2212 		xe_name = name_bh->b_data + new_offset;
2213 
2214 		cmp = memcmp(name, xe_name, name_len);
2215 		brelse(name_bh);
2216 		name_bh = NULL;
2217 
2218 		if (cmp == 0) {
2219 			*xe_index = i;
2220 			*found = 1;
2221 			ret = 0;
2222 			break;
2223 		}
2224 	}
2225 
2226 	return ret;
2227 }
2228 
2229 /*
2230  * Find the specified xattr entry in a series of buckets.
2231  * This series start from p_blkno and last for num_clusters.
2232  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2233  * the num of the valid buckets.
2234  *
2235  * Return the buffer_head this xattr should reside in. And if the xattr's
2236  * hash is in the gap of 2 buckets, return the lower bucket.
2237  */
2238 static int ocfs2_xattr_bucket_find(struct inode *inode,
2239 				   int name_index,
2240 				   const char *name,
2241 				   u32 name_hash,
2242 				   u64 p_blkno,
2243 				   u32 first_hash,
2244 				   u32 num_clusters,
2245 				   struct ocfs2_xattr_search *xs)
2246 {
2247 	int ret, found = 0;
2248 	struct buffer_head *bh = NULL;
2249 	struct buffer_head *lower_bh = NULL;
2250 	struct ocfs2_xattr_header *xh = NULL;
2251 	struct ocfs2_xattr_entry *xe = NULL;
2252 	u16 index = 0;
2253 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2254 	int low_bucket = 0, bucket, high_bucket;
2255 	u32 last_hash;
2256 	u64 blkno;
2257 
2258 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
2259 			       &bh, OCFS2_BH_CACHED, inode);
2260 	if (ret) {
2261 		mlog_errno(ret);
2262 		goto out;
2263 	}
2264 
2265 	xh = (struct ocfs2_xattr_header *)bh->b_data;
2266 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2267 
2268 	while (low_bucket <= high_bucket) {
2269 		brelse(bh);
2270 		bh = NULL;
2271 		bucket = (low_bucket + high_bucket) / 2;
2272 
2273 		blkno = p_blkno + bucket * blk_per_bucket;
2274 
2275 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
2276 				       &bh, OCFS2_BH_CACHED, inode);
2277 		if (ret) {
2278 			mlog_errno(ret);
2279 			goto out;
2280 		}
2281 
2282 		xh = (struct ocfs2_xattr_header *)bh->b_data;
2283 		xe = &xh->xh_entries[0];
2284 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2285 			high_bucket = bucket - 1;
2286 			continue;
2287 		}
2288 
2289 		/*
2290 		 * Check whether the hash of the last entry in our
2291 		 * bucket is larger than the search one.
2292 		 */
2293 		xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2294 		last_hash = le32_to_cpu(xe->xe_name_hash);
2295 
2296 		/* record lower_bh which may be the insert place. */
2297 		brelse(lower_bh);
2298 		lower_bh = bh;
2299 		bh = NULL;
2300 
2301 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2302 			low_bucket = bucket + 1;
2303 			continue;
2304 		}
2305 
2306 		/* the searched xattr should reside in this bucket if exists. */
2307 		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2308 					      name_index, name, name_hash,
2309 					      &index, &found);
2310 		if (ret) {
2311 			mlog_errno(ret);
2312 			goto out;
2313 		}
2314 		break;
2315 	}
2316 
2317 	/*
2318 	 * Record the bucket we have found.
2319 	 * When the xattr's hash value is in the gap of 2 buckets, we will
2320 	 * always set it to the previous bucket.
2321 	 */
2322 	if (!lower_bh) {
2323 		/*
2324 		 * We can't find any bucket whose first name_hash is less
2325 		 * than the find name_hash.
2326 		 */
2327 		BUG_ON(bh->b_blocknr != p_blkno);
2328 		lower_bh = bh;
2329 		bh = NULL;
2330 	}
2331 	xs->bucket.bhs[0] = lower_bh;
2332 	xs->bucket.xh = (struct ocfs2_xattr_header *)
2333 					xs->bucket.bhs[0]->b_data;
2334 	lower_bh = NULL;
2335 
2336 	xs->header = xs->bucket.xh;
2337 	xs->base = xs->bucket.bhs[0]->b_data;
2338 	xs->end = xs->base + inode->i_sb->s_blocksize;
2339 
2340 	if (found) {
2341 		/*
2342 		 * If we have found the xattr enty, read all the blocks in
2343 		 * this bucket.
2344 		 */
2345 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2346 					xs->bucket.bhs[0]->b_blocknr + 1,
2347 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2348 					OCFS2_BH_CACHED, inode);
2349 		if (ret) {
2350 			mlog_errno(ret);
2351 			goto out;
2352 		}
2353 
2354 		xs->here = &xs->header->xh_entries[index];
2355 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2356 		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
2357 	} else
2358 		ret = -ENODATA;
2359 
2360 out:
2361 	brelse(bh);
2362 	brelse(lower_bh);
2363 	return ret;
2364 }
2365 
2366 static int ocfs2_xattr_index_block_find(struct inode *inode,
2367 					struct buffer_head *root_bh,
2368 					int name_index,
2369 					const char *name,
2370 					struct ocfs2_xattr_search *xs)
2371 {
2372 	int ret;
2373 	struct ocfs2_xattr_block *xb =
2374 			(struct ocfs2_xattr_block *)root_bh->b_data;
2375 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2376 	struct ocfs2_extent_list *el = &xb_root->xt_list;
2377 	u64 p_blkno = 0;
2378 	u32 first_hash, num_clusters = 0;
2379 	u32 name_hash = ocfs2_xattr_hash_by_name(inode, name_index, name);
2380 
2381 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2382 		return -ENODATA;
2383 
2384 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2385 	     name, name_hash, name_index);
2386 
2387 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2388 				  &num_clusters, el);
2389 	if (ret) {
2390 		mlog_errno(ret);
2391 		goto out;
2392 	}
2393 
2394 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2395 
2396 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2397 	     "in the rec is %u\n", num_clusters, p_blkno, first_hash);
2398 
2399 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2400 				      p_blkno, first_hash, num_clusters, xs);
2401 
2402 out:
2403 	return ret;
2404 }
2405 
2406 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2407 				       u64 blkno,
2408 				       u32 clusters,
2409 				       xattr_bucket_func *func,
2410 				       void *para)
2411 {
2412 	int i, j, ret = 0;
2413 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2414 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2415 	u32 num_buckets = clusters * bpc;
2416 	struct ocfs2_xattr_bucket bucket;
2417 
2418 	memset(&bucket, 0, sizeof(bucket));
2419 
2420 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2421 	     clusters, blkno);
2422 
2423 	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2424 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2425 					blkno, blk_per_bucket,
2426 					bucket.bhs, OCFS2_BH_CACHED, inode);
2427 		if (ret) {
2428 			mlog_errno(ret);
2429 			goto out;
2430 		}
2431 
2432 		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2433 		/*
2434 		 * The real bucket num in this series of blocks is stored
2435 		 * in the 1st bucket.
2436 		 */
2437 		if (i == 0)
2438 			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2439 
2440 		mlog(0, "iterating xattr bucket %llu\n", blkno);
2441 		if (func) {
2442 			ret = func(inode, &bucket, para);
2443 			if (ret) {
2444 				mlog_errno(ret);
2445 				break;
2446 			}
2447 		}
2448 
2449 		for (j = 0; j < blk_per_bucket; j++)
2450 			brelse(bucket.bhs[j]);
2451 		memset(&bucket, 0, sizeof(bucket));
2452 	}
2453 
2454 out:
2455 	for (j = 0; j < blk_per_bucket; j++)
2456 		brelse(bucket.bhs[j]);
2457 
2458 	return ret;
2459 }
2460 
/*
 * Cursor handed to ocfs2_list_xattr_bucket() through its "para" argument
 * while the names of an indexed xattr tree are being listed.
 */
struct ocfs2_xattr_tree_list {
	/* Next write position; advanced per listed name when non-NULL. */
	char *buffer;
	/* Size remaining; reduced by the size of every listed name. */
	size_t buffer_size;
};
2465 
2466 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2467 					     struct ocfs2_xattr_header *xh,
2468 					     int index,
2469 					     int *block_off,
2470 					     int *new_offset)
2471 {
2472 	u16 name_offset;
2473 
2474 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2475 		return -EINVAL;
2476 
2477 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2478 
2479 	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2480 	*new_offset = name_offset % inode->i_sb->s_blocksize;
2481 
2482 	return 0;
2483 }
2484 
2485 static int ocfs2_list_xattr_bucket(struct inode *inode,
2486 				   struct ocfs2_xattr_bucket *bucket,
2487 				   void *para)
2488 {
2489 	int ret = 0;
2490 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2491 	size_t size;
2492 	int i, block_off, new_offset;
2493 
2494 	for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2495 		struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2496 		struct xattr_handler *handler =
2497 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
2498 
2499 		if (handler) {
2500 			ret = ocfs2_xattr_bucket_get_name_value(inode,
2501 								bucket->xh,
2502 								i,
2503 								&block_off,
2504 								&new_offset);
2505 			if (ret)
2506 				break;
2507 			size = handler->list(inode, xl->buffer, xl->buffer_size,
2508 					     bucket->bhs[block_off]->b_data +
2509 					     new_offset,
2510 					     entry->xe_name_len);
2511 			if (xl->buffer) {
2512 				if (size > xl->buffer_size)
2513 					return -ERANGE;
2514 				xl->buffer += size;
2515 			}
2516 			xl->buffer_size -= size;
2517 		}
2518 	}
2519 
2520 	return ret;
2521 }
2522 
2523 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2524 					     struct ocfs2_xattr_tree_root *xt,
2525 					     char *buffer,
2526 					     size_t buffer_size)
2527 {
2528 	struct ocfs2_extent_list *el = &xt->xt_list;
2529 	int ret = 0;
2530 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2531 	u64 p_blkno = 0;
2532 	struct ocfs2_xattr_tree_list xl = {
2533 		.buffer = buffer,
2534 		.buffer_size = buffer_size,
2535 	};
2536 
2537 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2538 		return 0;
2539 
2540 	while (name_hash > 0) {
2541 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2542 					  &e_cpos, &num_clusters, el);
2543 		if (ret) {
2544 			mlog_errno(ret);
2545 			goto out;
2546 		}
2547 
2548 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2549 						  ocfs2_list_xattr_bucket,
2550 						  &xl);
2551 		if (ret) {
2552 			mlog_errno(ret);
2553 			goto out;
2554 		}
2555 
2556 		if (e_cpos == 0)
2557 			break;
2558 
2559 		name_hash = e_cpos - 1;
2560 	}
2561 
2562 	ret = buffer_size - xl.buffer_size;
2563 out:
2564 	return ret;
2565 }
2566 
2567 static int cmp_xe(const void *a, const void *b)
2568 {
2569 	const struct ocfs2_xattr_entry *l = a, *r = b;
2570 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
2571 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
2572 
2573 	if (l_hash > r_hash)
2574 		return 1;
2575 	if (l_hash < r_hash)
2576 		return -1;
2577 	return 0;
2578 }
2579 
2580 static void swap_xe(void *a, void *b, int size)
2581 {
2582 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2583 
2584 	tmp = *l;
2585 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2586 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2587 }
2588 
2589 /*
2590  * When the ocfs2_xattr_block is filled up, new bucket will be created
2591  * and all the xattr entries will be moved to the new bucket.
2592  * Note: we need to sort the entries since they are not saved in order
2593  * in the ocfs2_xattr_block.
2594  */
2595 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2596 					   struct buffer_head *xb_bh,
2597 					   struct buffer_head *xh_bh,
2598 					   struct buffer_head *data_bh)
2599 {
2600 	int i, blocksize = inode->i_sb->s_blocksize;
2601 	u16 offset, size, off_change;
2602 	struct ocfs2_xattr_entry *xe;
2603 	struct ocfs2_xattr_block *xb =
2604 				(struct ocfs2_xattr_block *)xb_bh->b_data;
2605 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2606 	struct ocfs2_xattr_header *xh =
2607 				(struct ocfs2_xattr_header *)xh_bh->b_data;
2608 	u16 count = le16_to_cpu(xb_xh->xh_count);
2609 	char *target = xh_bh->b_data, *src = xb_bh->b_data;
2610 
2611 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
2612 	     (unsigned long long)xb_bh->b_blocknr,
2613 	     (unsigned long long)xh_bh->b_blocknr);
2614 
2615 	memset(xh_bh->b_data, 0, blocksize);
2616 	if (data_bh)
2617 		memset(data_bh->b_data, 0, blocksize);
2618 	/*
2619 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
2620 	 * there is a offset change corresponding to the change of
2621 	 * ocfs2_xattr_header's position.
2622 	 */
2623 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2624 	xe = &xb_xh->xh_entries[count - 1];
2625 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2626 	size = blocksize - offset;
2627 
2628 	/* copy all the names and values. */
2629 	if (data_bh)
2630 		target = data_bh->b_data;
2631 	memcpy(target + offset, src + offset, size);
2632 
2633 	/* Init new header now. */
2634 	xh->xh_count = xb_xh->xh_count;
2635 	xh->xh_num_buckets = cpu_to_le16(1);
2636 	xh->xh_name_value_len = cpu_to_le16(size);
2637 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2638 
2639 	/* copy all the entries. */
2640 	target = xh_bh->b_data;
2641 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2642 	size = count * sizeof(struct ocfs2_xattr_entry);
2643 	memcpy(target + offset, (char *)xb_xh + offset, size);
2644 
2645 	/* Change the xe offset for all the xe because of the move. */
2646 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2647 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2648 	for (i = 0; i < count; i++)
2649 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2650 
2651 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2652 	     offset, size, off_change);
2653 
2654 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2655 	     cmp_xe, swap_xe);
2656 }
2657 
2658 /*
2659  * After we move xattr from block to index btree, we have to
2660  * update ocfs2_xattr_search to the new xe and base.
2661  *
2662  * When the entry is in xattr block, xattr_bh indicates the storage place.
2663  * While if the entry is in index b-tree, "bucket" indicates the
2664  * real place of the xattr.
2665  */
2666 static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2667 					   struct ocfs2_xattr_search *xs,
2668 					   struct buffer_head *old_bh,
2669 					   struct buffer_head *new_bh)
2670 {
2671 	int ret = 0;
2672 	char *buf = old_bh->b_data;
2673 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2674 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2675 	int i, blocksize = inode->i_sb->s_blocksize;
2676 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2677 
2678 	xs->bucket.bhs[0] = new_bh;
2679 	get_bh(new_bh);
2680 	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2681 	xs->header = xs->bucket.xh;
2682 
2683 	xs->base = new_bh->b_data;
2684 	xs->end = xs->base + inode->i_sb->s_blocksize;
2685 
2686 	if (!xs->not_found) {
2687 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2688 			ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2689 					xs->bucket.bhs[0]->b_blocknr + 1,
2690 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2691 					OCFS2_BH_CACHED, inode);
2692 			if (ret) {
2693 				mlog_errno(ret);
2694 				return ret;
2695 			}
2696 
2697 			i = xs->here - old_xh->xh_entries;
2698 			xs->here = &xs->header->xh_entries[i];
2699 		}
2700 	}
2701 
2702 	return ret;
2703 }
2704 
2705 static int ocfs2_xattr_create_index_block(struct inode *inode,
2706 					  struct ocfs2_xattr_search *xs)
2707 {
2708 	int ret, credits = OCFS2_SUBALLOC_ALLOC;
2709 	u32 bit_off, len;
2710 	u64 blkno;
2711 	handle_t *handle;
2712 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2713 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2714 	struct ocfs2_alloc_context *data_ac;
2715 	struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2716 	struct buffer_head *xb_bh = xs->xattr_bh;
2717 	struct ocfs2_xattr_block *xb =
2718 			(struct ocfs2_xattr_block *)xb_bh->b_data;
2719 	struct ocfs2_xattr_tree_root *xr;
2720 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
2721 	u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2722 
2723 	mlog(0, "create xattr index block for %llu\n",
2724 	     (unsigned long long)xb_bh->b_blocknr);
2725 
2726 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2727 
2728 	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2729 	if (ret) {
2730 		mlog_errno(ret);
2731 		goto out;
2732 	}
2733 
2734 	/*
2735 	 * XXX:
2736 	 * We can use this lock for now, and maybe move to a dedicated mutex
2737 	 * if performance becomes a problem later.
2738 	 */
2739 	down_write(&oi->ip_alloc_sem);
2740 
2741 	/*
2742 	 * 3 more credits, one for xattr block update, one for the 1st block
2743 	 * of the new xattr bucket and one for the value/data.
2744 	 */
2745 	credits += 3;
2746 	handle = ocfs2_start_trans(osb, credits);
2747 	if (IS_ERR(handle)) {
2748 		ret = PTR_ERR(handle);
2749 		mlog_errno(ret);
2750 		goto out_sem;
2751 	}
2752 
2753 	ret = ocfs2_journal_access(handle, inode, xb_bh,
2754 				   OCFS2_JOURNAL_ACCESS_WRITE);
2755 	if (ret) {
2756 		mlog_errno(ret);
2757 		goto out_commit;
2758 	}
2759 
2760 	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2761 	if (ret) {
2762 		mlog_errno(ret);
2763 		goto out_commit;
2764 	}
2765 
2766 	/*
2767 	 * The bucket may spread in many blocks, and
2768 	 * we will only touch the 1st block and the last block
2769 	 * in the whole bucket(one for entry and one for data).
2770 	 */
2771 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2772 
2773 	mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2774 
2775 	xh_bh = sb_getblk(inode->i_sb, blkno);
2776 	if (!xh_bh) {
2777 		ret = -EIO;
2778 		mlog_errno(ret);
2779 		goto out_commit;
2780 	}
2781 
2782 	ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2783 
2784 	ret = ocfs2_journal_access(handle, inode, xh_bh,
2785 				   OCFS2_JOURNAL_ACCESS_CREATE);
2786 	if (ret) {
2787 		mlog_errno(ret);
2788 		goto out_commit;
2789 	}
2790 
2791 	if (bpb > 1) {
2792 		data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2793 		if (!data_bh) {
2794 			ret = -EIO;
2795 			mlog_errno(ret);
2796 			goto out_commit;
2797 		}
2798 
2799 		ocfs2_set_new_buffer_uptodate(inode, data_bh);
2800 
2801 		ret = ocfs2_journal_access(handle, inode, data_bh,
2802 					   OCFS2_JOURNAL_ACCESS_CREATE);
2803 		if (ret) {
2804 			mlog_errno(ret);
2805 			goto out_commit;
2806 		}
2807 	}
2808 
2809 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2810 
2811 	ocfs2_journal_dirty(handle, xh_bh);
2812 	if (data_bh)
2813 		ocfs2_journal_dirty(handle, data_bh);
2814 
2815 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2816 
2817 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2818 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2819 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
2820 
2821 	xr = &xb->xb_attrs.xb_root;
2822 	xr->xt_clusters = cpu_to_le32(1);
2823 	xr->xt_last_eb_blk = 0;
2824 	xr->xt_list.l_tree_depth = 0;
2825 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2826 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2827 
2828 	xr->xt_list.l_recs[0].e_cpos = 0;
2829 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2830 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2831 
2832 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2833 
2834 	ret = ocfs2_journal_dirty(handle, xb_bh);
2835 	if (ret) {
2836 		mlog_errno(ret);
2837 		goto out_commit;
2838 	}
2839 
2840 out_commit:
2841 	ocfs2_commit_trans(osb, handle);
2842 
2843 out_sem:
2844 	up_write(&oi->ip_alloc_sem);
2845 
2846 out:
2847 	if (data_ac)
2848 		ocfs2_free_alloc_context(data_ac);
2849 
2850 	brelse(xh_bh);
2851 	brelse(data_bh);
2852 
2853 	return ret;
2854 }
2855 
2856 static int cmp_xe_offset(const void *a, const void *b)
2857 {
2858 	const struct ocfs2_xattr_entry *l = a, *r = b;
2859 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2860 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2861 
2862 	if (l_name_offset < r_name_offset)
2863 		return 1;
2864 	if (l_name_offset > r_name_offset)
2865 		return -1;
2866 	return 0;
2867 }
2868 
2869 /*
2870  * defrag a xattr bucket if we find that the bucket has some
2871  * holes beteen name/value pairs.
2872  * We will move all the name/value pairs to the end of the bucket
2873  * so that we can spare some space for insertion.
2874  */
2875 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2876 				     struct ocfs2_xattr_bucket *bucket)
2877 {
2878 	int ret, i;
2879 	size_t end, offset, len, value_len;
2880 	struct ocfs2_xattr_header *xh;
2881 	char *entries, *buf, *bucket_buf = NULL;
2882 	u64 blkno = bucket->bhs[0]->b_blocknr;
2883 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2884 	u16 xh_free_start;
2885 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2886 	size_t blocksize = inode->i_sb->s_blocksize;
2887 	handle_t *handle;
2888 	struct buffer_head **bhs;
2889 	struct ocfs2_xattr_entry *xe;
2890 
2891 	bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2892 			GFP_NOFS);
2893 	if (!bhs)
2894 		return -ENOMEM;
2895 
2896 	ret = ocfs2_read_blocks(osb, blkno, blk_per_bucket, bhs,
2897 				OCFS2_BH_CACHED, inode);
2898 	if (ret)
2899 		goto out;
2900 
2901 	/*
2902 	 * In order to make the operation more efficient and generic,
2903 	 * we copy all the blocks into a contiguous memory and do the
2904 	 * defragment there, so if anything is error, we will not touch
2905 	 * the real block.
2906 	 */
2907 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2908 	if (!bucket_buf) {
2909 		ret = -EIO;
2910 		goto out;
2911 	}
2912 
2913 	buf = bucket_buf;
2914 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2915 		memcpy(buf, bhs[i]->b_data, blocksize);
2916 
2917 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2918 	if (IS_ERR(handle)) {
2919 		ret = PTR_ERR(handle);
2920 		handle = NULL;
2921 		mlog_errno(ret);
2922 		goto out;
2923 	}
2924 
2925 	for (i = 0; i < blk_per_bucket; i++) {
2926 		ret = ocfs2_journal_access(handle, inode, bhs[i],
2927 					   OCFS2_JOURNAL_ACCESS_WRITE);
2928 		if (ret < 0) {
2929 			mlog_errno(ret);
2930 			goto commit;
2931 		}
2932 	}
2933 
2934 	xh = (struct ocfs2_xattr_header *)bucket_buf;
2935 	entries = (char *)xh->xh_entries;
2936 	xh_free_start = le16_to_cpu(xh->xh_free_start);
2937 
2938 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
2939 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
2940 	     blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2941 	     le16_to_cpu(xh->xh_name_value_len));
2942 
2943 	/*
2944 	 * sort all the entries by their offset.
2945 	 * the largest will be the first, so that we can
2946 	 * move them to the end one by one.
2947 	 */
2948 	sort(entries, le16_to_cpu(xh->xh_count),
2949 	     sizeof(struct ocfs2_xattr_entry),
2950 	     cmp_xe_offset, swap_xe);
2951 
2952 	/* Move all name/values to the end of the bucket. */
2953 	xe = xh->xh_entries;
2954 	end = OCFS2_XATTR_BUCKET_SIZE;
2955 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2956 		offset = le16_to_cpu(xe->xe_name_offset);
2957 		if (ocfs2_xattr_is_local(xe))
2958 			value_len = OCFS2_XATTR_SIZE(
2959 					le64_to_cpu(xe->xe_value_size));
2960 		else
2961 			value_len = OCFS2_XATTR_ROOT_SIZE;
2962 		len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2963 
2964 		/*
2965 		 * We must make sure that the name/value pair
2966 		 * exist in the same block. So adjust end to
2967 		 * the previous block end if needed.
2968 		 */
2969 		if (((end - len) / blocksize !=
2970 			(end - 1) / blocksize))
2971 			end = end - end % blocksize;
2972 
2973 		if (end > offset + len) {
2974 			memmove(bucket_buf + end - len,
2975 				bucket_buf + offset, len);
2976 			xe->xe_name_offset = cpu_to_le16(end - len);
2977 		}
2978 
2979 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2980 				"bucket %llu\n", (unsigned long long)blkno);
2981 
2982 		end -= len;
2983 	}
2984 
2985 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2986 			"bucket %llu\n", (unsigned long long)blkno);
2987 
2988 	if (xh_free_start == end)
2989 		goto commit;
2990 
2991 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2992 	xh->xh_free_start = cpu_to_le16(end);
2993 
2994 	/* sort the entries by their name_hash. */
2995 	sort(entries, le16_to_cpu(xh->xh_count),
2996 	     sizeof(struct ocfs2_xattr_entry),
2997 	     cmp_xe, swap_xe);
2998 
2999 	buf = bucket_buf;
3000 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
3001 		memcpy(bhs[i]->b_data, buf, blocksize);
3002 		ocfs2_journal_dirty(handle, bhs[i]);
3003 	}
3004 
3005 commit:
3006 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3007 out:
3008 
3009 	if (bhs) {
3010 		for (i = 0; i < blk_per_bucket; i++)
3011 			brelse(bhs[i]);
3012 	}
3013 	kfree(bhs);
3014 
3015 	kfree(bucket_buf);
3016 	return ret;
3017 }
3018 
3019 /*
3020  * Move half nums of the xattr bucket in the previous cluster to this new
3021  * cluster. We only touch the last cluster of the previous extend record.
3022  *
3023  * first_bh is the first buffer_head of a series of bucket in the same
3024  * extent rec and header_bh is the header of one bucket in this cluster.
3025  * They will be updated if we move the data header_bh contains to the new
3026  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3027  */
3028 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3029 					       handle_t *handle,
3030 					       struct buffer_head **first_bh,
3031 					       struct buffer_head **header_bh,
3032 					       u64 new_blkno,
3033 					       u64 prev_blkno,
3034 					       u32 num_clusters,
3035 					       u32 *first_hash)
3036 {
3037 	int i, ret, credits;
3038 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3039 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3040 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3041 	int blocksize = inode->i_sb->s_blocksize;
3042 	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3043 	struct ocfs2_xattr_header *new_xh;
3044 	struct ocfs2_xattr_header *xh =
3045 			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
3046 
3047 	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3048 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3049 
3050 	prev_bh = *first_bh;
3051 	get_bh(prev_bh);
3052 	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3053 
3054 	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3055 
3056 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3057 	     prev_blkno, new_blkno);
3058 
3059 	/*
3060 	 * We need to update the 1st half of the new cluster and
3061 	 * 1 more for the update of the 1st bucket of the previous
3062 	 * extent record.
3063 	 */
3064 	credits = bpc / 2 + 1;
3065 	ret = ocfs2_extend_trans(handle, credits);
3066 	if (ret) {
3067 		mlog_errno(ret);
3068 		goto out;
3069 	}
3070 
3071 	ret = ocfs2_journal_access(handle, inode, prev_bh,
3072 				   OCFS2_JOURNAL_ACCESS_WRITE);
3073 	if (ret) {
3074 		mlog_errno(ret);
3075 		goto out;
3076 	}
3077 
3078 	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3079 		old_bh = new_bh = NULL;
3080 		new_bh = sb_getblk(inode->i_sb, new_blkno);
3081 		if (!new_bh) {
3082 			ret = -EIO;
3083 			mlog_errno(ret);
3084 			goto out;
3085 		}
3086 
3087 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
3088 
3089 		ret = ocfs2_journal_access(handle, inode, new_bh,
3090 					   OCFS2_JOURNAL_ACCESS_CREATE);
3091 		if (ret < 0) {
3092 			mlog_errno(ret);
3093 			brelse(new_bh);
3094 			goto out;
3095 		}
3096 
3097 		ret = ocfs2_read_block(osb, prev_blkno,
3098 				       &old_bh, OCFS2_BH_CACHED, inode);
3099 		if (ret < 0) {
3100 			mlog_errno(ret);
3101 			brelse(new_bh);
3102 			goto out;
3103 		}
3104 
3105 		memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3106 
3107 		if (i == 0) {
3108 			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3109 			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3110 
3111 			if (first_hash)
3112 				*first_hash = le32_to_cpu(
3113 					new_xh->xh_entries[0].xe_name_hash);
3114 			new_first_bh = new_bh;
3115 			get_bh(new_first_bh);
3116 		}
3117 
3118 		ocfs2_journal_dirty(handle, new_bh);
3119 
3120 		if (*header_bh == old_bh) {
3121 			brelse(*header_bh);
3122 			*header_bh = new_bh;
3123 			get_bh(*header_bh);
3124 
3125 			brelse(*first_bh);
3126 			*first_bh = new_first_bh;
3127 			get_bh(*first_bh);
3128 		}
3129 		brelse(new_bh);
3130 		brelse(old_bh);
3131 	}
3132 
3133 	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3134 
3135 	ocfs2_journal_dirty(handle, prev_bh);
3136 out:
3137 	brelse(prev_bh);
3138 	brelse(new_first_bh);
3139 	return ret;
3140 }
3141 
3142 static int ocfs2_read_xattr_bucket(struct inode *inode,
3143 				   u64 blkno,
3144 				   struct buffer_head **bhs,
3145 				   int new)
3146 {
3147 	int ret = 0;
3148 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3149 
3150 	if (!new)
3151 		return ocfs2_read_blocks(OCFS2_SB(inode->i_sb), blkno,
3152 					 blk_per_bucket, bhs,
3153 					 OCFS2_BH_CACHED, inode);
3154 
3155 	for (i = 0; i < blk_per_bucket; i++) {
3156 		bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3157 		if (bhs[i] == NULL) {
3158 			ret = -EIO;
3159 			mlog_errno(ret);
3160 			break;
3161 		}
3162 		ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3163 	}
3164 
3165 	return ret;
3166 }
3167 
3168 /*
3169  * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3170  * first_hash will record the 1st hash of the new bucket.
3171  */
3172 static int ocfs2_half_xattr_bucket(struct inode *inode,
3173 				   handle_t *handle,
3174 				   u64 blk,
3175 				   u64 new_blk,
3176 				   u32 *first_hash,
3177 				   int new_bucket_head)
3178 {
3179 	int ret, i;
3180 	u16 count, start, len, name_value_len, xe_len, name_offset;
3181 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3182 	struct buffer_head **s_bhs, **t_bhs = NULL;
3183 	struct ocfs2_xattr_header *xh;
3184 	struct ocfs2_xattr_entry *xe;
3185 	int blocksize = inode->i_sb->s_blocksize;
3186 
3187 	mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3188 	     blk, new_blk);
3189 
3190 	s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3191 	if (!s_bhs)
3192 		return -ENOMEM;
3193 
3194 	ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3195 	if (ret) {
3196 		mlog_errno(ret);
3197 		goto out;
3198 	}
3199 
3200 	ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3201 				   OCFS2_JOURNAL_ACCESS_WRITE);
3202 	if (ret) {
3203 		mlog_errno(ret);
3204 		goto out;
3205 	}
3206 
3207 	t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3208 	if (!t_bhs) {
3209 		ret = -ENOMEM;
3210 		goto out;
3211 	}
3212 
3213 	ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3214 	if (ret) {
3215 		mlog_errno(ret);
3216 		goto out;
3217 	}
3218 
3219 	for (i = 0; i < blk_per_bucket; i++) {
3220 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3221 					   OCFS2_JOURNAL_ACCESS_CREATE);
3222 		if (ret) {
3223 			mlog_errno(ret);
3224 			goto out;
3225 		}
3226 	}
3227 
3228 	/* copy the whole bucket to the new first. */
3229 	for (i = 0; i < blk_per_bucket; i++)
3230 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3231 
3232 	/* update the new bucket. */
3233 	xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3234 	count = le16_to_cpu(xh->xh_count);
3235 	start = count / 2;
3236 
3237 	/*
3238 	 * Calculate the total name/value len and xh_free_start for
3239 	 * the old bucket first.
3240 	 */
3241 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
3242 	name_value_len = 0;
3243 	for (i = 0; i < start; i++) {
3244 		xe = &xh->xh_entries[i];
3245 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3246 		if (ocfs2_xattr_is_local(xe))
3247 			xe_len +=
3248 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3249 		else
3250 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3251 		name_value_len += xe_len;
3252 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3253 			name_offset = le16_to_cpu(xe->xe_name_offset);
3254 	}
3255 
3256 	/*
3257 	 * Now begin the modification to the new bucket.
3258 	 *
3259 	 * In the new bucket, We just move the xattr entry to the beginning
3260 	 * and don't touch the name/value. So there will be some holes in the
3261 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3262 	 * called.
3263 	 */
3264 	xe = &xh->xh_entries[start];
3265 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3266 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3267 	     (char *)xe - (char *)xh, (char *)xh->xh_entries - (char *)xh);
3268 	memmove((char *)xh->xh_entries, (char *)xe, len);
3269 	xe = &xh->xh_entries[count - start];
3270 	len = sizeof(struct ocfs2_xattr_entry) * start;
3271 	memset((char *)xe, 0, len);
3272 
3273 	le16_add_cpu(&xh->xh_count, -start);
3274 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3275 
3276 	/* Calculate xh_free_start for the new bucket. */
3277 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3278 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3279 		xe = &xh->xh_entries[i];
3280 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3281 		if (ocfs2_xattr_is_local(xe))
3282 			xe_len +=
3283 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3284 		else
3285 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3286 		if (le16_to_cpu(xe->xe_name_offset) <
3287 		    le16_to_cpu(xh->xh_free_start))
3288 			xh->xh_free_start = xe->xe_name_offset;
3289 	}
3290 
3291 	/* set xh->xh_num_buckets for the new xh. */
3292 	if (new_bucket_head)
3293 		xh->xh_num_buckets = cpu_to_le16(1);
3294 	else
3295 		xh->xh_num_buckets = 0;
3296 
3297 	for (i = 0; i < blk_per_bucket; i++) {
3298 		ocfs2_journal_dirty(handle, t_bhs[i]);
3299 		if (ret)
3300 			mlog_errno(ret);
3301 	}
3302 
3303 	/* store the first_hash of the new bucket. */
3304 	if (first_hash)
3305 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3306 
3307 	/*
3308 	 * Now only update the 1st block of the old bucket.
3309 	 * Please note that the entry has been sorted already above.
3310 	 */
3311 	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3312 	memset(&xh->xh_entries[start], 0,
3313 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
3314 	xh->xh_count = cpu_to_le16(start);
3315 	xh->xh_free_start = cpu_to_le16(name_offset);
3316 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
3317 
3318 	ocfs2_journal_dirty(handle, s_bhs[0]);
3319 	if (ret)
3320 		mlog_errno(ret);
3321 
3322 out:
3323 	if (s_bhs) {
3324 		for (i = 0; i < blk_per_bucket; i++)
3325 			brelse(s_bhs[i]);
3326 	}
3327 	kfree(s_bhs);
3328 
3329 	if (t_bhs) {
3330 		for (i = 0; i < blk_per_bucket; i++)
3331 			brelse(t_bhs[i]);
3332 	}
3333 	kfree(t_bhs);
3334 
3335 	return ret;
3336 }
3337 
3338 /*
3339  * Copy xattr from one bucket to another bucket.
3340  *
3341  * The caller must make sure that the journal transaction
3342  * has enough space for journaling.
3343  */
3344 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3345 				 handle_t *handle,
3346 				 u64 s_blkno,
3347 				 u64 t_blkno,
3348 				 int t_is_new)
3349 {
3350 	int ret, i;
3351 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3352 	int blocksize = inode->i_sb->s_blocksize;
3353 	struct buffer_head **s_bhs, **t_bhs = NULL;
3354 
3355 	BUG_ON(s_blkno == t_blkno);
3356 
3357 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
3358 	     s_blkno, t_blkno, t_is_new);
3359 
3360 	s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3361 			GFP_NOFS);
3362 	if (!s_bhs)
3363 		return -ENOMEM;
3364 
3365 	ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3366 	if (ret)
3367 		goto out;
3368 
3369 	t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3370 			GFP_NOFS);
3371 	if (!t_bhs) {
3372 		ret = -ENOMEM;
3373 		goto out;
3374 	}
3375 
3376 	ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3377 	if (ret)
3378 		goto out;
3379 
3380 	for (i = 0; i < blk_per_bucket; i++) {
3381 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3382 					   OCFS2_JOURNAL_ACCESS_WRITE);
3383 		if (ret)
3384 			goto out;
3385 	}
3386 
3387 	for (i = 0; i < blk_per_bucket; i++) {
3388 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3389 		ocfs2_journal_dirty(handle, t_bhs[i]);
3390 	}
3391 
3392 out:
3393 	if (s_bhs) {
3394 		for (i = 0; i < blk_per_bucket; i++)
3395 			brelse(s_bhs[i]);
3396 	}
3397 	kfree(s_bhs);
3398 
3399 	if (t_bhs) {
3400 		for (i = 0; i < blk_per_bucket; i++)
3401 			brelse(t_bhs[i]);
3402 	}
3403 	kfree(t_bhs);
3404 
3405 	return ret;
3406 }
3407 
3408 /*
3409  * Copy one xattr cluster from src_blk to to_blk.
3410  * The to_blk will become the first bucket header of the cluster, so its
3411  * xh_num_buckets will be initialized as the bucket num in the cluster.
3412  */
3413 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3414 				  handle_t *handle,
3415 				  struct buffer_head *first_bh,
3416 				  u64 src_blk,
3417 				  u64 to_blk,
3418 				  u32 *first_hash)
3419 {
3420 	int i, ret, credits;
3421 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3422 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3423 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3424 	struct buffer_head *bh = NULL;
3425 	struct ocfs2_xattr_header *xh;
3426 	u64 to_blk_start = to_blk;
3427 
3428 	mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3429 
3430 	/*
3431 	 * We need to update the new cluster and 1 more for the update of
3432 	 * the 1st bucket of the previous extent rec.
3433 	 */
3434 	credits = bpc + 1;
3435 	ret = ocfs2_extend_trans(handle, credits);
3436 	if (ret) {
3437 		mlog_errno(ret);
3438 		goto out;
3439 	}
3440 
3441 	ret = ocfs2_journal_access(handle, inode, first_bh,
3442 				   OCFS2_JOURNAL_ACCESS_WRITE);
3443 	if (ret) {
3444 		mlog_errno(ret);
3445 		goto out;
3446 	}
3447 
3448 	for (i = 0; i < num_buckets; i++) {
3449 		ret = ocfs2_cp_xattr_bucket(inode, handle,
3450 					    src_blk, to_blk, 1);
3451 		if (ret) {
3452 			mlog_errno(ret);
3453 			goto out;
3454 		}
3455 
3456 		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3457 		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3458 	}
3459 
3460 	/* update the old bucket header. */
3461 	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3462 	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3463 
3464 	ocfs2_journal_dirty(handle, first_bh);
3465 
3466 	/* update the new bucket header. */
3467 	ret = ocfs2_read_block(osb, to_blk_start, &bh, OCFS2_BH_CACHED, inode);
3468 	if (ret < 0) {
3469 		mlog_errno(ret);
3470 		goto out;
3471 	}
3472 
3473 	ret = ocfs2_journal_access(handle, inode, bh,
3474 				   OCFS2_JOURNAL_ACCESS_WRITE);
3475 	if (ret) {
3476 		mlog_errno(ret);
3477 		goto out;
3478 	}
3479 
3480 	xh = (struct ocfs2_xattr_header *)bh->b_data;
3481 	xh->xh_num_buckets = cpu_to_le16(num_buckets);
3482 
3483 	ocfs2_journal_dirty(handle, bh);
3484 
3485 	if (first_hash)
3486 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3487 out:
3488 	brelse(bh);
3489 	return ret;
3490 }
3491 
3492 /*
3493  * Move half of the xattrs in this cluster to the new cluster.
3494  * This function should only be called when bucket size == cluster size.
3495  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3496  */
3497 static int ocfs2_half_xattr_cluster(struct inode *inode,
3498 				    handle_t *handle,
3499 				    u64 prev_blk,
3500 				    u64 new_blk,
3501 				    u32 *first_hash)
3502 {
3503 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3504 	int ret, credits = 2 * blk_per_bucket;
3505 
3506 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3507 
3508 	ret = ocfs2_extend_trans(handle, credits);
3509 	if (ret) {
3510 		mlog_errno(ret);
3511 		return ret;
3512 	}
3513 
3514 	/* Move half of the xattr in start_blk to the next bucket. */
3515 	return  ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3516 					new_blk, first_hash, 1);
3517 }
3518 
3519 /*
3520  * Move some xattrs from the old cluster to the new one since they are not
3521  * contiguous in ocfs2 xattr tree.
3522  *
3523  * new_blk starts a new separate cluster, and we will move some xattrs from
3524  * prev_blk to it. v_start will be set as the first name hash value in this
3525  * new cluster so that it can be used as e_cpos during tree insertion and
3526  * don't collide with our original b-tree operations. first_bh and header_bh
3527  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3528  * to extend the insert bucket.
3529  *
3530  * The problem is how much xattr should we move to the new one and when should
3531  * we update first_bh and header_bh?
3532  * 1. If cluster size > bucket size, that means the previous cluster has more
3533  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3534  *    update the first_bh and header_bh if the insert bucket has been moved
3535  *    to the new cluster.
3536  * 2. If cluster_size == bucket_size:
3537  *    a) If the previous extent rec has more than one cluster and the insert
3538  *       place isn't in the last cluster, copy the entire last cluster to the
3539  *       new one. This time, we don't need to upate the first_bh and header_bh
3540  *       since they will not be moved into the new cluster.
3541  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3542  *       the new one. And we set the extend flag to zero if the insert place is
3543  *       moved into the new allocated cluster since no extend is needed.
3544  */
3545 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3546 					    handle_t *handle,
3547 					    struct buffer_head **first_bh,
3548 					    struct buffer_head **header_bh,
3549 					    u64 new_blk,
3550 					    u64 prev_blk,
3551 					    u32 prev_clusters,
3552 					    u32 *v_start,
3553 					    int *extend)
3554 {
3555 	int ret = 0;
3556 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3557 
3558 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3559 	     prev_blk, prev_clusters, new_blk);
3560 
3561 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3562 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3563 							  handle,
3564 							  first_bh,
3565 							  header_bh,
3566 							  new_blk,
3567 							  prev_blk,
3568 							  prev_clusters,
3569 							  v_start);
3570 	else {
3571 		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3572 
3573 		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3574 			ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3575 						     last_blk, new_blk,
3576 						     v_start);
3577 		else {
3578 			ret = ocfs2_half_xattr_cluster(inode, handle,
3579 						       last_blk, new_blk,
3580 						       v_start);
3581 
3582 			if ((*header_bh)->b_blocknr == last_blk && extend)
3583 				*extend = 0;
3584 		}
3585 	}
3586 
3587 	return ret;
3588 }
3589 
/*
 * Add a new cluster for xattr storage.
 *
 * If the new cluster is contiguous with the previous one, it will be
 * appended to the same extent record, and num_clusters will be updated.
 * If not, we will insert a new extent for it and move some xattrs in
 * the last cluster into the new allocated one.
 * We also need to limit the maximum size of a btree leaf, otherwise we'll
 * lose the benefits of hashing because we'll have to search large leaves.
 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
 * if it's bigger).
 *
 * first_bh is the first block of the previous extent rec and header_bh
 * indicates the bucket we will insert the new xattrs. They will be updated
 * when the header_bh is moved into the new cluster.
 *
 * root_bh holds the ocfs2_xattr_block whose xattr tree is extended.
 * prev_cpos, prev_blkno and *num_clusters describe the previous extent
 * record; extend is handed to ocfs2_adjust_xattr_cross_cluster().
 *
 * The function starts and commits its own transaction.
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_add_new_xattr_cluster(struct inode *inode,
				       struct buffer_head *root_bh,
				       struct buffer_head **first_bh,
				       struct buffer_head **header_bh,
				       u32 *num_clusters,
				       u32 prev_cpos,
				       u64 prev_blkno,
				       int *extend)
{
	int ret, credits;
	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	u32 prev_clusters = *num_clusters;
	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
	u64 block;
	handle_t *handle = NULL;
	struct ocfs2_alloc_context *data_ac = NULL;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_xattr_block *xb =
			(struct ocfs2_xattr_block *)root_bh->b_data;
	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
	struct ocfs2_extent_list *root_el = &xb_root->xt_list;
	enum ocfs2_extent_tree_type type = OCFS2_XATTR_TREE_EXTENT;

	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
	     "previous xattr blkno = %llu\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     prev_cpos, prev_blkno);

	/* Reserve allocator contexts for one more cluster. */
	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
				    clusters_to_add, 0, &data_ac,
				    &meta_ac, type, NULL);
	if (ret) {
		mlog_errno(ret);
		goto leave;
	}

	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		/* Cleared so the "leave" path does not commit it. */
		handle = NULL;
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_access(handle, inode, root_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
				     clusters_to_add, &bit_off, &num_bits);
	if (ret < 0) {
		/* Running out of space is not logged as an error. */
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	/* Convert the claimed cluster offset into a block number. */
	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);

	if (prev_blkno + prev_clusters * bpc == block &&
	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
		/*
		 * If this cluster is contiguous with the old one and
		 * adding this new cluster, we don't surpass the limit of
		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
		 * initialized and used like other buckets in the previous
		 * cluster.
		 * So add it as a contiguous one. The caller will handle
		 * its init process.
		 */
		v_start = prev_cpos + prev_clusters;
		*num_clusters = prev_clusters + num_bits;
		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
		     num_bits);
	} else {
		/*
		 * Not contiguous, or the leaf would grow too large: move
		 * some xattrs into the new cluster.  v_start comes back
		 * holding the value used as the insert position below.
		 */
		ret = ocfs2_adjust_xattr_cross_cluster(inode,
						       handle,
						       first_bh,
						       header_bh,
						       block,
						       prev_blkno,
						       prev_clusters,
						       &v_start,
						       extend);
		if (ret) {
			mlog_errno(ret);
			goto leave;
		}
	}

	/* Both branches insert the new cluster into the xattr tree. */
	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
	     num_bits, block, v_start);
	ret = ocfs2_xattr_tree_insert_extent(osb, handle, inode, root_bh,
					     v_start, block, num_bits,
					     0, meta_ac);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

	ret = ocfs2_journal_dirty(handle, root_bh);
	if (ret < 0) {
		mlog_errno(ret);
		goto leave;
	}

leave:
	/* Common exit: commit whatever was started, drop the contexts. */
	if (handle)
		ocfs2_commit_trans(osb, handle);
	if (data_ac)
		ocfs2_free_alloc_context(data_ac);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
3731 
3732 /*
3733  * Extend a new xattr bucket and move xattrs to the end one by one until
3734  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3735  */
3736 static int ocfs2_extend_xattr_bucket(struct inode *inode,
3737 				     struct buffer_head *first_bh,
3738 				     struct buffer_head *start_bh,
3739 				     u32 num_clusters)
3740 {
3741 	int ret, credits;
3742 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3743 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3744 	u64 start_blk = start_bh->b_blocknr, end_blk;
3745 	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3746 	handle_t *handle;
3747 	struct ocfs2_xattr_header *first_xh =
3748 				(struct ocfs2_xattr_header *)first_bh->b_data;
3749 	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3750 
3751 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3752 	     "from %llu, len = %u\n", start_blk,
3753 	     (unsigned long long)first_bh->b_blocknr, num_clusters);
3754 
3755 	BUG_ON(bucket >= num_buckets);
3756 
3757 	end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3758 
3759 	/*
3760 	 * We will touch all the buckets after the start_bh(include it).
3761 	 * Add one more bucket and modify the first_bh.
3762 	 */
3763 	credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3764 	handle = ocfs2_start_trans(osb, credits);
3765 	if (IS_ERR(handle)) {
3766 		ret = PTR_ERR(handle);
3767 		handle = NULL;
3768 		mlog_errno(ret);
3769 		goto out;
3770 	}
3771 
3772 	ret = ocfs2_journal_access(handle, inode, first_bh,
3773 				   OCFS2_JOURNAL_ACCESS_WRITE);
3774 	if (ret) {
3775 		mlog_errno(ret);
3776 		goto commit;
3777 	}
3778 
3779 	while (end_blk != start_blk) {
3780 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3781 					    end_blk + blk_per_bucket, 0);
3782 		if (ret)
3783 			goto commit;
3784 		end_blk -= blk_per_bucket;
3785 	}
3786 
3787 	/* Move half of the xattr in start_blk to the next bucket. */
3788 	ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3789 				      start_blk + blk_per_bucket, NULL, 0);
3790 
3791 	le16_add_cpu(&first_xh->xh_num_buckets, 1);
3792 	ocfs2_journal_dirty(handle, first_bh);
3793 
3794 commit:
3795 	ocfs2_commit_trans(osb, handle);
3796 out:
3797 	return ret;
3798 }
3799 
3800 /*
3801  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3802  * xb_bh is the ocfs2_xattr_block.
3803  * We will move all the buckets starting from header_bh to the next place. As
3804  * for this one, half num of its xattrs will be moved to the next one.
3805  *
3806  * We will allocate a new cluster if current cluster is full and adjust
3807  * header_bh and first_bh if the insert place is moved to the new cluster.
3808  */
3809 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3810 				      struct buffer_head *xb_bh,
3811 				      struct buffer_head *header_bh)
3812 {
3813 	struct ocfs2_xattr_header *first_xh = NULL;
3814 	struct buffer_head *first_bh = NULL;
3815 	struct ocfs2_xattr_block *xb =
3816 			(struct ocfs2_xattr_block *)xb_bh->b_data;
3817 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3818 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3819 	struct ocfs2_xattr_header *xh =
3820 			(struct ocfs2_xattr_header *)header_bh->b_data;
3821 	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3822 	struct super_block *sb = inode->i_sb;
3823 	struct ocfs2_super *osb = OCFS2_SB(sb);
3824 	int ret, num_buckets, extend = 1;
3825 	u64 p_blkno;
3826 	u32 e_cpos, num_clusters;
3827 
3828 	mlog(0, "Add new xattr bucket starting form %llu\n",
3829 	     (unsigned long long)header_bh->b_blocknr);
3830 
3831 	/*
3832 	 * Add refrence for header_bh here because it may be
3833 	 * changed in ocfs2_add_new_xattr_cluster and we need
3834 	 * to free it in the end.
3835 	 */
3836 	get_bh(header_bh);
3837 
3838 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3839 				  &num_clusters, el);
3840 	if (ret) {
3841 		mlog_errno(ret);
3842 		goto out;
3843 	}
3844 
3845 	ret = ocfs2_read_block(osb, p_blkno,
3846 			       &first_bh, OCFS2_BH_CACHED, inode);
3847 	if (ret) {
3848 		mlog_errno(ret);
3849 		goto out;
3850 	}
3851 
3852 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3853 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3854 
3855 	if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3856 		ret = ocfs2_add_new_xattr_cluster(inode,
3857 						  xb_bh,
3858 						  &first_bh,
3859 						  &header_bh,
3860 						  &num_clusters,
3861 						  e_cpos,
3862 						  p_blkno,
3863 						  &extend);
3864 		if (ret) {
3865 			mlog_errno(ret);
3866 			goto out;
3867 		}
3868 	}
3869 
3870 	if (extend)
3871 		ret = ocfs2_extend_xattr_bucket(inode,
3872 						first_bh,
3873 						header_bh,
3874 						num_clusters);
3875 	if (ret)
3876 		mlog_errno(ret);
3877 out:
3878 	brelse(first_bh);
3879 	brelse(header_bh);
3880 	return ret;
3881 }
3882 
3883 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3884 					struct ocfs2_xattr_bucket *bucket,
3885 					int offs)
3886 {
3887 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
3888 
3889 	offs = offs % inode->i_sb->s_blocksize;
3890 	return bucket->bhs[block_off]->b_data + offs;
3891 }
3892 
/*
 * Handle the normal xattr set, including replace, delete and new.
 * When the bucket is empty, "is_empty" is set and the caller can
 * free this bucket.
 *
 * Note: "local" indicates the real data's locality. So we can't
 * just its bucket locality by its length.
 *
 * xs->header is the bucket header being edited and xs->bucket supplies
 * the buffers for the name/value area.  When xs->not_found is clear,
 * xs->here is the existing entry; a NULL xi->value then means "remove".
 * When a new name/value pair is written, xs->here is updated to point
 * at the entry that owns it.
 *
 * The function only edits the in-memory bucket; it does no journal calls.
 */
static void ocfs2_xattr_set_entry_normal(struct inode *inode,
					 struct ocfs2_xattr_info *xi,
					 struct ocfs2_xattr_search *xs,
					 u32 name_hash,
					 int local,
					 int *is_empty)
{
	struct ocfs2_xattr_entry *last, *xe;
	int name_len = strlen(xi->name);
	struct ocfs2_xattr_header *xh = xs->header;
	u16 count = le16_to_cpu(xh->xh_count), start;
	size_t blocksize = inode->i_sb->s_blocksize;
	char *val;
	size_t offs, size, new_size;

	/* One past the last entry currently in use. */
	last = &xh->xh_entries[count];
	if (!xs->not_found) {
		xe = xs->here;
		offs = le16_to_cpu(xe->xe_name_offset);
		/* Space currently taken by the old name + value (or root). */
		if (ocfs2_xattr_is_local(xe))
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			size = OCFS2_XATTR_SIZE(name_len) +
			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);

		/*
		 * If the new value will be stored outside, xi->value has been
		 * initialized as an empty ocfs2_xattr_value_root, and the same
		 * goes with xi->value_len, so we can set new_size safely here.
		 * See ocfs2_xattr_set_in_bucket.
		 */
		new_size = OCFS2_XATTR_SIZE(name_len) +
			   OCFS2_XATTR_SIZE(xi->value_len);

		/* The old pair no longer counts, whichever path follows. */
		le16_add_cpu(&xh->xh_name_value_len, -size);
		if (xi->value) {
			/* Too big for the old slot: write a fresh pair. */
			if (new_size > size)
				goto set_new_name_value;

			/* Now replace the old value with new one. */
			if (local)
				xe->xe_value_size = cpu_to_le64(xi->value_len);
			else
				xe->xe_value_size = 0;

			val = ocfs2_xattr_bucket_get_val(inode,
							 &xs->bucket, offs);
			/* Wipe the old value area, keeping the name in place. */
			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
			       size - OCFS2_XATTR_SIZE(name_len));
			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
				memcpy(val + OCFS2_XATTR_SIZE(name_len),
				       xi->value, xi->value_len);

			le16_add_cpu(&xh->xh_name_value_len, new_size);
			ocfs2_xattr_set_local(xe, local);
			return;
		} else {
			/* Remove the old entry. */
			last -= 1;
			/* Close the gap by sliding the later entries down. */
			memmove(xe, xe + 1,
				(void *)last - (void *)xe);
			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
			le16_add_cpu(&xh->xh_count, -1);
			if (xh->xh_count == 0 && is_empty)
				*is_empty = 1;
			return;
		}
	} else {
		/* find a new entry for insert. */
		int low = 0, high = count - 1, tmp;
		struct ocfs2_xattr_entry *tmp_xe;

		/* Binary search on xe_name_hash; "low" ends up as the slot. */
		while (low <= high) {
			tmp = (low + high) / 2;
			tmp_xe = &xh->xh_entries[tmp];

			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
				low = tmp + 1;
			else if (name_hash <
				 le32_to_cpu(tmp_xe->xe_name_hash))
				high = tmp - 1;
			else
				break;
		}

		xe = &xh->xh_entries[low];
		/* Open a hole at "low" unless we are appending at the end. */
		if (low != count)
			memmove(xe + 1, xe, (void *)last - (void *)xe);

		le16_add_cpu(&xh->xh_count, 1);
		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
		xe->xe_name_hash = cpu_to_le32(name_hash);
		xe->xe_name_len = name_len;
		ocfs2_xattr_set_type(xe, xi->name_index);
	}

set_new_name_value:
	/* Insert the new name+value. */
	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);

	/*
	 * We must make sure that the name/value pair
	 * exists in the same block.
	 */
	offs = le16_to_cpu(xh->xh_free_start);
	start = offs - size;

	/*
	 * If the pair would straddle a block boundary, pull the free
	 * start back to the beginning of its block.
	 */
	if (start >> inode->i_sb->s_blocksize_bits !=
	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
		offs = offs - offs % blocksize;
		xh->xh_free_start = cpu_to_le16(offs);
	}

	val = ocfs2_xattr_bucket_get_val(inode,
					 &xs->bucket, offs - size);
	xe->xe_name_offset = cpu_to_le16(offs - size);

	memset(val, 0, size);
	memcpy(val, xi->name, name_len);
	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);

	xe->xe_value_size = cpu_to_le64(xi->value_len);
	ocfs2_xattr_set_local(xe, local);
	xs->here = xe;
	/* The free area shrinks downward by what was just written. */
	le16_add_cpu(&xh->xh_free_start, -size);
	le16_add_cpu(&xh->xh_name_value_len, size);

	return;
}
4031 
4032 static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4033 					     handle_t *handle,
4034 					     struct ocfs2_xattr_search *xs,
4035 					     struct buffer_head **bhs,
4036 					     u16 bh_num)
4037 {
4038 	int ret = 0, off, block_off;
4039 	struct ocfs2_xattr_entry *xe = xs->here;
4040 
4041 	/*
4042 	 * First calculate all the blocks we should journal_access
4043 	 * and journal_dirty. The first block should always be touched.
4044 	 */
4045 	ret = ocfs2_journal_dirty(handle, bhs[0]);
4046 	if (ret)
4047 		mlog_errno(ret);
4048 
4049 	/* calc the data. */
4050 	off = le16_to_cpu(xe->xe_name_offset);
4051 	block_off = off >> inode->i_sb->s_blocksize_bits;
4052 	ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4053 	if (ret)
4054 		mlog_errno(ret);
4055 
4056 	return ret;
4057 }
4058 
4059 /*
4060  * Set the xattr entry in the specified bucket.
4061  * The bucket is indicated by xs->bucket and it should have the enough
4062  * space for the xattr insertion.
4063  */
4064 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4065 					   struct ocfs2_xattr_info *xi,
4066 					   struct ocfs2_xattr_search *xs,
4067 					   u32 name_hash,
4068 					   int local,
4069 					   int *bucket_empty)
4070 {
4071 	int i, ret;
4072 	handle_t *handle = NULL;
4073 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4074 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4075 
4076 	mlog(0, "Set xattr entry len = %d index = %d in bucket %llu\n",
4077 	     xi->value_len, xi->name_index,
4078 	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4079 
4080 	if (!xs->bucket.bhs[1]) {
4081 		ret = ocfs2_read_blocks(osb,
4082 					xs->bucket.bhs[0]->b_blocknr + 1,
4083 					blk_per_bucket - 1, &xs->bucket.bhs[1],
4084 					OCFS2_BH_CACHED, inode);
4085 		if (ret) {
4086 			mlog_errno(ret);
4087 			goto out;
4088 		}
4089 	}
4090 
4091 	handle = ocfs2_start_trans(osb, blk_per_bucket);
4092 	if (IS_ERR(handle)) {
4093 		ret = PTR_ERR(handle);
4094 		handle = NULL;
4095 		mlog_errno(ret);
4096 		goto out;
4097 	}
4098 
4099 	for (i = 0; i < blk_per_bucket; i++) {
4100 		ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4101 					   OCFS2_JOURNAL_ACCESS_WRITE);
4102 		if (ret < 0) {
4103 			mlog_errno(ret);
4104 			goto out;
4105 		}
4106 	}
4107 
4108 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash,
4109 				     local, bucket_empty);
4110 
4111 	/*Only dirty the blocks we have touched in set xattr. */
4112 	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4113 						xs->bucket.bhs, blk_per_bucket);
4114 	if (ret)
4115 		mlog_errno(ret);
4116 out:
4117 	ocfs2_commit_trans(osb, handle);
4118 
4119 	return ret;
4120 }
4121 
4122 static int ocfs2_xattr_value_update_size(struct inode *inode,
4123 					 struct buffer_head *xe_bh,
4124 					 struct ocfs2_xattr_entry *xe,
4125 					 u64 new_size)
4126 {
4127 	int ret;
4128 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4129 	handle_t *handle = NULL;
4130 
4131 	handle = ocfs2_start_trans(osb, 1);
4132 	if (handle == NULL) {
4133 		ret = -ENOMEM;
4134 		mlog_errno(ret);
4135 		goto out;
4136 	}
4137 
4138 	ret = ocfs2_journal_access(handle, inode, xe_bh,
4139 				   OCFS2_JOURNAL_ACCESS_WRITE);
4140 	if (ret < 0) {
4141 		mlog_errno(ret);
4142 		goto out_commit;
4143 	}
4144 
4145 	xe->xe_value_size = cpu_to_le64(new_size);
4146 
4147 	ret = ocfs2_journal_dirty(handle, xe_bh);
4148 	if (ret < 0)
4149 		mlog_errno(ret);
4150 
4151 out_commit:
4152 	ocfs2_commit_trans(osb, handle);
4153 out:
4154 	return ret;
4155 }
4156 
4157 /*
4158  * Truncate the specified xe_off entry in xattr bucket.
4159  * bucket is indicated by header_bh and len is the new length.
4160  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4161  *
4162  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4163  */
4164 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4165 					     struct buffer_head *header_bh,
4166 					     int xe_off,
4167 					     int len)
4168 {
4169 	int ret, offset;
4170 	u64 value_blk;
4171 	struct buffer_head *value_bh = NULL;
4172 	struct ocfs2_xattr_value_root *xv;
4173 	struct ocfs2_xattr_entry *xe;
4174 	struct ocfs2_xattr_header *xh =
4175 			(struct ocfs2_xattr_header *)header_bh->b_data;
4176 	size_t blocksize = inode->i_sb->s_blocksize;
4177 
4178 	xe = &xh->xh_entries[xe_off];
4179 
4180 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4181 
4182 	offset = le16_to_cpu(xe->xe_name_offset) +
4183 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4184 
4185 	value_blk = offset / blocksize;
4186 
4187 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
4188 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4189 	value_blk += header_bh->b_blocknr;
4190 
4191 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), value_blk,
4192 			       &value_bh, OCFS2_BH_CACHED, inode);
4193 	if (ret) {
4194 		mlog_errno(ret);
4195 		goto out;
4196 	}
4197 
4198 	xv = (struct ocfs2_xattr_value_root *)
4199 		(value_bh->b_data + offset % blocksize);
4200 
4201 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4202 	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
4203 	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4204 	if (ret) {
4205 		mlog_errno(ret);
4206 		goto out;
4207 	}
4208 
4209 	ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4210 	if (ret) {
4211 		mlog_errno(ret);
4212 		goto out;
4213 	}
4214 
4215 out:
4216 	brelse(value_bh);
4217 	return ret;
4218 }
4219 
4220 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4221 						struct ocfs2_xattr_search *xs,
4222 						int len)
4223 {
4224 	int ret, offset;
4225 	struct ocfs2_xattr_entry *xe = xs->here;
4226 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4227 
4228 	BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4229 
4230 	offset = xe - xh->xh_entries;
4231 	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4232 						offset, len);
4233 	if (ret)
4234 		mlog_errno(ret);
4235 
4236 	return ret;
4237 }
4238 
4239 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4240 						struct ocfs2_xattr_search *xs,
4241 						char *val,
4242 						int value_len)
4243 {
4244 	int offset;
4245 	struct ocfs2_xattr_value_root *xv;
4246 	struct ocfs2_xattr_entry *xe = xs->here;
4247 
4248 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4249 
4250 	offset = le16_to_cpu(xe->xe_name_offset) +
4251 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4252 
4253 	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4254 
4255 	return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4256 }
4257 
4258 /*
4259  * Remove the xattr bucket pointed by bucket_bh.
4260  * All the buckets after it in the same xattr extent rec will be
4261  * move forward one by one.
4262  */
4263 static int ocfs2_rm_xattr_bucket(struct inode *inode,
4264 				 struct buffer_head *first_bh,
4265 				 struct ocfs2_xattr_bucket *bucket)
4266 {
4267 	int ret = 0, credits;
4268 	struct ocfs2_xattr_header *xh =
4269 				(struct ocfs2_xattr_header *)first_bh->b_data;
4270 	u16 bucket_num = le16_to_cpu(xh->xh_num_buckets);
4271 	u64 end, start = bucket->bhs[0]->b_blocknr;
4272 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4273 	handle_t *handle;
4274 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4275 
4276 	end = first_bh->b_blocknr + (bucket_num - 1) * blk_per_bucket;
4277 
4278 	mlog(0, "rm xattr bucket %llu\n", start);
4279 	/*
4280 	 * We need to update the first xattr_header and all the buckets starting
4281 	 * from start in this xattr rec.
4282 	 *
4283 	 * XXX: Should we empty the old last bucket here?
4284 	 */
4285 	credits = 1 + end - start;
4286 	handle = ocfs2_start_trans(osb, credits);
4287 	if (IS_ERR(handle)) {
4288 		ret = PTR_ERR(handle);
4289 		mlog_errno(ret);
4290 		return ret;
4291 	}
4292 
4293 	ret = ocfs2_journal_access(handle, inode, first_bh,
4294 				   OCFS2_JOURNAL_ACCESS_WRITE);
4295 	if (ret) {
4296 		mlog_errno(ret);
4297 		goto out_commit;
4298 	}
4299 
4300 
4301 	while (start < end) {
4302 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4303 					    start + blk_per_bucket,
4304 					    start, 0);
4305 		if (ret) {
4306 			mlog_errno(ret);
4307 			goto out_commit;
4308 		}
4309 		start += blk_per_bucket;
4310 	}
4311 
4312 	/* update the first_bh. */
4313 	xh->xh_num_buckets = cpu_to_le16(bucket_num - 1);
4314 	ocfs2_journal_dirty(handle, first_bh);
4315 
4316 out_commit:
4317 	ocfs2_commit_trans(osb, handle);
4318 	return ret;
4319 }
4320 
4321 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4322 				  struct buffer_head *root_bh,
4323 				  u64 blkno,
4324 				  u32 cpos,
4325 				  u32 len)
4326 {
4327 	int ret;
4328 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4329 	struct inode *tl_inode = osb->osb_tl_inode;
4330 	handle_t *handle;
4331 	struct ocfs2_xattr_block *xb =
4332 			(struct ocfs2_xattr_block *)root_bh->b_data;
4333 	struct ocfs2_extent_list *root_el = &xb->xb_attrs.xb_root.xt_list;
4334 	struct ocfs2_alloc_context *meta_ac = NULL;
4335 	struct ocfs2_cached_dealloc_ctxt dealloc;
4336 
4337 	ocfs2_init_dealloc_ctxt(&dealloc);
4338 
4339 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4340 	     cpos, len, (unsigned long long)blkno);
4341 
4342 	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4343 
4344 	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
4345 				    0, 1, NULL, &meta_ac,
4346 				    OCFS2_XATTR_TREE_EXTENT, NULL);
4347 	if (ret) {
4348 		mlog_errno(ret);
4349 		return ret;
4350 	}
4351 
4352 	mutex_lock(&tl_inode->i_mutex);
4353 
4354 	if (ocfs2_truncate_log_needs_flush(osb)) {
4355 		ret = __ocfs2_flush_truncate_log(osb);
4356 		if (ret < 0) {
4357 			mlog_errno(ret);
4358 			goto out;
4359 		}
4360 	}
4361 
4362 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4363 	if (handle == NULL) {
4364 		ret = -ENOMEM;
4365 		mlog_errno(ret);
4366 		goto out;
4367 	}
4368 
4369 	ret = ocfs2_journal_access(handle, inode, root_bh,
4370 				   OCFS2_JOURNAL_ACCESS_WRITE);
4371 	if (ret) {
4372 		mlog_errno(ret);
4373 		goto out_commit;
4374 	}
4375 
4376 	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
4377 				  &dealloc, OCFS2_XATTR_TREE_EXTENT, NULL);
4378 	if (ret) {
4379 		mlog_errno(ret);
4380 		goto out_commit;
4381 	}
4382 
4383 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4384 
4385 	ret = ocfs2_journal_dirty(handle, root_bh);
4386 	if (ret) {
4387 		mlog_errno(ret);
4388 		goto out_commit;
4389 	}
4390 
4391 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4392 	if (ret)
4393 		mlog_errno(ret);
4394 
4395 out_commit:
4396 	ocfs2_commit_trans(osb, handle);
4397 out:
4398 	ocfs2_schedule_truncate_log_flush(osb, 1);
4399 
4400 	mutex_unlock(&tl_inode->i_mutex);
4401 
4402 	if (meta_ac)
4403 		ocfs2_free_alloc_context(meta_ac);
4404 
4405 	ocfs2_run_deallocs(osb, &dealloc);
4406 
4407 	return ret;
4408 }
4409 
4410 /*
4411  * Free the xattr bucket indicated by xs->bucket and if all the buckets
4412  * in the clusters is free, free the clusters also.
4413  */
4414 static int ocfs2_xattr_bucket_shrink(struct inode *inode,
4415 				     struct ocfs2_xattr_info *xi,
4416 				     struct ocfs2_xattr_search *xs,
4417 				     u32 name_hash)
4418 {
4419 	int ret;
4420 	u32 e_cpos, num_clusters;
4421 	u64 p_blkno;
4422 	struct buffer_head *first_bh = NULL;
4423 	struct ocfs2_xattr_header *first_xh;
4424 	struct ocfs2_xattr_block *xb =
4425 			(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
4426 
4427 	BUG_ON(xs->header->xh_count != 0);
4428 
4429 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4430 				  &e_cpos, &num_clusters,
4431 				  &xb->xb_attrs.xb_root.xt_list);
4432 	if (ret) {
4433 		mlog_errno(ret);
4434 		return ret;
4435 	}
4436 
4437 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
4438 			       &first_bh, OCFS2_BH_CACHED, inode);
4439 	if (ret) {
4440 		mlog_errno(ret);
4441 		return ret;
4442 	}
4443 
4444 	ret = ocfs2_rm_xattr_bucket(inode, first_bh, &xs->bucket);
4445 	if (ret) {
4446 		mlog_errno(ret);
4447 		goto out;
4448 	}
4449 
4450 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4451 	if (first_xh->xh_num_buckets == 0)
4452 		ret = ocfs2_rm_xattr_cluster(inode, xs->xattr_bh,
4453 					     p_blkno, e_cpos,
4454 					     num_clusters);
4455 
4456 out:
4457 	brelse(first_bh);
4458 	return ret;
4459 }
4460 
4461 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4462 					 struct ocfs2_xattr_search *xs)
4463 {
4464 	handle_t *handle = NULL;
4465 	struct ocfs2_xattr_header *xh = xs->bucket.xh;
4466 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
4467 						le16_to_cpu(xh->xh_count) - 1];
4468 	int ret = 0;
4469 
4470 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4471 	if (IS_ERR(handle)) {
4472 		ret = PTR_ERR(handle);
4473 		mlog_errno(ret);
4474 		return;
4475 	}
4476 
4477 	ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4478 				   OCFS2_JOURNAL_ACCESS_WRITE);
4479 	if (ret) {
4480 		mlog_errno(ret);
4481 		goto out_commit;
4482 	}
4483 
4484 	/* Remove the old entry. */
4485 	memmove(xs->here, xs->here + 1,
4486 		(void *)last - (void *)xs->here);
4487 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4488 	le16_add_cpu(&xh->xh_count, -1);
4489 
4490 	ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4491 	if (ret < 0)
4492 		mlog_errno(ret);
4493 out_commit:
4494 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4495 }
4496 
/*
 * Set the xattr name/value in the bucket specified in xs.
 *
 * As the new value in xi may be stored in the bucket or in an outside cluster,
 * we divide the whole process into the following steps:
 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
 * 4. If the clusters for the new outside value can't be allocated, we need
 *    to free the xattr we allocated in set.
 *
 * Note that xi->value and xi->value_len are overwritten when a new value
 * root has to be inserted for outside storage; the caller's original value
 * pointer and length are kept locally in val and value_len.
 *
 * Returns 0 on success, otherwise the error of the step that failed.
 */
static int ocfs2_xattr_set_in_bucket(struct inode *inode,
				     struct ocfs2_xattr_info *xi,
				     struct ocfs2_xattr_search *xs)
{
	int ret, local = 1, bucket_empty = 0;
	size_t value_len;
	/* Remember the real value; xi->value may be replaced below. */
	char *val = (char *)xi->value;
	struct ocfs2_xattr_entry *xe = xs->here;
	u32 name_hash = ocfs2_xattr_hash_by_name(inode,
						 xi->name_index, xi->name);

	if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
		/*
		 * We need to truncate the xattr storage first.
		 *
		 * If both the old and new value are stored to
		 * outside block, we only need to truncate
		 * the storage and then set the value outside.
		 *
		 * If the new value should be stored within block,
		 * we should free all the outside block first and
		 * the modification to the xattr block will be done
		 * by following steps.
		 */
		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
			value_len = xi->value_len;
		else
			value_len = 0;

		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
							   value_len);
		if (ret)
			goto out;

		/* Outside -> outside: the entry itself needs no change. */
		if (value_len)
			goto set_value_outside;
	}

	value_len = xi->value_len;
	/* So we have to handle the inside block change now. */
	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
		/*
		 * If the new value will be stored outside of block,
		 * initialize a new empty value root and insert it first.
		 * (def_xv is defined elsewhere in this file.)
		 */
		local = 0;
		xi->value = &def_xv;
		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
	}

	ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash,
					      local, &bucket_empty);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
		/* allocate the space now for the outside block storage. */
		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
							   value_len);
		if (ret) {
			mlog_errno(ret);

			if (xs->not_found) {
				/*
				 * We can't allocate enough clusters for outside
				 * storage and we have allocated xattr already,
				 * so need to remove it.
				 */
				ocfs2_xattr_bucket_remove_xs(inode, xs);
			}
			goto out;
		}
	} else {
		/*
		 * The value lives in the bucket, so we are done unless
		 * ocfs2_xattr_set_entry_in_bucket() reported that the
		 * bucket became empty, in which case it is released.
		 */
		if (bucket_empty)
			ret = ocfs2_xattr_bucket_shrink(inode, xi,
							xs, name_hash);
		goto out;
	}

set_value_outside:
	/* Write the caller's original value into the outside storage. */
	ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
out:
	return ret;
}
4594 
4595 /* check whether the xattr bucket is filled up with the same hash value. */
4596 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4597 					      struct ocfs2_xattr_bucket *bucket)
4598 {
4599 	struct ocfs2_xattr_header *xh = bucket->xh;
4600 
4601 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4602 	    xh->xh_entries[0].xe_name_hash) {
4603 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4604 		     "hash = %u\n",
4605 		     (unsigned long long)bucket->bhs[0]->b_blocknr,
4606 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4607 		return -ENOSPC;
4608 	}
4609 
4610 	return 0;
4611 }
4612 
/*
 * Set the xattr described by xi in an indexed xattr block.
 *
 * xs must already describe the bucket that was found for the name
 * (xs->header, xs->bucket, xs->here and xs->not_found are read here).
 * The function works out how much room the new name/value needs, tries
 * to obtain it by defragmenting the bucket and, failing that, adds a new
 * bucket, repeats the lookup and tries again.  Only one such allocation
 * is tolerated.  The actual insert is done by ocfs2_xattr_set_in_bucket().
 *
 * Returns 0 on success or a negative error code.
 */
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs)
{
	struct ocfs2_xattr_header *xh;
	struct ocfs2_xattr_entry *xe;
	u16 count, header_size, xh_free_start;
	int i, free, max_free, need, old;
	size_t value_size = 0, name_len = strlen(xi->name);
	size_t blocksize = inode->i_sb->s_blocksize;
	int ret, allocation = 0;
	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);

	mlog_entry("Set xattr %s in xattr index block\n", xi->name);

try_again:
	/* (Re)load everything from xs; it changes after a new bucket is added. */
	xh = xs->header;
	count = le16_to_cpu(xh->xh_count);
	xh_free_start = le16_to_cpu(xh->xh_free_start);
	header_size = sizeof(struct ocfs2_xattr_header) +
			count * sizeof(struct ocfs2_xattr_entry);
	/* Upper bound on the free space, ignoring fragmentation. */
	max_free = OCFS2_XATTR_BUCKET_SIZE -
		le16_to_cpu(xh->xh_name_value_len) - header_size;

	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
			"of %u which exceed block size\n",
			(unsigned long long)xs->bucket.bhs[0]->b_blocknr,
			header_size);

	/* Space the value takes in the bucket: a value root or the value. */
	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
		value_size = OCFS2_XATTR_ROOT_SIZE;
	else if (xi->value)
		value_size = OCFS2_XATTR_SIZE(xi->value_len);

	if (xs->not_found)
		need = sizeof(struct ocfs2_xattr_entry) +
			OCFS2_XATTR_SIZE(name_len) + value_size;
	else {
		need = value_size + OCFS2_XATTR_SIZE(name_len);

		/*
		 * We only replace the old value if the new length is smaller
		 * than the old one. Otherwise we will allocate new space in the
		 * bucket to store it.
		 */
		xe = xs->here;
		if (ocfs2_xattr_is_local(xe))
			old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
		else
			old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);

		if (old >= value_size)
			need = 0;
	}

	/* Space between the end of the entry array and xh_free_start. */
	free = xh_free_start - header_size;
	/*
	 * We need to make sure the new name/value pair
	 * can exist in the same block.
	 */
	if (xh_free_start % blocksize < need)
		free -= xh_free_start % blocksize;

	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
	     " %u\n", xs->not_found,
	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
	     le16_to_cpu(xh->xh_name_value_len));

	if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
		if (need <= max_free &&
		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
			/*
			 * We can create the space by defragment. Since only the
			 * name/value will be moved, the xe shouldn't be changed
			 * in xs.
			 */
			ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
			if (ret) {
				mlog_errno(ret);
				goto out;
			}

			/* Recompute the free space after defragmenting. */
			xh_free_start = le16_to_cpu(xh->xh_free_start);
			free = xh_free_start - header_size;
			if (xh_free_start % blocksize < need)
				free -= xh_free_start % blocksize;

			if (free >= need)
				goto xattr_set;

			mlog(0, "Can't get enough space for xattr insert by "
			     "defragment. Need %u bytes, but we have %d, so "
			     "allocate new bucket for it.\n", need, free);
		}

		/*
		 * We have to add new buckets or clusters and one
		 * allocation should leave us enough space for insert.
		 */
		BUG_ON(allocation);

		/*
		 * We do not allow for overlapping ranges between buckets. And
		 * the maximum number of collisions we will allow for then is
		 * one bucket's worth, so check it here whether we need to
		 * add a new bucket for the insert.
		 */
		ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		ret = ocfs2_add_new_xattr_bucket(inode,
						 xs->xattr_bh,
						 xs->bucket.bhs[0]);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		/*
		 * Release the buffers of the old bucket and clear it, then
		 * search again for the name.
		 */
		for (i = 0; i < blk_per_bucket; i++)
			brelse(xs->bucket.bhs[i]);

		memset(&xs->bucket, 0, sizeof(xs->bucket));

		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
						   xi->name_index,
						   xi->name, xs);
		if (ret && ret != -ENODATA)
			goto out;
		/* ret is either 0 or -ENODATA at this point. */
		xs->not_found = ret;
		allocation = 1;
		goto try_again;
	}

xattr_set:
	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
out:
	mlog_exit(ret);
	return ret;
}
4757 
4758 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4759 					struct ocfs2_xattr_bucket *bucket,
4760 					void *para)
4761 {
4762 	int ret = 0;
4763 	struct ocfs2_xattr_header *xh = bucket->xh;
4764 	u16 i;
4765 	struct ocfs2_xattr_entry *xe;
4766 
4767 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4768 		xe = &xh->xh_entries[i];
4769 		if (ocfs2_xattr_is_local(xe))
4770 			continue;
4771 
4772 		ret = ocfs2_xattr_bucket_value_truncate(inode,
4773 							bucket->bhs[0],
4774 							i, 0);
4775 		if (ret) {
4776 			mlog_errno(ret);
4777 			break;
4778 		}
4779 	}
4780 
4781 	return ret;
4782 }
4783 
4784 static int ocfs2_delete_xattr_index_block(struct inode *inode,
4785 					  struct buffer_head *xb_bh)
4786 {
4787 	struct ocfs2_xattr_block *xb =
4788 			(struct ocfs2_xattr_block *)xb_bh->b_data;
4789 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4790 	int ret = 0;
4791 	u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4792 	u64 p_blkno;
4793 
4794 	if (le16_to_cpu(el->l_next_free_rec) == 0)
4795 		return 0;
4796 
4797 	while (name_hash > 0) {
4798 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4799 					  &e_cpos, &num_clusters, el);
4800 		if (ret) {
4801 			mlog_errno(ret);
4802 			goto out;
4803 		}
4804 
4805 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4806 						  ocfs2_delete_xattr_in_bucket,
4807 						  NULL);
4808 		if (ret) {
4809 			mlog_errno(ret);
4810 			goto out;
4811 		}
4812 
4813 		ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4814 					     p_blkno, e_cpos, num_clusters);
4815 		if (ret) {
4816 			mlog_errno(ret);
4817 			break;
4818 		}
4819 
4820 		if (e_cpos == 0)
4821 			break;
4822 
4823 		name_hash = e_cpos - 1;
4824 	}
4825 
4826 out:
4827 	return ret;
4828 }
4829