xref: /openbmc/linux/fs/ocfs2/xattr.c (revision 01225596)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is taken from ext3.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public
22  * License along with this program; if not, write to the
23  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24  * Boston, MA 02111-1307, USA.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/types.h>
30 #include <linux/slab.h>
31 #include <linux/highmem.h>
32 #include <linux/pagemap.h>
33 #include <linux/uio.h>
34 #include <linux/sched.h>
35 #include <linux/splice.h>
36 #include <linux/mount.h>
37 #include <linux/writeback.h>
38 #include <linux/falloc.h>
39 #include <linux/sort.h>
40 
41 #define MLOG_MASK_PREFIX ML_XATTR
42 #include <cluster/masklog.h>
43 
44 #include "ocfs2.h"
45 #include "alloc.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 
59 
/*
 * An xattr value root together with room for one extent record, declared
 * right after it.  sizeof() of this struct is OCFS2_XATTR_ROOT_SIZE, the
 * space reserved after the name for a value that is stored outside.
 */
60 struct ocfs2_xattr_def_value_root {
61 	struct ocfs2_xattr_value_root	xv;	/* value tree root */
62 	struct ocfs2_extent_rec		er;	/* storage for one extent record */
63 };
64 
/*
 * In-memory handle on one xattr bucket: the buffer heads of the blocks
 * that make up the bucket and a pointer to the bucket's xattr header.
 */
65 struct ocfs2_xattr_bucket {
66 	struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
67 	struct ocfs2_xattr_header *xh;
68 };
69 
/* Bytes reserved after an xattr name for the root of an outside value. */
70 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Values of at most this many bytes are flagged "local" (stored in place). */
71 #define OCFS2_XATTR_INLINE_SIZE	80
72 
/*
 * A value root whose extent list has room for exactly one record
 * (l_count = 1); every other field is zero.
 * NOTE(review): presumably used as the template for a new, empty value
 * root -- its users are not in this part of the file.
 */
73 static struct ocfs2_xattr_def_value_root def_xv = {
74 	.xv.xr_list.l_count = cpu_to_le16(1),
75 };
76 
/*
 * NULL-terminated list of the xattr handlers this file knows about
 * (user and trusted).  Non-static, so it is visible outside this file.
 */
77 struct xattr_handler *ocfs2_xattr_handlers[] = {
78 	&ocfs2_xattr_user_handler,
79 	&ocfs2_xattr_trusted_handler,
80 	NULL
81 };
82 
/*
 * Handler lookup table indexed by OCFS2_XATTR_INDEX_*; consulted by
 * ocfs2_xattr_handler().  Slots without an initializer are NULL.
 */
83 static struct xattr_handler *ocfs2_xattr_handler_map[] = {
84 	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
85 	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
86 };
87 
/*
 * Description of the attribute a set operation works on.
 */
88 struct ocfs2_xattr_info {
89 	int name_index;		/* attribute type, stored via ocfs2_xattr_set_type() */
90 	const char *name;	/* NUL-terminated name (strlen() is taken of it) */
91 	const void *value;	/* new value; NULL means "remove the attribute" */
92 	size_t value_len;	/* length of value in bytes */
93 };
94 
/*
 * State of one lookup in either the inode body or the external xattr
 * block/bucket.
 */
95 struct ocfs2_xattr_search {
96 	struct buffer_head *inode_bh;	/* buffer holding the dinode */
97 	/*
98 	 * xattr_bh points to the buffer head of the block that holds the
99 	 * extended attributes.  When they live in the inode, xattr_bh is
99 	 * equal to inode_bh.
100 	 */
101 	struct buffer_head *xattr_bh;
102 	struct ocfs2_xattr_header *header;	/* header of the region searched */
103 	struct ocfs2_xattr_bucket bucket;	/* bucket buffers for indexed blocks */
104 	void *base;	/* entry name offsets are relative to this address */
105 	void *end;	/* end of the storage region */
106 	struct ocfs2_xattr_entry *here;	/* entry the search stopped at */
107 	int not_found;	/* non-zero (-ENODATA) while no entry matched */
108 };
109 
/*
 * Forward declarations of the helpers for indexed (bucket based) xattr
 * blocks.  Some of them are called by the list and lookup paths below;
 * their definitions are not in this part of the file.
 */
110 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
111 					     struct ocfs2_xattr_header *xh,
112 					     int index,
113 					     int *block_off,
114 					     int *new_offset);
115 
116 static int ocfs2_xattr_index_block_find(struct inode *inode,
117 					struct buffer_head *root_bh,
118 					int name_index,
119 					const char *name,
120 					struct ocfs2_xattr_search *xs);
121 
122 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
123 					struct ocfs2_xattr_tree_root *xt,
124 					char *buffer,
125 					size_t buffer_size);
126 
127 static int ocfs2_xattr_create_index_block(struct inode *inode,
128 					  struct ocfs2_xattr_search *xs);
129 
130 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
131 					     struct ocfs2_xattr_info *xi,
132 					     struct ocfs2_xattr_search *xs);
133 
134 static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
135 {
136 	struct xattr_handler *handler = NULL;
137 
138 	if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
139 		handler = ocfs2_xattr_handler_map[name_index];
140 
141 	return handler;
142 }
143 
144 static inline u32 ocfs2_xattr_name_hash(struct inode *inode,
145 					char *prefix,
146 					int prefix_len,
147 					char *name,
148 					int name_len)
149 {
150 	/* Get hash value of uuid from super block */
151 	u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
152 	int i;
153 
154 	/* hash extended attribute prefix */
155 	for (i = 0; i < prefix_len; i++) {
156 		hash = (hash << OCFS2_HASH_SHIFT) ^
157 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
158 		       *prefix++;
159 	}
160 	/* hash extended attribute name */
161 	for (i = 0; i < name_len; i++) {
162 		hash = (hash << OCFS2_HASH_SHIFT) ^
163 		       (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
164 		       *name++;
165 	}
166 
167 	return hash;
168 }
169 
170 /*
171  * ocfs2_xattr_hash_entry()
172  *
173  * Compute the hash of an extended attribute.
174  */
175 static void ocfs2_xattr_hash_entry(struct inode *inode,
176 				   struct ocfs2_xattr_header *header,
177 				   struct ocfs2_xattr_entry *entry)
178 {
179 	u32 hash = 0;
180 	struct xattr_handler *handler =
181 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
182 	char *prefix = handler->prefix;
183 	char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
184 	int prefix_len = strlen(handler->prefix);
185 
186 	hash = ocfs2_xattr_name_hash(inode, prefix, prefix_len, name,
187 				     entry->xe_name_len);
188 	entry->xe_name_hash = cpu_to_le32(hash);
189 
190 	return;
191 }
192 
/*
 * ocfs2_xattr_extend_allocation()
 *
 * Add clusters_to_add clusters to the value tree rooted at xv, which
 * lives in the buffer xattr_bh.
 *
 * Each round locks the allocators, starts a transaction, journals
 * xattr_bh and calls ocfs2_add_clusters_in_btree().  That call reports
 * through 'why' whether everything was added or whether the transaction
 * (RESTART_TRANS) or the whole function (RESTART_META) has to be
 * restarted for the remaining clusters.
 *
 * Returns 0 on success or a negative error code.
 */
193 static int ocfs2_xattr_extend_allocation(struct inode *inode,
194 					 u32 clusters_to_add,
195 					 struct buffer_head *xattr_bh,
196 					 struct ocfs2_xattr_value_root *xv)
197 {
198 	int status = 0;
199 	int restart_func = 0;
200 	int credits = 0;
201 	handle_t *handle = NULL;
202 	struct ocfs2_alloc_context *data_ac = NULL;
203 	struct ocfs2_alloc_context *meta_ac = NULL;
204 	enum ocfs2_alloc_restarted why;
205 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
206 	struct ocfs2_extent_list *root_el = &xv->xr_list;
207 	u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
208 
209 	mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
210 
	/* Full restart point: allocators and transaction are set up anew. */
211 restart_all:
212 
213 	status = ocfs2_lock_allocators(inode, xattr_bh, root_el,
214 				       clusters_to_add, 0, &data_ac,
215 				       &meta_ac, OCFS2_XATTR_VALUE_EXTENT, xv);
216 	if (status) {
217 		mlog_errno(status);
218 		goto leave;
219 	}
220 
221 	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
222 	handle = ocfs2_start_trans(osb, credits);
223 	if (IS_ERR(handle)) {
224 		status = PTR_ERR(handle);
		/* keep the 'leave' path from committing an error pointer */
225 		handle = NULL;
226 		mlog_errno(status);
227 		goto leave;
228 	}
229 
	/* Retry point after the running transaction has been extended. */
230 restarted_transaction:
231 	status = ocfs2_journal_access(handle, inode, xattr_bh,
232 				      OCFS2_JOURNAL_ACCESS_WRITE);
233 	if (status < 0) {
234 		mlog_errno(status);
235 		goto leave;
236 	}
237 
	/* remember the count so the clusters actually added can be derived */
238 	prev_clusters = le32_to_cpu(xv->xr_clusters);
239 	status = ocfs2_add_clusters_in_btree(osb,
240 					     inode,
241 					     &logical_start,
242 					     clusters_to_add,
243 					     0,
244 					     xattr_bh,
245 					     root_el,
246 					     handle,
247 					     data_ac,
248 					     meta_ac,
249 					     &why,
250 					     OCFS2_XATTR_VALUE_EXTENT,
251 					     xv);
	/* -EAGAIN is not fatal here; -ENOSPC is fatal but not logged */
252 	if ((status < 0) && (status != -EAGAIN)) {
253 		if (status != -ENOSPC)
254 			mlog_errno(status);
255 		goto leave;
256 	}
257 
258 	status = ocfs2_journal_dirty(handle, xattr_bh);
259 	if (status < 0) {
260 		mlog_errno(status);
261 		goto leave;
262 	}
263 
	/* subtract what this round managed to add */
264 	clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
265 
266 	if (why != RESTART_NONE && clusters_to_add) {
267 		if (why == RESTART_META) {
			/* handled at 'leave': commit, free contexts, start over */
268 			mlog(0, "restarting function.\n");
269 			restart_func = 1;
270 		} else {
271 			BUG_ON(why != RESTART_TRANS);
272 
273 			mlog(0, "restarting transaction.\n");
274 			/* TODO: This can be more intelligent. */
275 			credits = ocfs2_calc_extend_credits(osb->sb,
276 							    root_el,
277 							    clusters_to_add);
278 			status = ocfs2_extend_trans(handle, credits);
279 			if (status < 0) {
280 				/* handle still has to be committed at
281 				 * this point. */
				/* the real error code is replaced by -ENOMEM */
282 				status = -ENOMEM;
283 				mlog_errno(status);
284 				goto leave;
285 			}
286 			goto restarted_transaction;
287 		}
288 	}
289 
	/* Common exit: commit the transaction and drop both alloc contexts. */
290 leave:
291 	if (handle) {
292 		ocfs2_commit_trans(osb, handle);
293 		handle = NULL;
294 	}
295 	if (data_ac) {
296 		ocfs2_free_alloc_context(data_ac);
297 		data_ac = NULL;
298 	}
299 	if (meta_ac) {
300 		ocfs2_free_alloc_context(meta_ac);
301 		meta_ac = NULL;
302 	}
	/* a RESTART_META request is only honoured when nothing failed */
303 	if ((!status) && restart_func) {
304 		restart_func = 0;
305 		goto restart_all;
306 	}
307 
308 	return status;
309 }
310 
311 static int __ocfs2_remove_xattr_range(struct inode *inode,
312 				      struct buffer_head *root_bh,
313 				      struct ocfs2_xattr_value_root *xv,
314 				      u32 cpos, u32 phys_cpos, u32 len,
315 				      struct ocfs2_cached_dealloc_ctxt *dealloc)
316 {
317 	int ret;
318 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
319 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
320 	struct inode *tl_inode = osb->osb_tl_inode;
321 	handle_t *handle;
322 	struct ocfs2_alloc_context *meta_ac = NULL;
323 
324 	ret = ocfs2_lock_allocators(inode, root_bh, &xv->xr_list,
325 				    0, 1, NULL, &meta_ac,
326 				    OCFS2_XATTR_VALUE_EXTENT, xv);
327 	if (ret) {
328 		mlog_errno(ret);
329 		return ret;
330 	}
331 
332 	mutex_lock(&tl_inode->i_mutex);
333 
334 	if (ocfs2_truncate_log_needs_flush(osb)) {
335 		ret = __ocfs2_flush_truncate_log(osb);
336 		if (ret < 0) {
337 			mlog_errno(ret);
338 			goto out;
339 		}
340 	}
341 
342 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
343 	if (IS_ERR(handle)) {
344 		ret = PTR_ERR(handle);
345 		mlog_errno(ret);
346 		goto out;
347 	}
348 
349 	ret = ocfs2_journal_access(handle, inode, root_bh,
350 				   OCFS2_JOURNAL_ACCESS_WRITE);
351 	if (ret) {
352 		mlog_errno(ret);
353 		goto out_commit;
354 	}
355 
356 	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
357 				  dealloc, OCFS2_XATTR_VALUE_EXTENT, xv);
358 	if (ret) {
359 		mlog_errno(ret);
360 		goto out_commit;
361 	}
362 
363 	le32_add_cpu(&xv->xr_clusters, -len);
364 
365 	ret = ocfs2_journal_dirty(handle, root_bh);
366 	if (ret) {
367 		mlog_errno(ret);
368 		goto out_commit;
369 	}
370 
371 	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
372 	if (ret)
373 		mlog_errno(ret);
374 
375 out_commit:
376 	ocfs2_commit_trans(osb, handle);
377 out:
378 	mutex_unlock(&tl_inode->i_mutex);
379 
380 	if (meta_ac)
381 		ocfs2_free_alloc_context(meta_ac);
382 
383 	return ret;
384 }
385 
386 static int ocfs2_xattr_shrink_size(struct inode *inode,
387 				   u32 old_clusters,
388 				   u32 new_clusters,
389 				   struct buffer_head *root_bh,
390 				   struct ocfs2_xattr_value_root *xv)
391 {
392 	int ret = 0;
393 	u32 trunc_len, cpos, phys_cpos, alloc_size;
394 	u64 block;
395 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
396 	struct ocfs2_cached_dealloc_ctxt dealloc;
397 
398 	ocfs2_init_dealloc_ctxt(&dealloc);
399 
400 	if (old_clusters <= new_clusters)
401 		return 0;
402 
403 	cpos = new_clusters;
404 	trunc_len = old_clusters - new_clusters;
405 	while (trunc_len) {
406 		ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
407 					       &alloc_size, &xv->xr_list);
408 		if (ret) {
409 			mlog_errno(ret);
410 			goto out;
411 		}
412 
413 		if (alloc_size > trunc_len)
414 			alloc_size = trunc_len;
415 
416 		ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
417 						 phys_cpos, alloc_size,
418 						 &dealloc);
419 		if (ret) {
420 			mlog_errno(ret);
421 			goto out;
422 		}
423 
424 		block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
425 		ocfs2_remove_xattr_clusters_from_cache(inode, block,
426 						       alloc_size);
427 		cpos += alloc_size;
428 		trunc_len -= alloc_size;
429 	}
430 
431 out:
432 	ocfs2_schedule_truncate_log_flush(osb, 1);
433 	ocfs2_run_deallocs(osb, &dealloc);
434 
435 	return ret;
436 }
437 
438 static int ocfs2_xattr_value_truncate(struct inode *inode,
439 				      struct buffer_head *root_bh,
440 				      struct ocfs2_xattr_value_root *xv,
441 				      int len)
442 {
443 	int ret;
444 	u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
445 	u32 old_clusters = le32_to_cpu(xv->xr_clusters);
446 
447 	if (new_clusters == old_clusters)
448 		return 0;
449 
450 	if (new_clusters > old_clusters)
451 		ret = ocfs2_xattr_extend_allocation(inode,
452 						    new_clusters - old_clusters,
453 						    root_bh, xv);
454 	else
455 		ret = ocfs2_xattr_shrink_size(inode,
456 					      old_clusters, new_clusters,
457 					      root_bh, xv);
458 
459 	return ret;
460 }
461 
462 static int ocfs2_xattr_list_entries(struct inode *inode,
463 				    struct ocfs2_xattr_header *header,
464 				    char *buffer, size_t buffer_size)
465 {
466 	size_t rest = buffer_size;
467 	int i;
468 
469 	for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
470 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
471 		struct xattr_handler *handler =
472 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
473 
474 		if (handler) {
475 			size_t size = handler->list(inode, buffer, rest,
476 					((char *)header +
477 					le16_to_cpu(entry->xe_name_offset)),
478 					entry->xe_name_len);
479 			if (buffer) {
480 				if (size > rest)
481 					return -ERANGE;
482 				buffer += size;
483 			}
484 			rest -= size;
485 		}
486 	}
487 
488 	return buffer_size - rest;
489 }
490 
491 static int ocfs2_xattr_ibody_list(struct inode *inode,
492 				  struct ocfs2_dinode *di,
493 				  char *buffer,
494 				  size_t buffer_size)
495 {
496 	struct ocfs2_xattr_header *header = NULL;
497 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
498 	int ret = 0;
499 
500 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
501 		return ret;
502 
503 	header = (struct ocfs2_xattr_header *)
504 		 ((void *)di + inode->i_sb->s_blocksize -
505 		 le16_to_cpu(di->i_xattr_inline_size));
506 
507 	ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
508 
509 	return ret;
510 }
511 
512 static int ocfs2_xattr_block_list(struct inode *inode,
513 				  struct ocfs2_dinode *di,
514 				  char *buffer,
515 				  size_t buffer_size)
516 {
517 	struct buffer_head *blk_bh = NULL;
518 	struct ocfs2_xattr_block *xb;
519 	int ret = 0;
520 
521 	if (!di->i_xattr_loc)
522 		return ret;
523 
524 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
525 			       le64_to_cpu(di->i_xattr_loc),
526 			       &blk_bh, OCFS2_BH_CACHED, inode);
527 	if (ret < 0) {
528 		mlog_errno(ret);
529 		return ret;
530 	}
531 	/*Verify the signature of xattr block*/
532 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
533 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
534 		ret = -EFAULT;
535 		goto cleanup;
536 	}
537 
538 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
539 
540 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
541 		struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
542 		ret = ocfs2_xattr_list_entries(inode, header,
543 					       buffer, buffer_size);
544 	} else {
545 		struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
546 		ret = ocfs2_xattr_tree_list_index_block(inode, xt,
547 						   buffer, buffer_size);
548 	}
549 cleanup:
550 	brelse(blk_bh);
551 
552 	return ret;
553 }
554 
555 ssize_t ocfs2_listxattr(struct dentry *dentry,
556 			char *buffer,
557 			size_t size)
558 {
559 	int ret = 0, i_ret = 0, b_ret = 0;
560 	struct buffer_head *di_bh = NULL;
561 	struct ocfs2_dinode *di = NULL;
562 	struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
563 
564 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
565 		return ret;
566 
567 	ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
568 	if (ret < 0) {
569 		mlog_errno(ret);
570 		return ret;
571 	}
572 
573 	di = (struct ocfs2_dinode *)di_bh->b_data;
574 
575 	down_read(&oi->ip_xattr_sem);
576 	i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
577 	if (i_ret < 0)
578 		b_ret = 0;
579 	else {
580 		if (buffer) {
581 			buffer += i_ret;
582 			size -= i_ret;
583 		}
584 		b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
585 					       buffer, size);
586 		if (b_ret < 0)
587 			i_ret = 0;
588 	}
589 	up_read(&oi->ip_xattr_sem);
590 	ocfs2_inode_unlock(dentry->d_inode, 0);
591 
592 	brelse(di_bh);
593 
594 	return i_ret + b_ret;
595 }
596 
597 static int ocfs2_xattr_find_entry(int name_index,
598 				  const char *name,
599 				  struct ocfs2_xattr_search *xs)
600 {
601 	struct ocfs2_xattr_entry *entry;
602 	size_t name_len;
603 	int i, cmp = 1;
604 
605 	if (name == NULL)
606 		return -EINVAL;
607 
608 	name_len = strlen(name);
609 	entry = xs->here;
610 	for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
611 		cmp = name_index - ocfs2_xattr_get_type(entry);
612 		if (!cmp)
613 			cmp = name_len - entry->xe_name_len;
614 		if (!cmp)
615 			cmp = memcmp(name, (xs->base +
616 				     le16_to_cpu(entry->xe_name_offset)),
617 				     name_len);
618 		if (cmp == 0)
619 			break;
620 		entry += 1;
621 	}
622 	xs->here = entry;
623 
624 	return cmp ? -ENODATA : 0;
625 }
626 
627 static int ocfs2_xattr_get_value_outside(struct inode *inode,
628 					 struct ocfs2_xattr_value_root *xv,
629 					 void *buffer,
630 					 size_t len)
631 {
632 	u32 cpos, p_cluster, num_clusters, bpc, clusters;
633 	u64 blkno;
634 	int i, ret = 0;
635 	size_t cplen, blocksize;
636 	struct buffer_head *bh = NULL;
637 	struct ocfs2_extent_list *el;
638 
639 	el = &xv->xr_list;
640 	clusters = le32_to_cpu(xv->xr_clusters);
641 	bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
642 	blocksize = inode->i_sb->s_blocksize;
643 
644 	cpos = 0;
645 	while (cpos < clusters) {
646 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
647 					       &num_clusters, el);
648 		if (ret) {
649 			mlog_errno(ret);
650 			goto out;
651 		}
652 
653 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
654 		/* Copy ocfs2_xattr_value */
655 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
656 			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
657 					       &bh, OCFS2_BH_CACHED, inode);
658 			if (ret) {
659 				mlog_errno(ret);
660 				goto out;
661 			}
662 
663 			cplen = len >= blocksize ? blocksize : len;
664 			memcpy(buffer, bh->b_data, cplen);
665 			len -= cplen;
666 			buffer += cplen;
667 
668 			brelse(bh);
669 			bh = NULL;
670 			if (len == 0)
671 				break;
672 		}
673 		cpos += num_clusters;
674 	}
675 out:
676 	return ret;
677 }
678 
679 static int ocfs2_xattr_ibody_get(struct inode *inode,
680 				 int name_index,
681 				 const char *name,
682 				 void *buffer,
683 				 size_t buffer_size,
684 				 struct ocfs2_xattr_search *xs)
685 {
686 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
687 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
688 	struct ocfs2_xattr_value_root *xv;
689 	size_t size;
690 	int ret = 0;
691 
692 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
693 		return -ENODATA;
694 
695 	xs->end = (void *)di + inode->i_sb->s_blocksize;
696 	xs->header = (struct ocfs2_xattr_header *)
697 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
698 	xs->base = (void *)xs->header;
699 	xs->here = xs->header->xh_entries;
700 
701 	ret = ocfs2_xattr_find_entry(name_index, name, xs);
702 	if (ret)
703 		return ret;
704 	size = le64_to_cpu(xs->here->xe_value_size);
705 	if (buffer) {
706 		if (size > buffer_size)
707 			return -ERANGE;
708 		if (ocfs2_xattr_is_local(xs->here)) {
709 			memcpy(buffer, (void *)xs->base +
710 			       le16_to_cpu(xs->here->xe_name_offset) +
711 			       OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
712 		} else {
713 			xv = (struct ocfs2_xattr_value_root *)
714 				(xs->base + le16_to_cpu(
715 				 xs->here->xe_name_offset) +
716 				OCFS2_XATTR_SIZE(xs->here->xe_name_len));
717 			ret = ocfs2_xattr_get_value_outside(inode, xv,
718 							    buffer, size);
719 			if (ret < 0) {
720 				mlog_errno(ret);
721 				return ret;
722 			}
723 		}
724 	}
725 
726 	return size;
727 }
728 
729 static int ocfs2_xattr_block_get(struct inode *inode,
730 				 int name_index,
731 				 const char *name,
732 				 void *buffer,
733 				 size_t buffer_size,
734 				 struct ocfs2_xattr_search *xs)
735 {
736 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
737 	struct buffer_head *blk_bh = NULL;
738 	struct ocfs2_xattr_block *xb;
739 	struct ocfs2_xattr_value_root *xv;
740 	size_t size;
741 	int ret = -ENODATA, name_offset, name_len, block_off, i;
742 
743 	if (!di->i_xattr_loc)
744 		return ret;
745 
746 	memset(&xs->bucket, 0, sizeof(xs->bucket));
747 
748 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
749 			       le64_to_cpu(di->i_xattr_loc),
750 			       &blk_bh, OCFS2_BH_CACHED, inode);
751 	if (ret < 0) {
752 		mlog_errno(ret);
753 		return ret;
754 	}
755 	/*Verify the signature of xattr block*/
756 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
757 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
758 		ret = -EFAULT;
759 		goto cleanup;
760 	}
761 
762 	xs->xattr_bh = blk_bh;
763 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
764 
765 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
766 		xs->header = &xb->xb_attrs.xb_header;
767 		xs->base = (void *)xs->header;
768 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
769 		xs->here = xs->header->xh_entries;
770 
771 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
772 	} else
773 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
774 						   name_index,
775 						   name, xs);
776 
777 	if (ret)
778 		goto cleanup;
779 	size = le64_to_cpu(xs->here->xe_value_size);
780 	if (buffer) {
781 		ret = -ERANGE;
782 		if (size > buffer_size)
783 			goto cleanup;
784 
785 		name_offset = le16_to_cpu(xs->here->xe_name_offset);
786 		name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
787 		i = xs->here - xs->header->xh_entries;
788 
789 		if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
790 			ret = ocfs2_xattr_bucket_get_name_value(inode,
791 								xs->bucket.xh,
792 								i,
793 								&block_off,
794 								&name_offset);
795 			xs->base = xs->bucket.bhs[block_off]->b_data;
796 		}
797 		if (ocfs2_xattr_is_local(xs->here)) {
798 			memcpy(buffer, (void *)xs->base +
799 			       name_offset + name_len, size);
800 		} else {
801 			xv = (struct ocfs2_xattr_value_root *)
802 				(xs->base + name_offset + name_len);
803 			ret = ocfs2_xattr_get_value_outside(inode, xv,
804 							    buffer, size);
805 			if (ret < 0) {
806 				mlog_errno(ret);
807 				goto cleanup;
808 			}
809 		}
810 	}
811 	ret = size;
812 cleanup:
813 	for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
814 		brelse(xs->bucket.bhs[i]);
815 	memset(&xs->bucket, 0, sizeof(xs->bucket));
816 
817 	brelse(blk_bh);
818 	return ret;
819 }
820 
821 /* ocfs2_xattr_get()
822  *
823  * Copy an extended attribute into the buffer provided.
824  * Buffer is NULL to compute the size of buffer required.
825  */
826 int ocfs2_xattr_get(struct inode *inode,
827 		    int name_index,
828 		    const char *name,
829 		    void *buffer,
830 		    size_t buffer_size)
831 {
832 	int ret;
833 	struct ocfs2_dinode *di = NULL;
834 	struct buffer_head *di_bh = NULL;
835 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
836 	struct ocfs2_xattr_search xis = {
837 		.not_found = -ENODATA,
838 	};
839 	struct ocfs2_xattr_search xbs = {
840 		.not_found = -ENODATA,
841 	};
842 
843 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
844 		ret = -ENODATA;
845 
846 	ret = ocfs2_inode_lock(inode, &di_bh, 0);
847 	if (ret < 0) {
848 		mlog_errno(ret);
849 		return ret;
850 	}
851 	xis.inode_bh = xbs.inode_bh = di_bh;
852 	di = (struct ocfs2_dinode *)di_bh->b_data;
853 
854 	down_read(&oi->ip_xattr_sem);
855 	ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
856 				    buffer_size, &xis);
857 	if (ret == -ENODATA)
858 		ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
859 					    buffer_size, &xbs);
860 	up_read(&oi->ip_xattr_sem);
861 	ocfs2_inode_unlock(inode, 0);
862 
863 	brelse(di_bh);
864 
865 	return ret;
866 }
867 
868 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
869 					   struct ocfs2_xattr_value_root *xv,
870 					   const void *value,
871 					   int value_len)
872 {
873 	int ret = 0, i, cp_len, credits;
874 	u16 blocksize = inode->i_sb->s_blocksize;
875 	u32 p_cluster, num_clusters;
876 	u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
877 	u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
878 	u64 blkno;
879 	struct buffer_head *bh = NULL;
880 	handle_t *handle;
881 
882 	BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
883 
884 	credits = clusters * bpc;
885 	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
886 	if (IS_ERR(handle)) {
887 		ret = PTR_ERR(handle);
888 		mlog_errno(ret);
889 		goto out;
890 	}
891 
892 	while (cpos < clusters) {
893 		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
894 					       &num_clusters, &xv->xr_list);
895 		if (ret) {
896 			mlog_errno(ret);
897 			goto out_commit;
898 		}
899 
900 		blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
901 
902 		for (i = 0; i < num_clusters * bpc; i++, blkno++) {
903 			ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
904 					       &bh, OCFS2_BH_CACHED, inode);
905 			if (ret) {
906 				mlog_errno(ret);
907 				goto out_commit;
908 			}
909 
910 			ret = ocfs2_journal_access(handle,
911 						   inode,
912 						   bh,
913 						   OCFS2_JOURNAL_ACCESS_WRITE);
914 			if (ret < 0) {
915 				mlog_errno(ret);
916 				goto out_commit;
917 			}
918 
919 			cp_len = value_len > blocksize ? blocksize : value_len;
920 			memcpy(bh->b_data, value, cp_len);
921 			value_len -= cp_len;
922 			value += cp_len;
923 			if (cp_len < blocksize)
924 				memset(bh->b_data + cp_len, 0,
925 				       blocksize - cp_len);
926 
927 			ret = ocfs2_journal_dirty(handle, bh);
928 			if (ret < 0) {
929 				mlog_errno(ret);
930 				goto out_commit;
931 			}
932 			brelse(bh);
933 			bh = NULL;
934 
935 			/*
936 			 * XXX: do we need to empty all the following
937 			 * blocks in this cluster?
938 			 */
939 			if (!value_len)
940 				break;
941 		}
942 		cpos += num_clusters;
943 	}
944 out_commit:
945 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
946 out:
947 	brelse(bh);
948 
949 	return ret;
950 }
951 
952 static int ocfs2_xattr_cleanup(struct inode *inode,
953 			       struct ocfs2_xattr_info *xi,
954 			       struct ocfs2_xattr_search *xs,
955 			       size_t offs)
956 {
957 	handle_t *handle = NULL;
958 	int ret = 0;
959 	size_t name_len = strlen(xi->name);
960 	void *val = xs->base + offs;
961 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
962 
963 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
964 				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
965 	if (IS_ERR(handle)) {
966 		ret = PTR_ERR(handle);
967 		mlog_errno(ret);
968 		goto out;
969 	}
970 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
971 				   OCFS2_JOURNAL_ACCESS_WRITE);
972 	if (ret) {
973 		mlog_errno(ret);
974 		goto out_commit;
975 	}
976 	/* Decrease xattr count */
977 	le16_add_cpu(&xs->header->xh_count, -1);
978 	/* Remove the xattr entry and tree root which has already be set*/
979 	memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
980 	memset(val, 0, size);
981 
982 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
983 	if (ret < 0)
984 		mlog_errno(ret);
985 out_commit:
986 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
987 out:
988 	return ret;
989 }
990 
991 static int ocfs2_xattr_update_entry(struct inode *inode,
992 				    struct ocfs2_xattr_info *xi,
993 				    struct ocfs2_xattr_search *xs,
994 				    size_t offs)
995 {
996 	handle_t *handle = NULL;
997 	int ret = 0;
998 
999 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1000 				   OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1001 	if (IS_ERR(handle)) {
1002 		ret = PTR_ERR(handle);
1003 		mlog_errno(ret);
1004 		goto out;
1005 	}
1006 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1007 				   OCFS2_JOURNAL_ACCESS_WRITE);
1008 	if (ret) {
1009 		mlog_errno(ret);
1010 		goto out_commit;
1011 	}
1012 
1013 	xs->here->xe_name_offset = cpu_to_le16(offs);
1014 	xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1015 	if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1016 		ocfs2_xattr_set_local(xs->here, 1);
1017 	else
1018 		ocfs2_xattr_set_local(xs->here, 0);
1019 	ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1020 
1021 	ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1022 	if (ret < 0)
1023 		mlog_errno(ret);
1024 out_commit:
1025 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1026 out:
1027 	return ret;
1028 }
1029 
1030 /*
1031  * ocfs2_xattr_set_value_outside()
1032  *
1033  * Set large size value in B tree.
1034  */
1035 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1036 					 struct ocfs2_xattr_info *xi,
1037 					 struct ocfs2_xattr_search *xs,
1038 					 size_t offs)
1039 {
1040 	size_t name_len = strlen(xi->name);
1041 	void *val = xs->base + offs;
1042 	struct ocfs2_xattr_value_root *xv = NULL;
1043 	size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1044 	int ret = 0;
1045 
1046 	memset(val, 0, size);
1047 	memcpy(val, xi->name, name_len);
1048 	xv = (struct ocfs2_xattr_value_root *)
1049 		(val + OCFS2_XATTR_SIZE(name_len));
1050 	xv->xr_clusters = 0;
1051 	xv->xr_last_eb_blk = 0;
1052 	xv->xr_list.l_tree_depth = 0;
1053 	xv->xr_list.l_count = cpu_to_le16(1);
1054 	xv->xr_list.l_next_free_rec = 0;
1055 
1056 	ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1057 					 xi->value_len);
1058 	if (ret < 0) {
1059 		mlog_errno(ret);
1060 		return ret;
1061 	}
1062 	ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1063 					      xi->value_len);
1064 	if (ret < 0) {
1065 		mlog_errno(ret);
1066 		return ret;
1067 	}
1068 	ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1069 	if (ret < 0)
1070 		mlog_errno(ret);
1071 
1072 	return ret;
1073 }
1074 
1075 /*
1076  * ocfs2_xattr_set_entry_local()
1077  *
1078  * Set, replace or remove extended attribute in local.
 *
 * Works purely in memory on the region described by xs (xs->base,
 * xs->header, xs->here); no journaling is done here.
 *
 * xi       - attribute to set; a NULL xi->value requests removal
 * xs       - search result for the name; xs->not_found is non-zero when
 *            the name does not exist yet
 * last     - entry slot to fill when a new attribute is inserted
 * min_offs - lowest name+value offset (relative to xs->base) in use; a
 *            new name+value is placed directly below it
1079  */
1080 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1081 					struct ocfs2_xattr_info *xi,
1082 					struct ocfs2_xattr_search *xs,
1083 					struct ocfs2_xattr_entry *last,
1084 					size_t min_offs)
1085 {
1086 	size_t name_len = strlen(xi->name);
1087 	int i;
1088 
1089 	if (xi->value && xs->not_found) {
1090 		/* Insert the new xattr entry. */
1091 		le16_add_cpu(&xs->header->xh_count, 1);
1092 		ocfs2_xattr_set_type(last, xi->name_index);
1093 		ocfs2_xattr_set_local(last, 1);
1094 		last->xe_name_len = name_len;
1095 	} else {
1096 		void *first_val;
1097 		void *val;
1098 		size_t offs, size;
1099 
		/* first_val: lowest name+value in use; val: the old one */
1100 		first_val = xs->base + min_offs;
1101 		offs = le16_to_cpu(xs->here->xe_name_offset);
1102 		val = xs->base + offs;
1103 
		/*
		 * Space taken by the old name+value: a value above the
		 * inline limit occupies a value root, not its own bytes.
		 */
1104 		if (le64_to_cpu(xs->here->xe_value_size) >
1105 		    OCFS2_XATTR_INLINE_SIZE)
1106 			size = OCFS2_XATTR_SIZE(name_len) +
1107 				OCFS2_XATTR_ROOT_SIZE;
1108 		else
1109 			size = OCFS2_XATTR_SIZE(name_len) +
1110 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1111 
1112 		if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1113 				OCFS2_XATTR_SIZE(xi->value_len)) {
1114 			/* The old and the new value have the
1115 			   same size. Just replace the value. */
1116 			ocfs2_xattr_set_local(xs->here, 1);
1117 			xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1118 			/* Clear value bytes. */
1119 			memset(val + OCFS2_XATTR_SIZE(name_len),
1120 			       0,
1121 			       OCFS2_XATTR_SIZE(xi->value_len));
1122 			memcpy(val + OCFS2_XATTR_SIZE(name_len),
1123 			       xi->value,
1124 			       xi->value_len);
1125 			return;
1126 		}
1127 		/* Remove the old name+value. */
		/* everything stored below it moves up by 'size' bytes */
1128 		memmove(first_val + size, first_val, val - first_val);
1129 		memset(first_val, 0, size);
1130 		xs->here->xe_name_hash = 0;
1131 		xs->here->xe_name_offset = 0;
1132 		ocfs2_xattr_set_local(xs->here, 1);
1133 		xs->here->xe_value_size = 0;
1134 
		/* the freed bytes are available below the used area again */
1135 		min_offs += size;
1136 
1137 		/* Adjust all value offsets. */
1138 		last = xs->header->xh_entries;
1139 		for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1140 			size_t o = le16_to_cpu(last->xe_name_offset);
1141 
			/* only entries stored below the removed one moved */
1142 			if (o < offs)
1143 				last->xe_name_offset = cpu_to_le16(o + size);
1144 			last += 1;
1145 		}
1146 
1147 		if (!xi->value) {
1148 			/* Remove the old entry. */
			/* 'last' is one past the final entry after the loop */
1149 			last -= 1;
1150 			memmove(xs->here, xs->here + 1,
1151 				(void *)last - (void *)xs->here);
1152 			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1153 			le16_add_cpu(&xs->header->xh_count, -1);
1154 		}
1155 	}
1156 	if (xi->value) {
1157 		/* Insert the new name+value. */
		/*
		 * NOTE(review): the entry is updated through xs->here, not
		 * 'last'; presumably both are the same slot when a new
		 * attribute is inserted -- confirm against the caller.
		 */
1158 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1159 				OCFS2_XATTR_SIZE(xi->value_len);
1160 		void *val = xs->base + min_offs - size;
1161 
1162 		xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1163 		memset(val, 0, size);
1164 		memcpy(val, xi->name, name_len);
1165 		memcpy(val + OCFS2_XATTR_SIZE(name_len),
1166 		       xi->value,
1167 		       xi->value_len);
1168 		xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1169 		ocfs2_xattr_set_local(xs->here, 1);
1170 		ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1171 	}
1172 
1173 	return;
1174 }
1175 
1176 /*
1177  * ocfs2_xattr_set_entry()
1178  *
1179  * Set extended attribute entry into inode or block.
1180  *
1181  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1182  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1183  * then set value in B tree with set_value_outside().
1184  */
1185 static int ocfs2_xattr_set_entry(struct inode *inode,
1186 				 struct ocfs2_xattr_info *xi,
1187 				 struct ocfs2_xattr_search *xs,
1188 				 int flag)
1189 {
1190 	struct ocfs2_xattr_entry *last;
1191 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1192 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1193 	size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1194 	size_t size_l = 0;
1195 	handle_t *handle = NULL;
1196 	int free, i, ret;
1197 	struct ocfs2_xattr_info xi_l = {
1198 		.name_index = xi->name_index,
1199 		.name = xi->name,
1200 		.value = xi->value,
1201 		.value_len = xi->value_len,
1202 	};
1203 
1204 	/* Compute min_offs, last and free space. */
1205 	last = xs->header->xh_entries;
1206 
1207 	for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1208 		size_t offs = le16_to_cpu(last->xe_name_offset);
1209 		if (offs < min_offs)
1210 			min_offs = offs;
1211 		last += 1;
1212 	}
1213 
1214 	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1215 	if (free < 0)
1216 		return -EFAULT;
1217 
1218 	if (!xs->not_found) {
1219 		size_t size = 0;
1220 		if (ocfs2_xattr_is_local(xs->here))
1221 			size = OCFS2_XATTR_SIZE(name_len) +
1222 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1223 		else
1224 			size = OCFS2_XATTR_SIZE(name_len) +
1225 				OCFS2_XATTR_ROOT_SIZE;
1226 		free += (size + sizeof(struct ocfs2_xattr_entry));
1227 	}
1228 	/* Check free space in inode or block */
1229 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1230 		if (free < sizeof(struct ocfs2_xattr_entry) +
1231 			   OCFS2_XATTR_SIZE(name_len) +
1232 			   OCFS2_XATTR_ROOT_SIZE) {
1233 			ret = -ENOSPC;
1234 			goto out;
1235 		}
1236 		size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1237 		xi_l.value = (void *)&def_xv;
1238 		xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1239 	} else if (xi->value) {
1240 		if (free < sizeof(struct ocfs2_xattr_entry) +
1241 			   OCFS2_XATTR_SIZE(name_len) +
1242 			   OCFS2_XATTR_SIZE(xi->value_len)) {
1243 			ret = -ENOSPC;
1244 			goto out;
1245 		}
1246 	}
1247 
1248 	if (!xs->not_found) {
1249 		/* For existing extended attribute */
1250 		size_t size = OCFS2_XATTR_SIZE(name_len) +
1251 			OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1252 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1253 		void *val = xs->base + offs;
1254 
1255 		if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1256 			/* Replace existing local xattr with tree root */
1257 			ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1258 							    offs);
1259 			if (ret < 0)
1260 				mlog_errno(ret);
1261 			goto out;
1262 		} else if (!ocfs2_xattr_is_local(xs->here)) {
1263 			/* For existing xattr which has value outside */
1264 			struct ocfs2_xattr_value_root *xv = NULL;
1265 			xv = (struct ocfs2_xattr_value_root *)(val +
1266 				OCFS2_XATTR_SIZE(name_len));
1267 
1268 			if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1269 				/*
1270 				 * If new value need set outside also,
1271 				 * first truncate old value to new value,
1272 				 * then set new value with set_value_outside().
1273 				 */
1274 				ret = ocfs2_xattr_value_truncate(inode,
1275 								 xs->xattr_bh,
1276 								 xv,
1277 								 xi->value_len);
1278 				if (ret < 0) {
1279 					mlog_errno(ret);
1280 					goto out;
1281 				}
1282 
1283 				ret = __ocfs2_xattr_set_value_outside(inode,
1284 								xv,
1285 								xi->value,
1286 								xi->value_len);
1287 				if (ret < 0) {
1288 					mlog_errno(ret);
1289 					goto out;
1290 				}
1291 
1292 				ret = ocfs2_xattr_update_entry(inode,
1293 							       xi,
1294 							       xs,
1295 							       offs);
1296 				if (ret < 0)
1297 					mlog_errno(ret);
1298 				goto out;
1299 			} else {
1300 				/*
1301 				 * If new value need set in local,
1302 				 * just trucate old value to zero.
1303 				 */
1304 				 ret = ocfs2_xattr_value_truncate(inode,
1305 								 xs->xattr_bh,
1306 								 xv,
1307 								 0);
1308 				if (ret < 0)
1309 					mlog_errno(ret);
1310 			}
1311 		}
1312 	}
1313 
1314 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1315 				   OCFS2_INODE_UPDATE_CREDITS);
1316 	if (IS_ERR(handle)) {
1317 		ret = PTR_ERR(handle);
1318 		mlog_errno(ret);
1319 		goto out;
1320 	}
1321 
1322 	ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1323 				   OCFS2_JOURNAL_ACCESS_WRITE);
1324 	if (ret) {
1325 		mlog_errno(ret);
1326 		goto out_commit;
1327 	}
1328 
1329 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1330 		/*set extended attribue in external blcok*/
1331 		ret = ocfs2_extend_trans(handle,
1332 					 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1333 		if (ret) {
1334 			mlog_errno(ret);
1335 			goto out_commit;
1336 		}
1337 		ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1338 					   OCFS2_JOURNAL_ACCESS_WRITE);
1339 		if (ret) {
1340 			mlog_errno(ret);
1341 			goto out_commit;
1342 		}
1343 	}
1344 
1345 	/*
1346 	 * Set value in local, include set tree root in local.
1347 	 * This is the first step for value size >INLINE_SIZE.
1348 	 */
1349 	ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1350 
1351 	if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1352 		ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1353 		if (ret < 0) {
1354 			mlog_errno(ret);
1355 			goto out_commit;
1356 		}
1357 	}
1358 
1359 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1360 	    (flag & OCFS2_INLINE_XATTR_FL)) {
1361 		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1362 		unsigned int xattrsize = osb->s_xattr_inline_size;
1363 
1364 		/*
1365 		 * Adjust extent record count or inline data size
1366 		 * to reserve space for extended attribute.
1367 		 */
1368 		if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1369 			struct ocfs2_inline_data *idata = &di->id2.i_data;
1370 			le16_add_cpu(&idata->id_count, -xattrsize);
1371 		} else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1372 			struct ocfs2_extent_list *el = &di->id2.i_list;
1373 			le16_add_cpu(&el->l_count, -(xattrsize /
1374 					sizeof(struct ocfs2_extent_rec)));
1375 		}
1376 		di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1377 	}
1378 	/* Update xattr flag */
1379 	spin_lock(&oi->ip_lock);
1380 	oi->ip_dyn_features |= flag;
1381 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1382 	spin_unlock(&oi->ip_lock);
1383 	/* Update inode ctime */
1384 	inode->i_ctime = CURRENT_TIME;
1385 	di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1386 	di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1387 
1388 	ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1389 	if (ret < 0)
1390 		mlog_errno(ret);
1391 
1392 out_commit:
1393 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1394 
1395 	if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1396 		/*
1397 		 * Set value outside in B tree.
1398 		 * This is the second step for value size > INLINE_SIZE.
1399 		 */
1400 		size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1401 		ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1402 		if (ret < 0) {
1403 			int ret2;
1404 
1405 			mlog_errno(ret);
1406 			/*
1407 			 * If set value outside failed, we have to clean
1408 			 * the junk tree root we have already set in local.
1409 			 */
1410 			ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1411 			if (ret2 < 0)
1412 				mlog_errno(ret2);
1413 		}
1414 	}
1415 out:
1416 	return ret;
1417 
1418 }
1419 
1420 static int ocfs2_xattr_free_block(handle_t *handle,
1421 				  struct ocfs2_super *osb,
1422 				  struct ocfs2_xattr_block *xb)
1423 {
1424 	struct inode *xb_alloc_inode;
1425 	struct buffer_head *xb_alloc_bh = NULL;
1426 	u64 blk = le64_to_cpu(xb->xb_blkno);
1427 	u16 bit = le16_to_cpu(xb->xb_suballoc_bit);
1428 	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1429 	int ret = 0;
1430 
1431 	xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1432 				EXTENT_ALLOC_SYSTEM_INODE,
1433 				le16_to_cpu(xb->xb_suballoc_slot));
1434 	if (!xb_alloc_inode) {
1435 		ret = -ENOMEM;
1436 		mlog_errno(ret);
1437 		goto out;
1438 	}
1439 	mutex_lock(&xb_alloc_inode->i_mutex);
1440 
1441 	ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1442 	if (ret < 0) {
1443 		mlog_errno(ret);
1444 		goto out_mutex;
1445 	}
1446 	ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
1447 	if (ret < 0) {
1448 		mlog_errno(ret);
1449 		goto out_unlock;
1450 	}
1451 	ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1452 				       bit, bg_blkno, 1);
1453 	if (ret < 0)
1454 		mlog_errno(ret);
1455 out_unlock:
1456 	ocfs2_inode_unlock(xb_alloc_inode, 1);
1457 	brelse(xb_alloc_bh);
1458 out_mutex:
1459 	mutex_unlock(&xb_alloc_inode->i_mutex);
1460 	iput(xb_alloc_inode);
1461 out:
1462 	return ret;
1463 }
1464 
1465 static int ocfs2_remove_value_outside(struct inode*inode,
1466 				      struct buffer_head *bh,
1467 				      struct ocfs2_xattr_header *header)
1468 {
1469 	int ret = 0, i;
1470 
1471 	for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1472 		struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1473 
1474 		if (!ocfs2_xattr_is_local(entry)) {
1475 			struct ocfs2_xattr_value_root *xv;
1476 			void *val;
1477 
1478 			val = (void *)header +
1479 				le16_to_cpu(entry->xe_name_offset);
1480 			xv = (struct ocfs2_xattr_value_root *)
1481 				(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1482 			ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1483 			if (ret < 0) {
1484 				mlog_errno(ret);
1485 				return ret;
1486 			}
1487 		}
1488 	}
1489 
1490 	return ret;
1491 }
1492 
1493 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1494 				    struct buffer_head *di_bh)
1495 {
1496 
1497 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1498 	struct ocfs2_xattr_header *header;
1499 	int ret;
1500 
1501 	header = (struct ocfs2_xattr_header *)
1502 		 ((void *)di + inode->i_sb->s_blocksize -
1503 		 le16_to_cpu(di->i_xattr_inline_size));
1504 
1505 	ret = ocfs2_remove_value_outside(inode, di_bh, header);
1506 
1507 	return ret;
1508 }
1509 
1510 static int ocfs2_xattr_block_remove(struct inode *inode,
1511 				    struct buffer_head *blk_bh)
1512 {
1513 	struct ocfs2_xattr_block *xb;
1514 	struct ocfs2_xattr_header *header;
1515 	int ret = 0;
1516 
1517 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1518 	header = &(xb->xb_attrs.xb_header);
1519 
1520 	ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1521 
1522 	return ret;
1523 }
1524 
1525 /*
1526  * ocfs2_xattr_remove()
1527  *
1528  * Free extended attribute resources associated with this inode.
1529  */
1530 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1531 {
1532 	struct ocfs2_xattr_block *xb;
1533 	struct buffer_head *blk_bh = NULL;
1534 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1535 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1536 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1537 	handle_t *handle;
1538 	int ret;
1539 
1540 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1541 		return 0;
1542 
1543 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1544 		ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1545 		if (ret < 0) {
1546 			mlog_errno(ret);
1547 			goto out;
1548 		}
1549 	}
1550 	if (di->i_xattr_loc) {
1551 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1552 				       le64_to_cpu(di->i_xattr_loc),
1553 				       &blk_bh, OCFS2_BH_CACHED, inode);
1554 		if (ret < 0) {
1555 			mlog_errno(ret);
1556 			return ret;
1557 		}
1558 		/*Verify the signature of xattr block*/
1559 		if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1560 			   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1561 			ret = -EFAULT;
1562 			goto out;
1563 		}
1564 
1565 		ret = ocfs2_xattr_block_remove(inode, blk_bh);
1566 		if (ret < 0) {
1567 			mlog_errno(ret);
1568 			goto out;
1569 		}
1570 	}
1571 
1572 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1573 				   OCFS2_INODE_UPDATE_CREDITS);
1574 	if (IS_ERR(handle)) {
1575 		ret = PTR_ERR(handle);
1576 		mlog_errno(ret);
1577 		goto out;
1578 	}
1579 	ret = ocfs2_journal_access(handle, inode, di_bh,
1580 				   OCFS2_JOURNAL_ACCESS_WRITE);
1581 	if (ret) {
1582 		mlog_errno(ret);
1583 		goto out_commit;
1584 	}
1585 
1586 	if (di->i_xattr_loc) {
1587 		xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1588 		ocfs2_xattr_free_block(handle, osb, xb);
1589 		di->i_xattr_loc = cpu_to_le64(0);
1590 	}
1591 
1592 	spin_lock(&oi->ip_lock);
1593 	oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1594 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1595 	spin_unlock(&oi->ip_lock);
1596 
1597 	ret = ocfs2_journal_dirty(handle, di_bh);
1598 	if (ret < 0)
1599 		mlog_errno(ret);
1600 out_commit:
1601 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1602 out:
1603 	brelse(blk_bh);
1604 
1605 	return ret;
1606 }
1607 
1608 static int ocfs2_xattr_has_space_inline(struct inode *inode,
1609 					struct ocfs2_dinode *di)
1610 {
1611 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1612 	unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1613 	int free;
1614 
1615 	if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1616 		return 0;
1617 
1618 	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1619 		struct ocfs2_inline_data *idata = &di->id2.i_data;
1620 		free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1621 	} else if (ocfs2_inode_is_fast_symlink(inode)) {
1622 		free = ocfs2_fast_symlink_chars(inode->i_sb) -
1623 			le64_to_cpu(di->i_size);
1624 	} else {
1625 		struct ocfs2_extent_list *el = &di->id2.i_list;
1626 		free = (le16_to_cpu(el->l_count) -
1627 			le16_to_cpu(el->l_next_free_rec)) *
1628 			sizeof(struct ocfs2_extent_rec);
1629 	}
1630 	if (free >= xattrsize)
1631 		return 1;
1632 
1633 	return 0;
1634 }
1635 
1636 /*
1637  * ocfs2_xattr_ibody_find()
1638  *
1639  * Find extended attribute in inode block and
1640  * fill search info into struct ocfs2_xattr_search.
1641  */
1642 static int ocfs2_xattr_ibody_find(struct inode *inode,
1643 				  int name_index,
1644 				  const char *name,
1645 				  struct ocfs2_xattr_search *xs)
1646 {
1647 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1648 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1649 	int ret;
1650 	int has_space = 0;
1651 
1652 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1653 		return 0;
1654 
1655 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1656 		down_read(&oi->ip_alloc_sem);
1657 		has_space = ocfs2_xattr_has_space_inline(inode, di);
1658 		up_read(&oi->ip_alloc_sem);
1659 		if (!has_space)
1660 			return 0;
1661 	}
1662 
1663 	xs->xattr_bh = xs->inode_bh;
1664 	xs->end = (void *)di + inode->i_sb->s_blocksize;
1665 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1666 		xs->header = (struct ocfs2_xattr_header *)
1667 			(xs->end - le16_to_cpu(di->i_xattr_inline_size));
1668 	else
1669 		xs->header = (struct ocfs2_xattr_header *)
1670 			(xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1671 	xs->base = (void *)xs->header;
1672 	xs->here = xs->header->xh_entries;
1673 
1674 	/* Find the named attribute. */
1675 	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1676 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1677 		if (ret && ret != -ENODATA)
1678 			return ret;
1679 		xs->not_found = ret;
1680 	}
1681 
1682 	return 0;
1683 }
1684 
1685 /*
1686  * ocfs2_xattr_ibody_set()
1687  *
1688  * Set, replace or remove an extended attribute into inode block.
1689  *
1690  */
1691 static int ocfs2_xattr_ibody_set(struct inode *inode,
1692 				 struct ocfs2_xattr_info *xi,
1693 				 struct ocfs2_xattr_search *xs)
1694 {
1695 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
1696 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1697 	int ret;
1698 
1699 	if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1700 		return -ENOSPC;
1701 
1702 	down_write(&oi->ip_alloc_sem);
1703 	if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1704 		if (!ocfs2_xattr_has_space_inline(inode, di)) {
1705 			ret = -ENOSPC;
1706 			goto out;
1707 		}
1708 	}
1709 
1710 	ret = ocfs2_xattr_set_entry(inode, xi, xs,
1711 				(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1712 out:
1713 	up_write(&oi->ip_alloc_sem);
1714 
1715 	return ret;
1716 }
1717 
1718 /*
1719  * ocfs2_xattr_block_find()
1720  *
1721  * Find extended attribute in external block and
1722  * fill search info into struct ocfs2_xattr_search.
1723  */
1724 static int ocfs2_xattr_block_find(struct inode *inode,
1725 				  int name_index,
1726 				  const char *name,
1727 				  struct ocfs2_xattr_search *xs)
1728 {
1729 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1730 	struct buffer_head *blk_bh = NULL;
1731 	struct ocfs2_xattr_block *xb;
1732 	int ret = 0;
1733 
1734 	if (!di->i_xattr_loc)
1735 		return ret;
1736 
1737 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1738 			       le64_to_cpu(di->i_xattr_loc),
1739 			       &blk_bh, OCFS2_BH_CACHED, inode);
1740 	if (ret < 0) {
1741 		mlog_errno(ret);
1742 		return ret;
1743 	}
1744 	/*Verify the signature of xattr block*/
1745 	if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1746 		   strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1747 			ret = -EFAULT;
1748 			goto cleanup;
1749 	}
1750 
1751 	xs->xattr_bh = blk_bh;
1752 	xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1753 
1754 	if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1755 		xs->header = &xb->xb_attrs.xb_header;
1756 		xs->base = (void *)xs->header;
1757 		xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1758 		xs->here = xs->header->xh_entries;
1759 
1760 		ret = ocfs2_xattr_find_entry(name_index, name, xs);
1761 	} else
1762 		ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1763 						   name_index,
1764 						   name, xs);
1765 
1766 	if (ret && ret != -ENODATA) {
1767 		xs->xattr_bh = NULL;
1768 		goto cleanup;
1769 	}
1770 	xs->not_found = ret;
1771 	return 0;
1772 cleanup:
1773 	brelse(blk_bh);
1774 
1775 	return ret;
1776 }
1777 
1778 /*
1779  * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1780  * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1781  * re-initialized.
1782  */
1783 static int ocfs2_restore_xattr_block(struct inode *inode,
1784 				     struct ocfs2_xattr_search *xs)
1785 {
1786 	int ret;
1787 	handle_t *handle;
1788 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1789 	struct ocfs2_xattr_block *xb =
1790 		(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1791 	struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1792 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
1793 
1794 	BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1795 		le16_to_cpu(el->l_next_free_rec) != 0);
1796 
1797 	handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1798 	if (IS_ERR(handle)) {
1799 		ret = PTR_ERR(handle);
1800 		handle = NULL;
1801 		goto out;
1802 	}
1803 
1804 	ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1805 				   OCFS2_JOURNAL_ACCESS_WRITE);
1806 	if (ret < 0) {
1807 		mlog_errno(ret);
1808 		goto out_commit;
1809 	}
1810 
1811 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1812 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
1813 
1814 	xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1815 
1816 	ocfs2_journal_dirty(handle, xs->xattr_bh);
1817 
1818 out_commit:
1819 	ocfs2_commit_trans(osb, handle);
1820 out:
1821 	return ret;
1822 }
1823 
1824 /*
1825  * ocfs2_xattr_block_set()
1826  *
1827  * Set, replace or remove an extended attribute into external block.
1828  *
1829  */
1830 static int ocfs2_xattr_block_set(struct inode *inode,
1831 				 struct ocfs2_xattr_info *xi,
1832 				 struct ocfs2_xattr_search *xs)
1833 {
1834 	struct buffer_head *new_bh = NULL;
1835 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1836 	struct ocfs2_dinode *di =  (struct ocfs2_dinode *)xs->inode_bh->b_data;
1837 	struct ocfs2_alloc_context *meta_ac = NULL;
1838 	handle_t *handle = NULL;
1839 	struct ocfs2_xattr_block *xblk = NULL;
1840 	u16 suballoc_bit_start;
1841 	u32 num_got;
1842 	u64 first_blkno;
1843 	int ret;
1844 
1845 	if (!xs->xattr_bh) {
1846 		/*
1847 		 * Alloc one external block for extended attribute
1848 		 * outside of inode.
1849 		 */
1850 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1851 		if (ret < 0) {
1852 			mlog_errno(ret);
1853 			goto out;
1854 		}
1855 		handle = ocfs2_start_trans(osb,
1856 					   OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1857 		if (IS_ERR(handle)) {
1858 			ret = PTR_ERR(handle);
1859 			mlog_errno(ret);
1860 			goto out;
1861 		}
1862 		ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1863 					   OCFS2_JOURNAL_ACCESS_CREATE);
1864 		if (ret < 0) {
1865 			mlog_errno(ret);
1866 			goto out_commit;
1867 		}
1868 
1869 		ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1870 					   &suballoc_bit_start, &num_got,
1871 					   &first_blkno);
1872 		if (ret < 0) {
1873 			mlog_errno(ret);
1874 			goto out_commit;
1875 		}
1876 
1877 		new_bh = sb_getblk(inode->i_sb, first_blkno);
1878 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
1879 
1880 		ret = ocfs2_journal_access(handle, inode, new_bh,
1881 					   OCFS2_JOURNAL_ACCESS_CREATE);
1882 		if (ret < 0) {
1883 			mlog_errno(ret);
1884 			goto out_commit;
1885 		}
1886 
1887 		/* Initialize ocfs2_xattr_block */
1888 		xs->xattr_bh = new_bh;
1889 		xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1890 		memset(xblk, 0, inode->i_sb->s_blocksize);
1891 		strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1892 		xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1893 		xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1894 		xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1895 		xblk->xb_blkno = cpu_to_le64(first_blkno);
1896 
1897 		xs->header = &xblk->xb_attrs.xb_header;
1898 		xs->base = (void *)xs->header;
1899 		xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1900 		xs->here = xs->header->xh_entries;
1901 
1902 
1903 		ret = ocfs2_journal_dirty(handle, new_bh);
1904 		if (ret < 0) {
1905 			mlog_errno(ret);
1906 			goto out_commit;
1907 		}
1908 		di->i_xattr_loc = cpu_to_le64(first_blkno);
1909 		ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1910 		if (ret < 0)
1911 			mlog_errno(ret);
1912 out_commit:
1913 		ocfs2_commit_trans(osb, handle);
1914 out:
1915 		if (meta_ac)
1916 			ocfs2_free_alloc_context(meta_ac);
1917 		if (ret < 0)
1918 			return ret;
1919 	} else
1920 		xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1921 
1922 	if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1923 		/* Set extended attribute into external block */
1924 		ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1925 		if (!ret || ret != -ENOSPC)
1926 			goto end;
1927 
1928 		ret = ocfs2_xattr_create_index_block(inode, xs);
1929 		if (ret)
1930 			goto end;
1931 	}
1932 
1933 	ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1934 	if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1935 		ret = ocfs2_restore_xattr_block(inode, xs);
1936 
1937 end:
1938 
1939 	return ret;
1940 }
1941 
1942 /*
1943  * ocfs2_xattr_set()
1944  *
1945  * Set, replace or remove an extended attribute for this inode.
1946  * value is NULL to remove an existing extended attribute, else either
1947  * create or replace an extended attribute.
1948  */
1949 int ocfs2_xattr_set(struct inode *inode,
1950 		    int name_index,
1951 		    const char *name,
1952 		    const void *value,
1953 		    size_t value_len,
1954 		    int flags)
1955 {
1956 	struct buffer_head *di_bh = NULL;
1957 	struct ocfs2_dinode *di;
1958 	int ret;
1959 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1960 
1961 	struct ocfs2_xattr_info xi = {
1962 		.name_index = name_index,
1963 		.name = name,
1964 		.value = value,
1965 		.value_len = value_len,
1966 	};
1967 
1968 	struct ocfs2_xattr_search xis = {
1969 		.not_found = -ENODATA,
1970 	};
1971 
1972 	struct ocfs2_xattr_search xbs = {
1973 		.not_found = -ENODATA,
1974 	};
1975 
1976 	ret = ocfs2_inode_lock(inode, &di_bh, 1);
1977 	if (ret < 0) {
1978 		mlog_errno(ret);
1979 		return ret;
1980 	}
1981 	xis.inode_bh = xbs.inode_bh = di_bh;
1982 	di = (struct ocfs2_dinode *)di_bh->b_data;
1983 
1984 	down_write(&OCFS2_I(inode)->ip_xattr_sem);
1985 	/*
1986 	 * Scan inode and external block to find the same name
1987 	 * extended attribute and collect search infomation.
1988 	 */
1989 	ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
1990 	if (ret)
1991 		goto cleanup;
1992 	if (xis.not_found) {
1993 		ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
1994 		if (ret)
1995 			goto cleanup;
1996 	}
1997 
1998 	if (xis.not_found && xbs.not_found) {
1999 		ret = -ENODATA;
2000 		if (flags & XATTR_REPLACE)
2001 			goto cleanup;
2002 		ret = 0;
2003 		if (!value)
2004 			goto cleanup;
2005 	} else {
2006 		ret = -EEXIST;
2007 		if (flags & XATTR_CREATE)
2008 			goto cleanup;
2009 	}
2010 
2011 	if (!value) {
2012 		/* Remove existing extended attribute */
2013 		if (!xis.not_found)
2014 			ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2015 		else if (!xbs.not_found)
2016 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2017 	} else {
2018 		/* We always try to set extended attribute into inode first*/
2019 		ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2020 		if (!ret && !xbs.not_found) {
2021 			/*
2022 			 * If succeed and that extended attribute existing in
2023 			 * external block, then we will remove it.
2024 			 */
2025 			xi.value = NULL;
2026 			xi.value_len = 0;
2027 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2028 		} else if (ret == -ENOSPC) {
2029 			if (di->i_xattr_loc && !xbs.xattr_bh) {
2030 				ret = ocfs2_xattr_block_find(inode, name_index,
2031 							     name, &xbs);
2032 				if (ret)
2033 					goto cleanup;
2034 			}
2035 			/*
2036 			 * If no space in inode, we will set extended attribute
2037 			 * into external block.
2038 			 */
2039 			ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2040 			if (ret)
2041 				goto cleanup;
2042 			if (!xis.not_found) {
2043 				/*
2044 				 * If succeed and that extended attribute
2045 				 * existing in inode, we will remove it.
2046 				 */
2047 				xi.value = NULL;
2048 				xi.value_len = 0;
2049 				ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2050 			}
2051 		}
2052 	}
2053 cleanup:
2054 	up_write(&OCFS2_I(inode)->ip_xattr_sem);
2055 	ocfs2_inode_unlock(inode, 1);
2056 	brelse(di_bh);
2057 	brelse(xbs.xattr_bh);
2058 	for (i = 0; i < blk_per_bucket; i++)
2059 		brelse(xbs.bucket.bhs[i]);
2060 
2061 	return ret;
2062 }
2063 
2064 static inline u32 ocfs2_xattr_hash_by_name(struct inode *inode,
2065 					   int name_index,
2066 					   const char *suffix_name)
2067 {
2068 	struct xattr_handler *handler = ocfs2_xattr_handler(name_index);
2069 	char *prefix = handler->prefix;
2070 	int prefix_len = strlen(handler->prefix);
2071 
2072 	return ocfs2_xattr_name_hash(inode, prefix, prefix_len,
2073 				     (char *)suffix_name, strlen(suffix_name));
2074 }
2075 
2076 /*
2077  * Find the xattr extent rec which may contains name_hash.
2078  * e_cpos will be the first name hash of the xattr rec.
2079  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2080  */
2081 static int ocfs2_xattr_get_rec(struct inode *inode,
2082 			       u32 name_hash,
2083 			       u64 *p_blkno,
2084 			       u32 *e_cpos,
2085 			       u32 *num_clusters,
2086 			       struct ocfs2_extent_list *el)
2087 {
2088 	int ret = 0, i;
2089 	struct buffer_head *eb_bh = NULL;
2090 	struct ocfs2_extent_block *eb;
2091 	struct ocfs2_extent_rec *rec = NULL;
2092 	u64 e_blkno = 0;
2093 
2094 	if (el->l_tree_depth) {
2095 		ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2096 		if (ret) {
2097 			mlog_errno(ret);
2098 			goto out;
2099 		}
2100 
2101 		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2102 		el = &eb->h_list;
2103 
2104 		if (el->l_tree_depth) {
2105 			ocfs2_error(inode->i_sb,
2106 				    "Inode %lu has non zero tree depth in "
2107 				    "xattr tree block %llu\n", inode->i_ino,
2108 				    (unsigned long long)eb_bh->b_blocknr);
2109 			ret = -EROFS;
2110 			goto out;
2111 		}
2112 	}
2113 
2114 	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2115 		rec = &el->l_recs[i];
2116 
2117 		if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2118 			e_blkno = le64_to_cpu(rec->e_blkno);
2119 			break;
2120 		}
2121 	}
2122 
2123 	if (!e_blkno) {
2124 		ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2125 			    "record (%u, %u, 0) in xattr", inode->i_ino,
2126 			    le32_to_cpu(rec->e_cpos),
2127 			    ocfs2_rec_clusters(el, rec));
2128 		ret = -EROFS;
2129 		goto out;
2130 	}
2131 
2132 	*p_blkno = le64_to_cpu(rec->e_blkno);
2133 	*num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2134 	if (e_cpos)
2135 		*e_cpos = le32_to_cpu(rec->e_cpos);
2136 out:
2137 	brelse(eb_bh);
2138 	return ret;
2139 }
2140 
/*
 * Function type for per-bucket callbacks: receives the inode, one xattr
 * bucket and an opaque caller-supplied argument, and returns an int.
 * NOTE(review): presumably 0 / negative errno like the rest of this
 * file - confirm against the code that invokes it.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket, void *para);
2144 
2145 static int ocfs2_find_xe_in_bucket(struct inode *inode,
2146 				   struct buffer_head *header_bh,
2147 				   int name_index,
2148 				   const char *name,
2149 				   u32 name_hash,
2150 				   u16 *xe_index,
2151 				   int *found)
2152 {
2153 	int i, ret = 0, cmp = 1, block_off, new_offset;
2154 	struct ocfs2_xattr_header *xh =
2155 			(struct ocfs2_xattr_header *)header_bh->b_data;
2156 	size_t name_len = strlen(name);
2157 	struct ocfs2_xattr_entry *xe = NULL;
2158 	struct buffer_head *name_bh = NULL;
2159 	char *xe_name;
2160 
2161 	/*
2162 	 * We don't use binary search in the bucket because there
2163 	 * may be multiple entries with the same name hash.
2164 	 */
2165 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2166 		xe = &xh->xh_entries[i];
2167 
2168 		if (name_hash > le32_to_cpu(xe->xe_name_hash))
2169 			continue;
2170 		else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2171 			break;
2172 
2173 		cmp = name_index - ocfs2_xattr_get_type(xe);
2174 		if (!cmp)
2175 			cmp = name_len - xe->xe_name_len;
2176 		if (cmp)
2177 			continue;
2178 
2179 		ret = ocfs2_xattr_bucket_get_name_value(inode,
2180 							xh,
2181 							i,
2182 							&block_off,
2183 							&new_offset);
2184 		if (ret) {
2185 			mlog_errno(ret);
2186 			break;
2187 		}
2188 
2189 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
2190 				       header_bh->b_blocknr + block_off,
2191 				       &name_bh, OCFS2_BH_CACHED, inode);
2192 		if (ret) {
2193 			mlog_errno(ret);
2194 			break;
2195 		}
2196 		xe_name = name_bh->b_data + new_offset;
2197 
2198 		cmp = memcmp(name, xe_name, name_len);
2199 		brelse(name_bh);
2200 		name_bh = NULL;
2201 
2202 		if (cmp == 0) {
2203 			*xe_index = i;
2204 			*found = 1;
2205 			ret = 0;
2206 			break;
2207 		}
2208 	}
2209 
2210 	return ret;
2211 }
2212 
2213 /*
2214  * Find the specified xattr entry in a series of buckets.
2215  * This series start from p_blkno and last for num_clusters.
2216  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2217  * the num of the valid buckets.
2218  *
2219  * Return the buffer_head this xattr should reside in. And if the xattr's
2220  * hash is in the gap of 2 buckets, return the lower bucket.
2221  */
2222 static int ocfs2_xattr_bucket_find(struct inode *inode,
2223 				   int name_index,
2224 				   const char *name,
2225 				   u32 name_hash,
2226 				   u64 p_blkno,
2227 				   u32 first_hash,
2228 				   u32 num_clusters,
2229 				   struct ocfs2_xattr_search *xs)
2230 {
2231 	int ret, found = 0;
2232 	struct buffer_head *bh = NULL;
2233 	struct buffer_head *lower_bh = NULL;
2234 	struct ocfs2_xattr_header *xh = NULL;
2235 	struct ocfs2_xattr_entry *xe = NULL;
2236 	u16 index = 0;
2237 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2238 	int low_bucket = 0, bucket, high_bucket;
2239 	u32 last_hash;
2240 	u64 blkno;
2241 
2242 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
2243 			       &bh, OCFS2_BH_CACHED, inode);
2244 	if (ret) {
2245 		mlog_errno(ret);
2246 		goto out;
2247 	}
2248 
2249 	xh = (struct ocfs2_xattr_header *)bh->b_data;
2250 	high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2251 
2252 	while (low_bucket <= high_bucket) {
2253 		brelse(bh);
2254 		bh = NULL;
2255 		bucket = (low_bucket + high_bucket) / 2;
2256 
2257 		blkno = p_blkno + bucket * blk_per_bucket;
2258 
2259 		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno,
2260 				       &bh, OCFS2_BH_CACHED, inode);
2261 		if (ret) {
2262 			mlog_errno(ret);
2263 			goto out;
2264 		}
2265 
2266 		xh = (struct ocfs2_xattr_header *)bh->b_data;
2267 		xe = &xh->xh_entries[0];
2268 		if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2269 			high_bucket = bucket - 1;
2270 			continue;
2271 		}
2272 
2273 		/*
2274 		 * Check whether the hash of the last entry in our
2275 		 * bucket is larger than the search one.
2276 		 */
2277 		xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2278 		last_hash = le32_to_cpu(xe->xe_name_hash);
2279 
2280 		/* record lower_bh which may be the insert place. */
2281 		brelse(lower_bh);
2282 		lower_bh = bh;
2283 		bh = NULL;
2284 
2285 		if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2286 			low_bucket = bucket + 1;
2287 			continue;
2288 		}
2289 
2290 		/* the searched xattr should reside in this bucket if exists. */
2291 		ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2292 					      name_index, name, name_hash,
2293 					      &index, &found);
2294 		if (ret) {
2295 			mlog_errno(ret);
2296 			goto out;
2297 		}
2298 		break;
2299 	}
2300 
2301 	/*
2302 	 * Record the bucket we have found.
2303 	 * When the xattr's hash value is in the gap of 2 buckets, we will
2304 	 * always set it to the previous bucket.
2305 	 */
2306 	if (!lower_bh) {
2307 		/*
2308 		 * We can't find any bucket whose first name_hash is less
2309 		 * than the find name_hash.
2310 		 */
2311 		BUG_ON(bh->b_blocknr != p_blkno);
2312 		lower_bh = bh;
2313 		bh = NULL;
2314 	}
2315 	xs->bucket.bhs[0] = lower_bh;
2316 	xs->bucket.xh = (struct ocfs2_xattr_header *)
2317 					xs->bucket.bhs[0]->b_data;
2318 	lower_bh = NULL;
2319 
2320 	xs->header = xs->bucket.xh;
2321 	xs->base = xs->bucket.bhs[0]->b_data;
2322 	xs->end = xs->base + inode->i_sb->s_blocksize;
2323 
2324 	if (found) {
2325 		/*
2326 		 * If we have found the xattr enty, read all the blocks in
2327 		 * this bucket.
2328 		 */
2329 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2330 					xs->bucket.bhs[0]->b_blocknr + 1,
2331 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2332 					OCFS2_BH_CACHED, inode);
2333 		if (ret) {
2334 			mlog_errno(ret);
2335 			goto out;
2336 		}
2337 
2338 		xs->here = &xs->header->xh_entries[index];
2339 		mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2340 		     (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
2341 	} else
2342 		ret = -ENODATA;
2343 
2344 out:
2345 	brelse(bh);
2346 	brelse(lower_bh);
2347 	return ret;
2348 }
2349 
2350 static int ocfs2_xattr_index_block_find(struct inode *inode,
2351 					struct buffer_head *root_bh,
2352 					int name_index,
2353 					const char *name,
2354 					struct ocfs2_xattr_search *xs)
2355 {
2356 	int ret;
2357 	struct ocfs2_xattr_block *xb =
2358 			(struct ocfs2_xattr_block *)root_bh->b_data;
2359 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2360 	struct ocfs2_extent_list *el = &xb_root->xt_list;
2361 	u64 p_blkno = 0;
2362 	u32 first_hash, num_clusters = 0;
2363 	u32 name_hash = ocfs2_xattr_hash_by_name(inode, name_index, name);
2364 
2365 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2366 		return -ENODATA;
2367 
2368 	mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2369 	     name, name_hash, name_index);
2370 
2371 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2372 				  &num_clusters, el);
2373 	if (ret) {
2374 		mlog_errno(ret);
2375 		goto out;
2376 	}
2377 
2378 	BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2379 
2380 	mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2381 	     "in the rec is %u\n", num_clusters, p_blkno, first_hash);
2382 
2383 	ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2384 				      p_blkno, first_hash, num_clusters, xs);
2385 
2386 out:
2387 	return ret;
2388 }
2389 
2390 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2391 				       u64 blkno,
2392 				       u32 clusters,
2393 				       xattr_bucket_func *func,
2394 				       void *para)
2395 {
2396 	int i, j, ret = 0;
2397 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2398 	u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2399 	u32 num_buckets = clusters * bpc;
2400 	struct ocfs2_xattr_bucket bucket;
2401 
2402 	memset(&bucket, 0, sizeof(bucket));
2403 
2404 	mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2405 	     clusters, blkno);
2406 
2407 	for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2408 		ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2409 					blkno, blk_per_bucket,
2410 					bucket.bhs, OCFS2_BH_CACHED, inode);
2411 		if (ret) {
2412 			mlog_errno(ret);
2413 			goto out;
2414 		}
2415 
2416 		bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2417 		/*
2418 		 * The real bucket num in this series of blocks is stored
2419 		 * in the 1st bucket.
2420 		 */
2421 		if (i == 0)
2422 			num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2423 
2424 		mlog(0, "iterating xattr bucket %llu\n", blkno);
2425 		if (func) {
2426 			ret = func(inode, &bucket, para);
2427 			if (ret) {
2428 				mlog_errno(ret);
2429 				break;
2430 			}
2431 		}
2432 
2433 		for (j = 0; j < blk_per_bucket; j++)
2434 			brelse(bucket.bhs[j]);
2435 		memset(&bucket, 0, sizeof(bucket));
2436 	}
2437 
2438 out:
2439 	for (j = 0; j < blk_per_bucket; j++)
2440 		brelse(bucket.bhs[j]);
2441 
2442 	return ret;
2443 }
2444 
/*
 * Listing state handed to ocfs2_list_xattr_bucket() through the "para"
 * argument of the bucket iterator.
 */
struct ocfs2_xattr_tree_list {
	/* Where the next name is written; advanced per name, may be NULL. */
	char *buffer;
	/* Reduced by the size of every name that has been listed. */
	size_t buffer_size;
};
2449 
2450 static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2451 					     struct ocfs2_xattr_header *xh,
2452 					     int index,
2453 					     int *block_off,
2454 					     int *new_offset)
2455 {
2456 	u16 name_offset;
2457 
2458 	if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2459 		return -EINVAL;
2460 
2461 	name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2462 
2463 	*block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2464 	*new_offset = name_offset % inode->i_sb->s_blocksize;
2465 
2466 	return 0;
2467 }
2468 
2469 static int ocfs2_list_xattr_bucket(struct inode *inode,
2470 				   struct ocfs2_xattr_bucket *bucket,
2471 				   void *para)
2472 {
2473 	int ret = 0;
2474 	struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2475 	size_t size;
2476 	int i, block_off, new_offset;
2477 
2478 	for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2479 		struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2480 		struct xattr_handler *handler =
2481 			ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
2482 
2483 		if (handler) {
2484 			ret = ocfs2_xattr_bucket_get_name_value(inode,
2485 								bucket->xh,
2486 								i,
2487 								&block_off,
2488 								&new_offset);
2489 			if (ret)
2490 				break;
2491 			size = handler->list(inode, xl->buffer, xl->buffer_size,
2492 					     bucket->bhs[block_off]->b_data +
2493 					     new_offset,
2494 					     entry->xe_name_len);
2495 			if (xl->buffer) {
2496 				if (size > xl->buffer_size)
2497 					return -ERANGE;
2498 				xl->buffer += size;
2499 			}
2500 			xl->buffer_size -= size;
2501 		}
2502 	}
2503 
2504 	return ret;
2505 }
2506 
2507 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2508 					     struct ocfs2_xattr_tree_root *xt,
2509 					     char *buffer,
2510 					     size_t buffer_size)
2511 {
2512 	struct ocfs2_extent_list *el = &xt->xt_list;
2513 	int ret = 0;
2514 	u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2515 	u64 p_blkno = 0;
2516 	struct ocfs2_xattr_tree_list xl = {
2517 		.buffer = buffer,
2518 		.buffer_size = buffer_size,
2519 	};
2520 
2521 	if (le16_to_cpu(el->l_next_free_rec) == 0)
2522 		return 0;
2523 
2524 	while (name_hash > 0) {
2525 		ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2526 					  &e_cpos, &num_clusters, el);
2527 		if (ret) {
2528 			mlog_errno(ret);
2529 			goto out;
2530 		}
2531 
2532 		ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2533 						  ocfs2_list_xattr_bucket,
2534 						  &xl);
2535 		if (ret) {
2536 			mlog_errno(ret);
2537 			goto out;
2538 		}
2539 
2540 		if (e_cpos == 0)
2541 			break;
2542 
2543 		name_hash = e_cpos - 1;
2544 	}
2545 
2546 	ret = buffer_size - xl.buffer_size;
2547 out:
2548 	return ret;
2549 }
2550 
2551 static int cmp_xe(const void *a, const void *b)
2552 {
2553 	const struct ocfs2_xattr_entry *l = a, *r = b;
2554 	u32 l_hash = le32_to_cpu(l->xe_name_hash);
2555 	u32 r_hash = le32_to_cpu(r->xe_name_hash);
2556 
2557 	if (l_hash > r_hash)
2558 		return 1;
2559 	if (l_hash < r_hash)
2560 		return -1;
2561 	return 0;
2562 }
2563 
2564 static void swap_xe(void *a, void *b, int size)
2565 {
2566 	struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2567 
2568 	tmp = *l;
2569 	memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2570 	memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2571 }
2572 
2573 /*
2574  * When the ocfs2_xattr_block is filled up, new bucket will be created
2575  * and all the xattr entries will be moved to the new bucket.
2576  * Note: we need to sort the entries since they are not saved in order
2577  * in the ocfs2_xattr_block.
2578  */
2579 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2580 					   struct buffer_head *xb_bh,
2581 					   struct buffer_head *xh_bh,
2582 					   struct buffer_head *data_bh)
2583 {
2584 	int i, blocksize = inode->i_sb->s_blocksize;
2585 	u16 offset, size, off_change;
2586 	struct ocfs2_xattr_entry *xe;
2587 	struct ocfs2_xattr_block *xb =
2588 				(struct ocfs2_xattr_block *)xb_bh->b_data;
2589 	struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2590 	struct ocfs2_xattr_header *xh =
2591 				(struct ocfs2_xattr_header *)xh_bh->b_data;
2592 	u16 count = le16_to_cpu(xb_xh->xh_count);
2593 	char *target = xh_bh->b_data, *src = xb_bh->b_data;
2594 
2595 	mlog(0, "cp xattr from block %llu to bucket %llu\n",
2596 	     (unsigned long long)xb_bh->b_blocknr,
2597 	     (unsigned long long)xh_bh->b_blocknr);
2598 
2599 	memset(xh_bh->b_data, 0, blocksize);
2600 	if (data_bh)
2601 		memset(data_bh->b_data, 0, blocksize);
2602 	/*
2603 	 * Since the xe_name_offset is based on ocfs2_xattr_header,
2604 	 * there is a offset change corresponding to the change of
2605 	 * ocfs2_xattr_header's position.
2606 	 */
2607 	off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2608 	xe = &xb_xh->xh_entries[count - 1];
2609 	offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2610 	size = blocksize - offset;
2611 
2612 	/* copy all the names and values. */
2613 	if (data_bh)
2614 		target = data_bh->b_data;
2615 	memcpy(target + offset, src + offset, size);
2616 
2617 	/* Init new header now. */
2618 	xh->xh_count = xb_xh->xh_count;
2619 	xh->xh_num_buckets = cpu_to_le16(1);
2620 	xh->xh_name_value_len = cpu_to_le16(size);
2621 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2622 
2623 	/* copy all the entries. */
2624 	target = xh_bh->b_data;
2625 	offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2626 	size = count * sizeof(struct ocfs2_xattr_entry);
2627 	memcpy(target + offset, (char *)xb_xh + offset, size);
2628 
2629 	/* Change the xe offset for all the xe because of the move. */
2630 	off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2631 		 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2632 	for (i = 0; i < count; i++)
2633 		le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2634 
2635 	mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2636 	     offset, size, off_change);
2637 
2638 	sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2639 	     cmp_xe, swap_xe);
2640 }
2641 
2642 /*
2643  * After we move xattr from block to index btree, we have to
2644  * update ocfs2_xattr_search to the new xe and base.
2645  *
2646  * When the entry is in xattr block, xattr_bh indicates the storage place.
2647  * While if the entry is in index b-tree, "bucket" indicates the
2648  * real place of the xattr.
2649  */
2650 static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2651 					   struct ocfs2_xattr_search *xs,
2652 					   struct buffer_head *old_bh,
2653 					   struct buffer_head *new_bh)
2654 {
2655 	int ret = 0;
2656 	char *buf = old_bh->b_data;
2657 	struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2658 	struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2659 	int i, blocksize = inode->i_sb->s_blocksize;
2660 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2661 
2662 	xs->bucket.bhs[0] = new_bh;
2663 	get_bh(new_bh);
2664 	xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2665 	xs->header = xs->bucket.xh;
2666 
2667 	xs->base = new_bh->b_data;
2668 	xs->end = xs->base + inode->i_sb->s_blocksize;
2669 
2670 	if (!xs->not_found) {
2671 		if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2672 			ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2673 					xs->bucket.bhs[0]->b_blocknr + 1,
2674 					blk_per_bucket - 1, &xs->bucket.bhs[1],
2675 					OCFS2_BH_CACHED, inode);
2676 			if (ret) {
2677 				mlog_errno(ret);
2678 				return ret;
2679 			}
2680 
2681 			i = xs->here - old_xh->xh_entries;
2682 			xs->here = &xs->header->xh_entries[i];
2683 		}
2684 	}
2685 
2686 	return ret;
2687 }
2688 
2689 static int ocfs2_xattr_create_index_block(struct inode *inode,
2690 					  struct ocfs2_xattr_search *xs)
2691 {
2692 	int ret, credits = OCFS2_SUBALLOC_ALLOC;
2693 	u32 bit_off, len;
2694 	u64 blkno;
2695 	handle_t *handle;
2696 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2697 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
2698 	struct ocfs2_alloc_context *data_ac;
2699 	struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2700 	struct buffer_head *xb_bh = xs->xattr_bh;
2701 	struct ocfs2_xattr_block *xb =
2702 			(struct ocfs2_xattr_block *)xb_bh->b_data;
2703 	struct ocfs2_xattr_tree_root *xr;
2704 	u16 xb_flags = le16_to_cpu(xb->xb_flags);
2705 	u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2706 
2707 	mlog(0, "create xattr index block for %llu\n",
2708 	     (unsigned long long)xb_bh->b_blocknr);
2709 
2710 	BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2711 
2712 	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2713 	if (ret) {
2714 		mlog_errno(ret);
2715 		goto out;
2716 	}
2717 
2718 	/*
2719 	 * XXX:
2720 	 * We can use this lock for now, and maybe move to a dedicated mutex
2721 	 * if performance becomes a problem later.
2722 	 */
2723 	down_write(&oi->ip_alloc_sem);
2724 
2725 	/*
2726 	 * 3 more credits, one for xattr block update, one for the 1st block
2727 	 * of the new xattr bucket and one for the value/data.
2728 	 */
2729 	credits += 3;
2730 	handle = ocfs2_start_trans(osb, credits);
2731 	if (IS_ERR(handle)) {
2732 		ret = PTR_ERR(handle);
2733 		mlog_errno(ret);
2734 		goto out_sem;
2735 	}
2736 
2737 	ret = ocfs2_journal_access(handle, inode, xb_bh,
2738 				   OCFS2_JOURNAL_ACCESS_WRITE);
2739 	if (ret) {
2740 		mlog_errno(ret);
2741 		goto out_commit;
2742 	}
2743 
2744 	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2745 	if (ret) {
2746 		mlog_errno(ret);
2747 		goto out_commit;
2748 	}
2749 
2750 	/*
2751 	 * The bucket may spread in many blocks, and
2752 	 * we will only touch the 1st block and the last block
2753 	 * in the whole bucket(one for entry and one for data).
2754 	 */
2755 	blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2756 
2757 	mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2758 
2759 	xh_bh = sb_getblk(inode->i_sb, blkno);
2760 	if (!xh_bh) {
2761 		ret = -EIO;
2762 		mlog_errno(ret);
2763 		goto out_commit;
2764 	}
2765 
2766 	ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2767 
2768 	ret = ocfs2_journal_access(handle, inode, xh_bh,
2769 				   OCFS2_JOURNAL_ACCESS_CREATE);
2770 	if (ret) {
2771 		mlog_errno(ret);
2772 		goto out_commit;
2773 	}
2774 
2775 	if (bpb > 1) {
2776 		data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2777 		if (!data_bh) {
2778 			ret = -EIO;
2779 			mlog_errno(ret);
2780 			goto out_commit;
2781 		}
2782 
2783 		ocfs2_set_new_buffer_uptodate(inode, data_bh);
2784 
2785 		ret = ocfs2_journal_access(handle, inode, data_bh,
2786 					   OCFS2_JOURNAL_ACCESS_CREATE);
2787 		if (ret) {
2788 			mlog_errno(ret);
2789 			goto out_commit;
2790 		}
2791 	}
2792 
2793 	ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2794 
2795 	ocfs2_journal_dirty(handle, xh_bh);
2796 	if (data_bh)
2797 		ocfs2_journal_dirty(handle, data_bh);
2798 
2799 	ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2800 
2801 	/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2802 	memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2803 	       offsetof(struct ocfs2_xattr_block, xb_attrs));
2804 
2805 	xr = &xb->xb_attrs.xb_root;
2806 	xr->xt_clusters = cpu_to_le32(1);
2807 	xr->xt_last_eb_blk = 0;
2808 	xr->xt_list.l_tree_depth = 0;
2809 	xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2810 	xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2811 
2812 	xr->xt_list.l_recs[0].e_cpos = 0;
2813 	xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2814 	xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2815 
2816 	xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2817 
2818 	ret = ocfs2_journal_dirty(handle, xb_bh);
2819 	if (ret) {
2820 		mlog_errno(ret);
2821 		goto out_commit;
2822 	}
2823 
2824 out_commit:
2825 	ocfs2_commit_trans(osb, handle);
2826 
2827 out_sem:
2828 	up_write(&oi->ip_alloc_sem);
2829 
2830 out:
2831 	if (data_ac)
2832 		ocfs2_free_alloc_context(data_ac);
2833 
2834 	brelse(xh_bh);
2835 	brelse(data_bh);
2836 
2837 	return ret;
2838 }
2839 
2840 static int cmp_xe_offset(const void *a, const void *b)
2841 {
2842 	const struct ocfs2_xattr_entry *l = a, *r = b;
2843 	u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2844 	u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2845 
2846 	if (l_name_offset < r_name_offset)
2847 		return 1;
2848 	if (l_name_offset > r_name_offset)
2849 		return -1;
2850 	return 0;
2851 }
2852 
2853 /*
2854  * defrag a xattr bucket if we find that the bucket has some
2855  * holes beteen name/value pairs.
2856  * We will move all the name/value pairs to the end of the bucket
2857  * so that we can spare some space for insertion.
2858  */
2859 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2860 				     struct ocfs2_xattr_bucket *bucket)
2861 {
2862 	int ret, i;
2863 	size_t end, offset, len, value_len;
2864 	struct ocfs2_xattr_header *xh;
2865 	char *entries, *buf, *bucket_buf = NULL;
2866 	u64 blkno = bucket->bhs[0]->b_blocknr;
2867 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2868 	u16 xh_free_start;
2869 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2870 	size_t blocksize = inode->i_sb->s_blocksize;
2871 	handle_t *handle;
2872 	struct buffer_head **bhs;
2873 	struct ocfs2_xattr_entry *xe;
2874 
2875 	bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2876 			GFP_NOFS);
2877 	if (!bhs)
2878 		return -ENOMEM;
2879 
2880 	ret = ocfs2_read_blocks(osb, blkno, blk_per_bucket, bhs,
2881 				OCFS2_BH_CACHED, inode);
2882 	if (ret)
2883 		goto out;
2884 
2885 	/*
2886 	 * In order to make the operation more efficient and generic,
2887 	 * we copy all the blocks into a contiguous memory and do the
2888 	 * defragment there, so if anything is error, we will not touch
2889 	 * the real block.
2890 	 */
2891 	bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2892 	if (!bucket_buf) {
2893 		ret = -EIO;
2894 		goto out;
2895 	}
2896 
2897 	buf = bucket_buf;
2898 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2899 		memcpy(buf, bhs[i]->b_data, blocksize);
2900 
2901 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2902 	if (IS_ERR(handle)) {
2903 		ret = PTR_ERR(handle);
2904 		handle = NULL;
2905 		mlog_errno(ret);
2906 		goto out;
2907 	}
2908 
2909 	for (i = 0; i < blk_per_bucket; i++) {
2910 		ret = ocfs2_journal_access(handle, inode, bhs[i],
2911 					   OCFS2_JOURNAL_ACCESS_WRITE);
2912 		if (ret < 0) {
2913 			mlog_errno(ret);
2914 			goto commit;
2915 		}
2916 	}
2917 
2918 	xh = (struct ocfs2_xattr_header *)bucket_buf;
2919 	entries = (char *)xh->xh_entries;
2920 	xh_free_start = le16_to_cpu(xh->xh_free_start);
2921 
2922 	mlog(0, "adjust xattr bucket in %llu, count = %u, "
2923 	     "xh_free_start = %u, xh_name_value_len = %u.\n",
2924 	     blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2925 	     le16_to_cpu(xh->xh_name_value_len));
2926 
2927 	/*
2928 	 * sort all the entries by their offset.
2929 	 * the largest will be the first, so that we can
2930 	 * move them to the end one by one.
2931 	 */
2932 	sort(entries, le16_to_cpu(xh->xh_count),
2933 	     sizeof(struct ocfs2_xattr_entry),
2934 	     cmp_xe_offset, swap_xe);
2935 
2936 	/* Move all name/values to the end of the bucket. */
2937 	xe = xh->xh_entries;
2938 	end = OCFS2_XATTR_BUCKET_SIZE;
2939 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2940 		offset = le16_to_cpu(xe->xe_name_offset);
2941 		if (ocfs2_xattr_is_local(xe))
2942 			value_len = OCFS2_XATTR_SIZE(
2943 					le64_to_cpu(xe->xe_value_size));
2944 		else
2945 			value_len = OCFS2_XATTR_ROOT_SIZE;
2946 		len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2947 
2948 		/*
2949 		 * We must make sure that the name/value pair
2950 		 * exist in the same block. So adjust end to
2951 		 * the previous block end if needed.
2952 		 */
2953 		if (((end - len) / blocksize !=
2954 			(end - 1) / blocksize))
2955 			end = end - end % blocksize;
2956 
2957 		if (end > offset + len) {
2958 			memmove(bucket_buf + end - len,
2959 				bucket_buf + offset, len);
2960 			xe->xe_name_offset = cpu_to_le16(end - len);
2961 		}
2962 
2963 		mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2964 				"bucket %llu\n", (unsigned long long)blkno);
2965 
2966 		end -= len;
2967 	}
2968 
2969 	mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2970 			"bucket %llu\n", (unsigned long long)blkno);
2971 
2972 	if (xh_free_start == end)
2973 		goto commit;
2974 
2975 	memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2976 	xh->xh_free_start = cpu_to_le16(end);
2977 
2978 	/* sort the entries by their name_hash. */
2979 	sort(entries, le16_to_cpu(xh->xh_count),
2980 	     sizeof(struct ocfs2_xattr_entry),
2981 	     cmp_xe, swap_xe);
2982 
2983 	buf = bucket_buf;
2984 	for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
2985 		memcpy(bhs[i]->b_data, buf, blocksize);
2986 		ocfs2_journal_dirty(handle, bhs[i]);
2987 	}
2988 
2989 commit:
2990 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2991 out:
2992 
2993 	if (bhs) {
2994 		for (i = 0; i < blk_per_bucket; i++)
2995 			brelse(bhs[i]);
2996 	}
2997 	kfree(bhs);
2998 
2999 	kfree(bucket_buf);
3000 	return ret;
3001 }
3002 
3003 /*
3004  * Move half nums of the xattr bucket in the previous cluster to this new
3005  * cluster. We only touch the last cluster of the previous extend record.
3006  *
3007  * first_bh is the first buffer_head of a series of bucket in the same
3008  * extent rec and header_bh is the header of one bucket in this cluster.
3009  * They will be updated if we move the data header_bh contains to the new
3010  * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3011  */
3012 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3013 					       handle_t *handle,
3014 					       struct buffer_head **first_bh,
3015 					       struct buffer_head **header_bh,
3016 					       u64 new_blkno,
3017 					       u64 prev_blkno,
3018 					       u32 num_clusters,
3019 					       u32 *first_hash)
3020 {
3021 	int i, ret, credits;
3022 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3023 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3024 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3025 	int blocksize = inode->i_sb->s_blocksize;
3026 	struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3027 	struct ocfs2_xattr_header *new_xh;
3028 	struct ocfs2_xattr_header *xh =
3029 			(struct ocfs2_xattr_header *)((*first_bh)->b_data);
3030 
3031 	BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3032 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3033 
3034 	prev_bh = *first_bh;
3035 	get_bh(prev_bh);
3036 	xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3037 
3038 	prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3039 
3040 	mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3041 	     prev_blkno, new_blkno);
3042 
3043 	/*
3044 	 * We need to update the 1st half of the new cluster and
3045 	 * 1 more for the update of the 1st bucket of the previous
3046 	 * extent record.
3047 	 */
3048 	credits = bpc / 2 + 1;
3049 	ret = ocfs2_extend_trans(handle, credits);
3050 	if (ret) {
3051 		mlog_errno(ret);
3052 		goto out;
3053 	}
3054 
3055 	ret = ocfs2_journal_access(handle, inode, prev_bh,
3056 				   OCFS2_JOURNAL_ACCESS_WRITE);
3057 	if (ret) {
3058 		mlog_errno(ret);
3059 		goto out;
3060 	}
3061 
3062 	for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3063 		old_bh = new_bh = NULL;
3064 		new_bh = sb_getblk(inode->i_sb, new_blkno);
3065 		if (!new_bh) {
3066 			ret = -EIO;
3067 			mlog_errno(ret);
3068 			goto out;
3069 		}
3070 
3071 		ocfs2_set_new_buffer_uptodate(inode, new_bh);
3072 
3073 		ret = ocfs2_journal_access(handle, inode, new_bh,
3074 					   OCFS2_JOURNAL_ACCESS_CREATE);
3075 		if (ret < 0) {
3076 			mlog_errno(ret);
3077 			brelse(new_bh);
3078 			goto out;
3079 		}
3080 
3081 		ret = ocfs2_read_block(osb, prev_blkno,
3082 				       &old_bh, OCFS2_BH_CACHED, inode);
3083 		if (ret < 0) {
3084 			mlog_errno(ret);
3085 			brelse(new_bh);
3086 			goto out;
3087 		}
3088 
3089 		memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3090 
3091 		if (i == 0) {
3092 			new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3093 			new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3094 
3095 			if (first_hash)
3096 				*first_hash = le32_to_cpu(
3097 					new_xh->xh_entries[0].xe_name_hash);
3098 			new_first_bh = new_bh;
3099 			get_bh(new_first_bh);
3100 		}
3101 
3102 		ocfs2_journal_dirty(handle, new_bh);
3103 
3104 		if (*header_bh == old_bh) {
3105 			brelse(*header_bh);
3106 			*header_bh = new_bh;
3107 			get_bh(*header_bh);
3108 
3109 			brelse(*first_bh);
3110 			*first_bh = new_first_bh;
3111 			get_bh(*first_bh);
3112 		}
3113 		brelse(new_bh);
3114 		brelse(old_bh);
3115 	}
3116 
3117 	le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3118 
3119 	ocfs2_journal_dirty(handle, prev_bh);
3120 out:
3121 	brelse(prev_bh);
3122 	brelse(new_first_bh);
3123 	return ret;
3124 }
3125 
3126 static int ocfs2_read_xattr_bucket(struct inode *inode,
3127 				   u64 blkno,
3128 				   struct buffer_head **bhs,
3129 				   int new)
3130 {
3131 	int ret = 0;
3132 	u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3133 
3134 	if (!new)
3135 		return ocfs2_read_blocks(OCFS2_SB(inode->i_sb), blkno,
3136 					 blk_per_bucket, bhs,
3137 					 OCFS2_BH_CACHED, inode);
3138 
3139 	for (i = 0; i < blk_per_bucket; i++) {
3140 		bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3141 		if (bhs[i] == NULL) {
3142 			ret = -EIO;
3143 			mlog_errno(ret);
3144 			break;
3145 		}
3146 		ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3147 	}
3148 
3149 	return ret;
3150 }
3151 
3152 /*
3153  * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3154  * first_hash will record the 1st hash of the new bucket.
3155  */
3156 static int ocfs2_half_xattr_bucket(struct inode *inode,
3157 				   handle_t *handle,
3158 				   u64 blk,
3159 				   u64 new_blk,
3160 				   u32 *first_hash,
3161 				   int new_bucket_head)
3162 {
3163 	int ret, i;
3164 	u16 count, start, len, name_value_len, xe_len, name_offset;
3165 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3166 	struct buffer_head **s_bhs, **t_bhs = NULL;
3167 	struct ocfs2_xattr_header *xh;
3168 	struct ocfs2_xattr_entry *xe;
3169 	int blocksize = inode->i_sb->s_blocksize;
3170 
3171 	mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3172 	     blk, new_blk);
3173 
3174 	s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3175 	if (!s_bhs)
3176 		return -ENOMEM;
3177 
3178 	ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3179 	if (ret) {
3180 		mlog_errno(ret);
3181 		goto out;
3182 	}
3183 
3184 	ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3185 				   OCFS2_JOURNAL_ACCESS_WRITE);
3186 	if (ret) {
3187 		mlog_errno(ret);
3188 		goto out;
3189 	}
3190 
3191 	t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3192 	if (!t_bhs) {
3193 		ret = -ENOMEM;
3194 		goto out;
3195 	}
3196 
3197 	ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3198 	if (ret) {
3199 		mlog_errno(ret);
3200 		goto out;
3201 	}
3202 
3203 	for (i = 0; i < blk_per_bucket; i++) {
3204 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3205 					   OCFS2_JOURNAL_ACCESS_CREATE);
3206 		if (ret) {
3207 			mlog_errno(ret);
3208 			goto out;
3209 		}
3210 	}
3211 
3212 	/* copy the whole bucket to the new first. */
3213 	for (i = 0; i < blk_per_bucket; i++)
3214 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3215 
3216 	/* update the new bucket. */
3217 	xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3218 	count = le16_to_cpu(xh->xh_count);
3219 	start = count / 2;
3220 
3221 	/*
3222 	 * Calculate the total name/value len and xh_free_start for
3223 	 * the old bucket first.
3224 	 */
3225 	name_offset = OCFS2_XATTR_BUCKET_SIZE;
3226 	name_value_len = 0;
3227 	for (i = 0; i < start; i++) {
3228 		xe = &xh->xh_entries[i];
3229 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3230 		if (ocfs2_xattr_is_local(xe))
3231 			xe_len +=
3232 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3233 		else
3234 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3235 		name_value_len += xe_len;
3236 		if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3237 			name_offset = le16_to_cpu(xe->xe_name_offset);
3238 	}
3239 
3240 	/*
3241 	 * Now begin the modification to the new bucket.
3242 	 *
3243 	 * In the new bucket, We just move the xattr entry to the beginning
3244 	 * and don't touch the name/value. So there will be some holes in the
3245 	 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3246 	 * called.
3247 	 */
3248 	xe = &xh->xh_entries[start];
3249 	len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3250 	mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3251 	     (char *)xe - (char *)xh, (char *)xh->xh_entries - (char *)xh);
3252 	memmove((char *)xh->xh_entries, (char *)xe, len);
3253 	xe = &xh->xh_entries[count - start];
3254 	len = sizeof(struct ocfs2_xattr_entry) * start;
3255 	memset((char *)xe, 0, len);
3256 
3257 	le16_add_cpu(&xh->xh_count, -start);
3258 	le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3259 
3260 	/* Calculate xh_free_start for the new bucket. */
3261 	xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3262 	for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3263 		xe = &xh->xh_entries[i];
3264 		xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3265 		if (ocfs2_xattr_is_local(xe))
3266 			xe_len +=
3267 			   OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3268 		else
3269 			xe_len += OCFS2_XATTR_ROOT_SIZE;
3270 		if (le16_to_cpu(xe->xe_name_offset) <
3271 		    le16_to_cpu(xh->xh_free_start))
3272 			xh->xh_free_start = xe->xe_name_offset;
3273 	}
3274 
3275 	/* set xh->xh_num_buckets for the new xh. */
3276 	if (new_bucket_head)
3277 		xh->xh_num_buckets = cpu_to_le16(1);
3278 	else
3279 		xh->xh_num_buckets = 0;
3280 
3281 	for (i = 0; i < blk_per_bucket; i++) {
3282 		ocfs2_journal_dirty(handle, t_bhs[i]);
3283 		if (ret)
3284 			mlog_errno(ret);
3285 	}
3286 
3287 	/* store the first_hash of the new bucket. */
3288 	if (first_hash)
3289 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3290 
3291 	/*
3292 	 * Now only update the 1st block of the old bucket.
3293 	 * Please note that the entry has been sorted already above.
3294 	 */
3295 	xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3296 	memset(&xh->xh_entries[start], 0,
3297 	       sizeof(struct ocfs2_xattr_entry) * (count - start));
3298 	xh->xh_count = cpu_to_le16(start);
3299 	xh->xh_free_start = cpu_to_le16(name_offset);
3300 	xh->xh_name_value_len = cpu_to_le16(name_value_len);
3301 
3302 	ocfs2_journal_dirty(handle, s_bhs[0]);
3303 	if (ret)
3304 		mlog_errno(ret);
3305 
3306 out:
3307 	if (s_bhs) {
3308 		for (i = 0; i < blk_per_bucket; i++)
3309 			brelse(s_bhs[i]);
3310 	}
3311 	kfree(s_bhs);
3312 
3313 	if (t_bhs) {
3314 		for (i = 0; i < blk_per_bucket; i++)
3315 			brelse(t_bhs[i]);
3316 	}
3317 	kfree(t_bhs);
3318 
3319 	return ret;
3320 }
3321 
3322 /*
3323  * Copy xattr from one bucket to another bucket.
3324  *
3325  * The caller must make sure that the journal transaction
3326  * has enough space for journaling.
3327  */
3328 static int ocfs2_cp_xattr_bucket(struct inode *inode,
3329 				 handle_t *handle,
3330 				 u64 s_blkno,
3331 				 u64 t_blkno,
3332 				 int t_is_new)
3333 {
3334 	int ret, i;
3335 	int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3336 	int blocksize = inode->i_sb->s_blocksize;
3337 	struct buffer_head **s_bhs, **t_bhs = NULL;
3338 
3339 	BUG_ON(s_blkno == t_blkno);
3340 
3341 	mlog(0, "cp bucket %llu to %llu, target is %d\n",
3342 	     s_blkno, t_blkno, t_is_new);
3343 
3344 	s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3345 			GFP_NOFS);
3346 	if (!s_bhs)
3347 		return -ENOMEM;
3348 
3349 	ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3350 	if (ret)
3351 		goto out;
3352 
3353 	t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3354 			GFP_NOFS);
3355 	if (!t_bhs) {
3356 		ret = -ENOMEM;
3357 		goto out;
3358 	}
3359 
3360 	ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3361 	if (ret)
3362 		goto out;
3363 
3364 	for (i = 0; i < blk_per_bucket; i++) {
3365 		ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3366 					   OCFS2_JOURNAL_ACCESS_WRITE);
3367 		if (ret)
3368 			goto out;
3369 	}
3370 
3371 	for (i = 0; i < blk_per_bucket; i++) {
3372 		memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3373 		ocfs2_journal_dirty(handle, t_bhs[i]);
3374 	}
3375 
3376 out:
3377 	if (s_bhs) {
3378 		for (i = 0; i < blk_per_bucket; i++)
3379 			brelse(s_bhs[i]);
3380 	}
3381 	kfree(s_bhs);
3382 
3383 	if (t_bhs) {
3384 		for (i = 0; i < blk_per_bucket; i++)
3385 			brelse(t_bhs[i]);
3386 	}
3387 	kfree(t_bhs);
3388 
3389 	return ret;
3390 }
3391 
3392 /*
3393  * Copy one xattr cluster from src_blk to to_blk.
3394  * The to_blk will become the first bucket header of the cluster, so its
3395  * xh_num_buckets will be initialized as the bucket num in the cluster.
3396  */
3397 static int ocfs2_cp_xattr_cluster(struct inode *inode,
3398 				  handle_t *handle,
3399 				  struct buffer_head *first_bh,
3400 				  u64 src_blk,
3401 				  u64 to_blk,
3402 				  u32 *first_hash)
3403 {
3404 	int i, ret, credits;
3405 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3406 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3407 	int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3408 	struct buffer_head *bh = NULL;
3409 	struct ocfs2_xattr_header *xh;
3410 	u64 to_blk_start = to_blk;
3411 
3412 	mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3413 
3414 	/*
3415 	 * We need to update the new cluster and 1 more for the update of
3416 	 * the 1st bucket of the previous extent rec.
3417 	 */
3418 	credits = bpc + 1;
3419 	ret = ocfs2_extend_trans(handle, credits);
3420 	if (ret) {
3421 		mlog_errno(ret);
3422 		goto out;
3423 	}
3424 
3425 	ret = ocfs2_journal_access(handle, inode, first_bh,
3426 				   OCFS2_JOURNAL_ACCESS_WRITE);
3427 	if (ret) {
3428 		mlog_errno(ret);
3429 		goto out;
3430 	}
3431 
3432 	for (i = 0; i < num_buckets; i++) {
3433 		ret = ocfs2_cp_xattr_bucket(inode, handle,
3434 					    src_blk, to_blk, 1);
3435 		if (ret) {
3436 			mlog_errno(ret);
3437 			goto out;
3438 		}
3439 
3440 		src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3441 		to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3442 	}
3443 
3444 	/* update the old bucket header. */
3445 	xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3446 	le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3447 
3448 	ocfs2_journal_dirty(handle, first_bh);
3449 
3450 	/* update the new bucket header. */
3451 	ret = ocfs2_read_block(osb, to_blk_start, &bh, OCFS2_BH_CACHED, inode);
3452 	if (ret < 0) {
3453 		mlog_errno(ret);
3454 		goto out;
3455 	}
3456 
3457 	ret = ocfs2_journal_access(handle, inode, bh,
3458 				   OCFS2_JOURNAL_ACCESS_WRITE);
3459 	if (ret) {
3460 		mlog_errno(ret);
3461 		goto out;
3462 	}
3463 
3464 	xh = (struct ocfs2_xattr_header *)bh->b_data;
3465 	xh->xh_num_buckets = cpu_to_le16(num_buckets);
3466 
3467 	ocfs2_journal_dirty(handle, bh);
3468 
3469 	if (first_hash)
3470 		*first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3471 out:
3472 	brelse(bh);
3473 	return ret;
3474 }
3475 
3476 /*
3477  * Move half of the xattrs in this cluster to the new cluster.
3478  * This function should only be called when bucket size == cluster size.
3479  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3480  */
3481 static int ocfs2_half_xattr_cluster(struct inode *inode,
3482 				    handle_t *handle,
3483 				    u64 prev_blk,
3484 				    u64 new_blk,
3485 				    u32 *first_hash)
3486 {
3487 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3488 	int ret, credits = 2 * blk_per_bucket;
3489 
3490 	BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3491 
3492 	ret = ocfs2_extend_trans(handle, credits);
3493 	if (ret) {
3494 		mlog_errno(ret);
3495 		return ret;
3496 	}
3497 
3498 	/* Move half of the xattr in start_blk to the next bucket. */
3499 	return  ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3500 					new_blk, first_hash, 1);
3501 }
3502 
3503 /*
3504  * Move some xattrs from the old cluster to the new one since they are not
3505  * contiguous in ocfs2 xattr tree.
3506  *
3507  * new_blk starts a new separate cluster, and we will move some xattrs from
3508  * prev_blk to it. v_start will be set as the first name hash value in this
3509  * new cluster so that it can be used as e_cpos during tree insertion and
3510  * don't collide with our original b-tree operations. first_bh and header_bh
3511  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3512  * to extend the insert bucket.
3513  *
3514  * The problem is how much xattr should we move to the new one and when should
3515  * we update first_bh and header_bh?
3516  * 1. If cluster size > bucket size, that means the previous cluster has more
3517  *    than 1 bucket, so just move half nums of bucket into the new cluster and
3518  *    update the first_bh and header_bh if the insert bucket has been moved
3519  *    to the new cluster.
3520  * 2. If cluster_size == bucket_size:
3521  *    a) If the previous extent rec has more than one cluster and the insert
3522  *       place isn't in the last cluster, copy the entire last cluster to the
3523  *       new one. This time, we don't need to upate the first_bh and header_bh
3524  *       since they will not be moved into the new cluster.
3525  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
3526  *       the new one. And we set the extend flag to zero if the insert place is
3527  *       moved into the new allocated cluster since no extend is needed.
3528  */
3529 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3530 					    handle_t *handle,
3531 					    struct buffer_head **first_bh,
3532 					    struct buffer_head **header_bh,
3533 					    u64 new_blk,
3534 					    u64 prev_blk,
3535 					    u32 prev_clusters,
3536 					    u32 *v_start,
3537 					    int *extend)
3538 {
3539 	int ret = 0;
3540 	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3541 
3542 	mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3543 	     prev_blk, prev_clusters, new_blk);
3544 
3545 	if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3546 		ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3547 							  handle,
3548 							  first_bh,
3549 							  header_bh,
3550 							  new_blk,
3551 							  prev_blk,
3552 							  prev_clusters,
3553 							  v_start);
3554 	else {
3555 		u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3556 
3557 		if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3558 			ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3559 						     last_blk, new_blk,
3560 						     v_start);
3561 		else {
3562 			ret = ocfs2_half_xattr_cluster(inode, handle,
3563 						       last_blk, new_blk,
3564 						       v_start);
3565 
3566 			if ((*header_bh)->b_blocknr == last_blk && extend)
3567 				*extend = 0;
3568 		}
3569 	}
3570 
3571 	return ret;
3572 }
3573 
3574 /*
3575  * Add a new cluster for xattr storage.
3576  *
3577  * If the new cluster is contiguous with the previous one, it will be
3578  * appended to the same extent record, and num_clusters will be updated.
3579  * If not, we will insert a new extent for it and move some xattrs in
3580  * the last cluster into the new allocated one.
3581  * We also need to limit the maximum size of a btree leaf, otherwise we'll
3582  * lose the benefits of hashing because we'll have to search large leaves.
3583  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3584  * if it's bigger).
3585  *
3586  * first_bh is the first block of the previous extent rec and header_bh
3587  * indicates the bucket we will insert the new xattrs. They will be updated
3588  * when the header_bh is moved into the new cluster.
3589  */
3590 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3591 				       struct buffer_head *root_bh,
3592 				       struct buffer_head **first_bh,
3593 				       struct buffer_head **header_bh,
3594 				       u32 *num_clusters,
3595 				       u32 prev_cpos,
3596 				       u64 prev_blkno,
3597 				       int *extend)
3598 {
3599 	int ret, credits;
3600 	u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3601 	u32 prev_clusters = *num_clusters;
3602 	u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3603 	u64 block;
3604 	handle_t *handle = NULL;
3605 	struct ocfs2_alloc_context *data_ac = NULL;
3606 	struct ocfs2_alloc_context *meta_ac = NULL;
3607 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3608 	struct ocfs2_xattr_block *xb =
3609 			(struct ocfs2_xattr_block *)root_bh->b_data;
3610 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3611 	struct ocfs2_extent_list *root_el = &xb_root->xt_list;
3612 	enum ocfs2_extent_tree_type type = OCFS2_XATTR_TREE_EXTENT;
3613 
3614 	mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3615 	     "previous xattr blkno = %llu\n",
3616 	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
3617 	     prev_cpos, prev_blkno);
3618 
3619 	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
3620 				    clusters_to_add, 0, &data_ac,
3621 				    &meta_ac, type, NULL);
3622 	if (ret) {
3623 		mlog_errno(ret);
3624 		goto leave;
3625 	}
3626 
3627 	credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
3628 	handle = ocfs2_start_trans(osb, credits);
3629 	if (IS_ERR(handle)) {
3630 		ret = PTR_ERR(handle);
3631 		handle = NULL;
3632 		mlog_errno(ret);
3633 		goto leave;
3634 	}
3635 
3636 	ret = ocfs2_journal_access(handle, inode, root_bh,
3637 				   OCFS2_JOURNAL_ACCESS_WRITE);
3638 	if (ret < 0) {
3639 		mlog_errno(ret);
3640 		goto leave;
3641 	}
3642 
3643 	ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3644 				     clusters_to_add, &bit_off, &num_bits);
3645 	if (ret < 0) {
3646 		if (ret != -ENOSPC)
3647 			mlog_errno(ret);
3648 		goto leave;
3649 	}
3650 
3651 	BUG_ON(num_bits > clusters_to_add);
3652 
3653 	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3654 	mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3655 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3656 
3657 	if (prev_blkno + prev_clusters * bpc == block &&
3658 	    (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3659 	     OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3660 		/*
3661 		 * If this cluster is contiguous with the old one and
3662 		 * adding this new cluster, we don't surpass the limit of
3663 		 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3664 		 * initialized and used like other buckets in the previous
3665 		 * cluster.
3666 		 * So add it as a contiguous one. The caller will handle
3667 		 * its init process.
3668 		 */
3669 		v_start = prev_cpos + prev_clusters;
3670 		*num_clusters = prev_clusters + num_bits;
3671 		mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3672 		     num_bits);
3673 	} else {
3674 		ret = ocfs2_adjust_xattr_cross_cluster(inode,
3675 						       handle,
3676 						       first_bh,
3677 						       header_bh,
3678 						       block,
3679 						       prev_blkno,
3680 						       prev_clusters,
3681 						       &v_start,
3682 						       extend);
3683 		if (ret) {
3684 			mlog_errno(ret);
3685 			goto leave;
3686 		}
3687 	}
3688 
3689 	mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3690 	     num_bits, block, v_start);
3691 	ret = ocfs2_xattr_tree_insert_extent(osb, handle, inode, root_bh,
3692 					     v_start, block, num_bits,
3693 					     0, meta_ac);
3694 	if (ret < 0) {
3695 		mlog_errno(ret);
3696 		goto leave;
3697 	}
3698 
3699 	ret = ocfs2_journal_dirty(handle, root_bh);
3700 	if (ret < 0) {
3701 		mlog_errno(ret);
3702 		goto leave;
3703 	}
3704 
3705 leave:
3706 	if (handle)
3707 		ocfs2_commit_trans(osb, handle);
3708 	if (data_ac)
3709 		ocfs2_free_alloc_context(data_ac);
3710 	if (meta_ac)
3711 		ocfs2_free_alloc_context(meta_ac);
3712 
3713 	return ret;
3714 }
3715 
3716 /*
3717  * Extend a new xattr bucket and move xattrs to the end one by one until
3718  * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3719  */
3720 static int ocfs2_extend_xattr_bucket(struct inode *inode,
3721 				     struct buffer_head *first_bh,
3722 				     struct buffer_head *start_bh,
3723 				     u32 num_clusters)
3724 {
3725 	int ret, credits;
3726 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3727 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3728 	u64 start_blk = start_bh->b_blocknr, end_blk;
3729 	u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3730 	handle_t *handle;
3731 	struct ocfs2_xattr_header *first_xh =
3732 				(struct ocfs2_xattr_header *)first_bh->b_data;
3733 	u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3734 
3735 	mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3736 	     "from %llu, len = %u\n", start_blk,
3737 	     (unsigned long long)first_bh->b_blocknr, num_clusters);
3738 
3739 	BUG_ON(bucket >= num_buckets);
3740 
3741 	end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3742 
3743 	/*
3744 	 * We will touch all the buckets after the start_bh(include it).
3745 	 * Add one more bucket and modify the first_bh.
3746 	 */
3747 	credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3748 	handle = ocfs2_start_trans(osb, credits);
3749 	if (IS_ERR(handle)) {
3750 		ret = PTR_ERR(handle);
3751 		handle = NULL;
3752 		mlog_errno(ret);
3753 		goto out;
3754 	}
3755 
3756 	ret = ocfs2_journal_access(handle, inode, first_bh,
3757 				   OCFS2_JOURNAL_ACCESS_WRITE);
3758 	if (ret) {
3759 		mlog_errno(ret);
3760 		goto commit;
3761 	}
3762 
3763 	while (end_blk != start_blk) {
3764 		ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3765 					    end_blk + blk_per_bucket, 0);
3766 		if (ret)
3767 			goto commit;
3768 		end_blk -= blk_per_bucket;
3769 	}
3770 
3771 	/* Move half of the xattr in start_blk to the next bucket. */
3772 	ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3773 				      start_blk + blk_per_bucket, NULL, 0);
3774 
3775 	le16_add_cpu(&first_xh->xh_num_buckets, 1);
3776 	ocfs2_journal_dirty(handle, first_bh);
3777 
3778 commit:
3779 	ocfs2_commit_trans(osb, handle);
3780 out:
3781 	return ret;
3782 }
3783 
3784 /*
3785  * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3786  * xb_bh is the ocfs2_xattr_block.
3787  * We will move all the buckets starting from header_bh to the next place. As
3788  * for this one, half num of its xattrs will be moved to the next one.
3789  *
3790  * We will allocate a new cluster if current cluster is full and adjust
3791  * header_bh and first_bh if the insert place is moved to the new cluster.
3792  */
3793 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3794 				      struct buffer_head *xb_bh,
3795 				      struct buffer_head *header_bh)
3796 {
3797 	struct ocfs2_xattr_header *first_xh = NULL;
3798 	struct buffer_head *first_bh = NULL;
3799 	struct ocfs2_xattr_block *xb =
3800 			(struct ocfs2_xattr_block *)xb_bh->b_data;
3801 	struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3802 	struct ocfs2_extent_list *el = &xb_root->xt_list;
3803 	struct ocfs2_xattr_header *xh =
3804 			(struct ocfs2_xattr_header *)header_bh->b_data;
3805 	u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3806 	struct super_block *sb = inode->i_sb;
3807 	struct ocfs2_super *osb = OCFS2_SB(sb);
3808 	int ret, num_buckets, extend = 1;
3809 	u64 p_blkno;
3810 	u32 e_cpos, num_clusters;
3811 
3812 	mlog(0, "Add new xattr bucket starting form %llu\n",
3813 	     (unsigned long long)header_bh->b_blocknr);
3814 
3815 	/*
3816 	 * Add refrence for header_bh here because it may be
3817 	 * changed in ocfs2_add_new_xattr_cluster and we need
3818 	 * to free it in the end.
3819 	 */
3820 	get_bh(header_bh);
3821 
3822 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3823 				  &num_clusters, el);
3824 	if (ret) {
3825 		mlog_errno(ret);
3826 		goto out;
3827 	}
3828 
3829 	ret = ocfs2_read_block(osb, p_blkno,
3830 			       &first_bh, OCFS2_BH_CACHED, inode);
3831 	if (ret) {
3832 		mlog_errno(ret);
3833 		goto out;
3834 	}
3835 
3836 	num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3837 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3838 
3839 	if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3840 		ret = ocfs2_add_new_xattr_cluster(inode,
3841 						  xb_bh,
3842 						  &first_bh,
3843 						  &header_bh,
3844 						  &num_clusters,
3845 						  e_cpos,
3846 						  p_blkno,
3847 						  &extend);
3848 		if (ret) {
3849 			mlog_errno(ret);
3850 			goto out;
3851 		}
3852 	}
3853 
3854 	if (extend)
3855 		ret = ocfs2_extend_xattr_bucket(inode,
3856 						first_bh,
3857 						header_bh,
3858 						num_clusters);
3859 	if (ret)
3860 		mlog_errno(ret);
3861 out:
3862 	brelse(first_bh);
3863 	brelse(header_bh);
3864 	return ret;
3865 }
3866 
3867 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3868 					struct ocfs2_xattr_bucket *bucket,
3869 					int offs)
3870 {
3871 	int block_off = offs >> inode->i_sb->s_blocksize_bits;
3872 
3873 	offs = offs % inode->i_sb->s_blocksize;
3874 	return bucket->bhs[block_off]->b_data + offs;
3875 }
3876 
3877 /*
3878  * Handle the normal xattr set, including replace, delete and new.
3879  * When the bucket is empty, "is_empty" is set and the caller can
3880  * free this bucket.
3881  *
3882  * Note: "local" indicates the real data's locality. So we can't
3883  * just its bucket locality by its length.
3884  */
3885 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3886 					 struct ocfs2_xattr_info *xi,
3887 					 struct ocfs2_xattr_search *xs,
3888 					 u32 name_hash,
3889 					 int local,
3890 					 int *is_empty)
3891 {
3892 	struct ocfs2_xattr_entry *last, *xe;
3893 	int name_len = strlen(xi->name);
3894 	struct ocfs2_xattr_header *xh = xs->header;
3895 	u16 count = le16_to_cpu(xh->xh_count), start;
3896 	size_t blocksize = inode->i_sb->s_blocksize;
3897 	char *val;
3898 	size_t offs, size, new_size;
3899 
3900 	last = &xh->xh_entries[count];
3901 	if (!xs->not_found) {
3902 		xe = xs->here;
3903 		offs = le16_to_cpu(xe->xe_name_offset);
3904 		if (ocfs2_xattr_is_local(xe))
3905 			size = OCFS2_XATTR_SIZE(name_len) +
3906 			OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3907 		else
3908 			size = OCFS2_XATTR_SIZE(name_len) +
3909 			OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3910 
3911 		/*
3912 		 * If the new value will be stored outside, xi->value has been
3913 		 * initalized as an empty ocfs2_xattr_value_root, and the same
3914 		 * goes with xi->value_len, so we can set new_size safely here.
3915 		 * See ocfs2_xattr_set_in_bucket.
3916 		 */
3917 		new_size = OCFS2_XATTR_SIZE(name_len) +
3918 			   OCFS2_XATTR_SIZE(xi->value_len);
3919 
3920 		le16_add_cpu(&xh->xh_name_value_len, -size);
3921 		if (xi->value) {
3922 			if (new_size > size)
3923 				goto set_new_name_value;
3924 
3925 			/* Now replace the old value with new one. */
3926 			if (local)
3927 				xe->xe_value_size = cpu_to_le64(xi->value_len);
3928 			else
3929 				xe->xe_value_size = 0;
3930 
3931 			val = ocfs2_xattr_bucket_get_val(inode,
3932 							 &xs->bucket, offs);
3933 			memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3934 			       size - OCFS2_XATTR_SIZE(name_len));
3935 			if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3936 				memcpy(val + OCFS2_XATTR_SIZE(name_len),
3937 				       xi->value, xi->value_len);
3938 
3939 			le16_add_cpu(&xh->xh_name_value_len, new_size);
3940 			ocfs2_xattr_set_local(xe, local);
3941 			return;
3942 		} else {
3943 			/* Remove the old entry. */
3944 			last -= 1;
3945 			memmove(xe, xe + 1,
3946 				(void *)last - (void *)xe);
3947 			memset(last, 0, sizeof(struct ocfs2_xattr_entry));
3948 			le16_add_cpu(&xh->xh_count, -1);
3949 			if (xh->xh_count == 0 && is_empty)
3950 				*is_empty = 1;
3951 			return;
3952 		}
3953 	} else {
3954 		/* find a new entry for insert. */
3955 		int low = 0, high = count - 1, tmp;
3956 		struct ocfs2_xattr_entry *tmp_xe;
3957 
3958 		while (low <= high) {
3959 			tmp = (low + high) / 2;
3960 			tmp_xe = &xh->xh_entries[tmp];
3961 
3962 			if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
3963 				low = tmp + 1;
3964 			else if (name_hash <
3965 				 le32_to_cpu(tmp_xe->xe_name_hash))
3966 				high = tmp - 1;
3967 			else
3968 				break;
3969 		}
3970 
3971 		xe = &xh->xh_entries[low];
3972 		if (low != count)
3973 			memmove(xe + 1, xe, (void *)last - (void *)xe);
3974 
3975 		le16_add_cpu(&xh->xh_count, 1);
3976 		memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
3977 		xe->xe_name_hash = cpu_to_le32(name_hash);
3978 		xe->xe_name_len = name_len;
3979 		ocfs2_xattr_set_type(xe, xi->name_index);
3980 	}
3981 
3982 set_new_name_value:
3983 	/* Insert the new name+value. */
3984 	size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
3985 
3986 	/*
3987 	 * We must make sure that the name/value pair
3988 	 * exists in the same block.
3989 	 */
3990 	offs = le16_to_cpu(xh->xh_free_start);
3991 	start = offs - size;
3992 
3993 	if (start >> inode->i_sb->s_blocksize_bits !=
3994 	    (offs - 1) >> inode->i_sb->s_blocksize_bits) {
3995 		offs = offs - offs % blocksize;
3996 		xh->xh_free_start = cpu_to_le16(offs);
3997 	}
3998 
3999 	val = ocfs2_xattr_bucket_get_val(inode,
4000 					 &xs->bucket, offs - size);
4001 	xe->xe_name_offset = cpu_to_le16(offs - size);
4002 
4003 	memset(val, 0, size);
4004 	memcpy(val, xi->name, name_len);
4005 	memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4006 
4007 	xe->xe_value_size = cpu_to_le64(xi->value_len);
4008 	ocfs2_xattr_set_local(xe, local);
4009 	xs->here = xe;
4010 	le16_add_cpu(&xh->xh_free_start, -size);
4011 	le16_add_cpu(&xh->xh_name_value_len, size);
4012 
4013 	return;
4014 }
4015 
4016 static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4017 					     handle_t *handle,
4018 					     struct ocfs2_xattr_search *xs,
4019 					     struct buffer_head **bhs,
4020 					     u16 bh_num)
4021 {
4022 	int ret = 0, off, block_off;
4023 	struct ocfs2_xattr_entry *xe = xs->here;
4024 
4025 	/*
4026 	 * First calculate all the blocks we should journal_access
4027 	 * and journal_dirty. The first block should always be touched.
4028 	 */
4029 	ret = ocfs2_journal_dirty(handle, bhs[0]);
4030 	if (ret)
4031 		mlog_errno(ret);
4032 
4033 	/* calc the data. */
4034 	off = le16_to_cpu(xe->xe_name_offset);
4035 	block_off = off >> inode->i_sb->s_blocksize_bits;
4036 	ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4037 	if (ret)
4038 		mlog_errno(ret);
4039 
4040 	return ret;
4041 }
4042 
4043 /*
4044  * Set the xattr entry in the specified bucket.
4045  * The bucket is indicated by xs->bucket and it should have the enough
4046  * space for the xattr insertion.
4047  */
4048 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4049 					   struct ocfs2_xattr_info *xi,
4050 					   struct ocfs2_xattr_search *xs,
4051 					   u32 name_hash,
4052 					   int local,
4053 					   int *bucket_empty)
4054 {
4055 	int i, ret;
4056 	handle_t *handle = NULL;
4057 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4058 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4059 
4060 	mlog(0, "Set xattr entry len = %d index = %d in bucket %llu\n",
4061 	     xi->value_len, xi->name_index,
4062 	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4063 
4064 	if (!xs->bucket.bhs[1]) {
4065 		ret = ocfs2_read_blocks(osb,
4066 					xs->bucket.bhs[0]->b_blocknr + 1,
4067 					blk_per_bucket - 1, &xs->bucket.bhs[1],
4068 					OCFS2_BH_CACHED, inode);
4069 		if (ret) {
4070 			mlog_errno(ret);
4071 			goto out;
4072 		}
4073 	}
4074 
4075 	handle = ocfs2_start_trans(osb, blk_per_bucket);
4076 	if (IS_ERR(handle)) {
4077 		ret = PTR_ERR(handle);
4078 		handle = NULL;
4079 		mlog_errno(ret);
4080 		goto out;
4081 	}
4082 
4083 	for (i = 0; i < blk_per_bucket; i++) {
4084 		ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4085 					   OCFS2_JOURNAL_ACCESS_WRITE);
4086 		if (ret < 0) {
4087 			mlog_errno(ret);
4088 			goto out;
4089 		}
4090 	}
4091 
4092 	ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash,
4093 				     local, bucket_empty);
4094 
4095 	/*Only dirty the blocks we have touched in set xattr. */
4096 	ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4097 						xs->bucket.bhs, blk_per_bucket);
4098 	if (ret)
4099 		mlog_errno(ret);
4100 out:
4101 	ocfs2_commit_trans(osb, handle);
4102 
4103 	return ret;
4104 }
4105 
4106 static int ocfs2_xattr_value_update_size(struct inode *inode,
4107 					 struct buffer_head *xe_bh,
4108 					 struct ocfs2_xattr_entry *xe,
4109 					 u64 new_size)
4110 {
4111 	int ret;
4112 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4113 	handle_t *handle = NULL;
4114 
4115 	handle = ocfs2_start_trans(osb, 1);
4116 	if (handle == NULL) {
4117 		ret = -ENOMEM;
4118 		mlog_errno(ret);
4119 		goto out;
4120 	}
4121 
4122 	ret = ocfs2_journal_access(handle, inode, xe_bh,
4123 				   OCFS2_JOURNAL_ACCESS_WRITE);
4124 	if (ret < 0) {
4125 		mlog_errno(ret);
4126 		goto out_commit;
4127 	}
4128 
4129 	xe->xe_value_size = cpu_to_le64(new_size);
4130 
4131 	ret = ocfs2_journal_dirty(handle, xe_bh);
4132 	if (ret < 0)
4133 		mlog_errno(ret);
4134 
4135 out_commit:
4136 	ocfs2_commit_trans(osb, handle);
4137 out:
4138 	return ret;
4139 }
4140 
4141 /*
4142  * Truncate the specified xe_off entry in xattr bucket.
4143  * bucket is indicated by header_bh and len is the new length.
4144  * Both the ocfs2_xattr_value_root and the entry will be updated here.
4145  *
4146  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4147  */
4148 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4149 					     struct buffer_head *header_bh,
4150 					     int xe_off,
4151 					     int len)
4152 {
4153 	int ret, offset;
4154 	u64 value_blk;
4155 	struct buffer_head *value_bh = NULL;
4156 	struct ocfs2_xattr_value_root *xv;
4157 	struct ocfs2_xattr_entry *xe;
4158 	struct ocfs2_xattr_header *xh =
4159 			(struct ocfs2_xattr_header *)header_bh->b_data;
4160 	size_t blocksize = inode->i_sb->s_blocksize;
4161 
4162 	xe = &xh->xh_entries[xe_off];
4163 
4164 	BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4165 
4166 	offset = le16_to_cpu(xe->xe_name_offset) +
4167 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4168 
4169 	value_blk = offset / blocksize;
4170 
4171 	/* We don't allow ocfs2_xattr_value to be stored in different block. */
4172 	BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4173 	value_blk += header_bh->b_blocknr;
4174 
4175 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), value_blk,
4176 			       &value_bh, OCFS2_BH_CACHED, inode);
4177 	if (ret) {
4178 		mlog_errno(ret);
4179 		goto out;
4180 	}
4181 
4182 	xv = (struct ocfs2_xattr_value_root *)
4183 		(value_bh->b_data + offset % blocksize);
4184 
4185 	mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4186 	     xe_off, (unsigned long long)header_bh->b_blocknr, len);
4187 	ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4188 	if (ret) {
4189 		mlog_errno(ret);
4190 		goto out;
4191 	}
4192 
4193 	ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4194 	if (ret) {
4195 		mlog_errno(ret);
4196 		goto out;
4197 	}
4198 
4199 out:
4200 	brelse(value_bh);
4201 	return ret;
4202 }
4203 
4204 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4205 						struct ocfs2_xattr_search *xs,
4206 						int len)
4207 {
4208 	int ret, offset;
4209 	struct ocfs2_xattr_entry *xe = xs->here;
4210 	struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4211 
4212 	BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4213 
4214 	offset = xe - xh->xh_entries;
4215 	ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4216 						offset, len);
4217 	if (ret)
4218 		mlog_errno(ret);
4219 
4220 	return ret;
4221 }
4222 
4223 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4224 						struct ocfs2_xattr_search *xs,
4225 						char *val,
4226 						int value_len)
4227 {
4228 	int offset;
4229 	struct ocfs2_xattr_value_root *xv;
4230 	struct ocfs2_xattr_entry *xe = xs->here;
4231 
4232 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4233 
4234 	offset = le16_to_cpu(xe->xe_name_offset) +
4235 		 OCFS2_XATTR_SIZE(xe->xe_name_len);
4236 
4237 	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4238 
4239 	return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4240 }
4241 
4242 /*
4243  * Remove the xattr bucket pointed by bucket_bh.
4244  * All the buckets after it in the same xattr extent rec will be
4245  * move forward one by one.
4246  */
4247 static int ocfs2_rm_xattr_bucket(struct inode *inode,
4248 				 struct buffer_head *first_bh,
4249 				 struct ocfs2_xattr_bucket *bucket)
4250 {
4251 	int ret = 0, credits;
4252 	struct ocfs2_xattr_header *xh =
4253 				(struct ocfs2_xattr_header *)first_bh->b_data;
4254 	u16 bucket_num = le16_to_cpu(xh->xh_num_buckets);
4255 	u64 end, start = bucket->bhs[0]->b_blocknr;
4256 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4257 	handle_t *handle;
4258 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4259 
4260 	end = first_bh->b_blocknr + (bucket_num - 1) * blk_per_bucket;
4261 
4262 	mlog(0, "rm xattr bucket %llu\n", start);
4263 	/*
4264 	 * We need to update the first xattr_header and all the buckets starting
4265 	 * from start in this xattr rec.
4266 	 *
4267 	 * XXX: Should we empty the old last bucket here?
4268 	 */
4269 	credits = 1 + end - start;
4270 	handle = ocfs2_start_trans(osb, credits);
4271 	if (IS_ERR(handle)) {
4272 		ret = PTR_ERR(handle);
4273 		mlog_errno(ret);
4274 		return ret;
4275 	}
4276 
4277 	ret = ocfs2_journal_access(handle, inode, first_bh,
4278 				   OCFS2_JOURNAL_ACCESS_WRITE);
4279 	if (ret) {
4280 		mlog_errno(ret);
4281 		goto out_commit;
4282 	}
4283 
4284 
4285 	while (start < end) {
4286 		ret = ocfs2_cp_xattr_bucket(inode, handle,
4287 					    start + blk_per_bucket,
4288 					    start, 0);
4289 		if (ret) {
4290 			mlog_errno(ret);
4291 			goto out_commit;
4292 		}
4293 		start += blk_per_bucket;
4294 	}
4295 
4296 	/* update the first_bh. */
4297 	xh->xh_num_buckets = cpu_to_le16(bucket_num - 1);
4298 	ocfs2_journal_dirty(handle, first_bh);
4299 
4300 out_commit:
4301 	ocfs2_commit_trans(osb, handle);
4302 	return ret;
4303 }
4304 
4305 static int ocfs2_rm_xattr_cluster(struct inode *inode,
4306 				  struct buffer_head *root_bh,
4307 				  u64 blkno,
4308 				  u32 cpos,
4309 				  u32 len)
4310 {
4311 	int ret;
4312 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4313 	struct inode *tl_inode = osb->osb_tl_inode;
4314 	handle_t *handle;
4315 	struct ocfs2_xattr_block *xb =
4316 			(struct ocfs2_xattr_block *)root_bh->b_data;
4317 	struct ocfs2_extent_list *root_el = &xb->xb_attrs.xb_root.xt_list;
4318 	struct ocfs2_alloc_context *meta_ac = NULL;
4319 	struct ocfs2_cached_dealloc_ctxt dealloc;
4320 
4321 	ocfs2_init_dealloc_ctxt(&dealloc);
4322 
4323 	mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4324 	     cpos, len, (unsigned long long)blkno);
4325 
4326 	ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4327 
4328 	ret = ocfs2_lock_allocators(inode, root_bh, root_el,
4329 				    0, 1, NULL, &meta_ac,
4330 				    OCFS2_XATTR_TREE_EXTENT, NULL);
4331 	if (ret) {
4332 		mlog_errno(ret);
4333 		return ret;
4334 	}
4335 
4336 	mutex_lock(&tl_inode->i_mutex);
4337 
4338 	if (ocfs2_truncate_log_needs_flush(osb)) {
4339 		ret = __ocfs2_flush_truncate_log(osb);
4340 		if (ret < 0) {
4341 			mlog_errno(ret);
4342 			goto out;
4343 		}
4344 	}
4345 
4346 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4347 	if (handle == NULL) {
4348 		ret = -ENOMEM;
4349 		mlog_errno(ret);
4350 		goto out;
4351 	}
4352 
4353 	ret = ocfs2_journal_access(handle, inode, root_bh,
4354 				   OCFS2_JOURNAL_ACCESS_WRITE);
4355 	if (ret) {
4356 		mlog_errno(ret);
4357 		goto out_commit;
4358 	}
4359 
4360 	ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
4361 				  &dealloc, OCFS2_XATTR_TREE_EXTENT, NULL);
4362 	if (ret) {
4363 		mlog_errno(ret);
4364 		goto out_commit;
4365 	}
4366 
4367 	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4368 
4369 	ret = ocfs2_journal_dirty(handle, root_bh);
4370 	if (ret) {
4371 		mlog_errno(ret);
4372 		goto out_commit;
4373 	}
4374 
4375 	ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4376 	if (ret)
4377 		mlog_errno(ret);
4378 
4379 out_commit:
4380 	ocfs2_commit_trans(osb, handle);
4381 out:
4382 	ocfs2_schedule_truncate_log_flush(osb, 1);
4383 
4384 	mutex_unlock(&tl_inode->i_mutex);
4385 
4386 	if (meta_ac)
4387 		ocfs2_free_alloc_context(meta_ac);
4388 
4389 	ocfs2_run_deallocs(osb, &dealloc);
4390 
4391 	return ret;
4392 }
4393 
4394 /*
4395  * Free the xattr bucket indicated by xs->bucket and if all the buckets
4396  * in the clusters is free, free the clusters also.
4397  */
4398 static int ocfs2_xattr_bucket_shrink(struct inode *inode,
4399 				     struct ocfs2_xattr_info *xi,
4400 				     struct ocfs2_xattr_search *xs,
4401 				     u32 name_hash)
4402 {
4403 	int ret;
4404 	u32 e_cpos, num_clusters;
4405 	u64 p_blkno;
4406 	struct buffer_head *first_bh = NULL;
4407 	struct ocfs2_xattr_header *first_xh;
4408 	struct ocfs2_xattr_block *xb =
4409 			(struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
4410 
4411 	BUG_ON(xs->header->xh_count != 0);
4412 
4413 	ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4414 				  &e_cpos, &num_clusters,
4415 				  &xb->xb_attrs.xb_root.xt_list);
4416 	if (ret) {
4417 		mlog_errno(ret);
4418 		return ret;
4419 	}
4420 
4421 	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
4422 			       &first_bh, OCFS2_BH_CACHED, inode);
4423 	if (ret) {
4424 		mlog_errno(ret);
4425 		return ret;
4426 	}
4427 
4428 	ret = ocfs2_rm_xattr_bucket(inode, first_bh, &xs->bucket);
4429 	if (ret) {
4430 		mlog_errno(ret);
4431 		goto out;
4432 	}
4433 
4434 	first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4435 	if (first_xh->xh_num_buckets == 0)
4436 		ret = ocfs2_rm_xattr_cluster(inode, xs->xattr_bh,
4437 					     p_blkno, e_cpos,
4438 					     num_clusters);
4439 
4440 out:
4441 	brelse(first_bh);
4442 	return ret;
4443 }
4444 
4445 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4446 					 struct ocfs2_xattr_search *xs)
4447 {
4448 	handle_t *handle = NULL;
4449 	struct ocfs2_xattr_header *xh = xs->bucket.xh;
4450 	struct ocfs2_xattr_entry *last = &xh->xh_entries[
4451 						le16_to_cpu(xh->xh_count) - 1];
4452 	int ret = 0;
4453 
4454 	handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4455 	if (IS_ERR(handle)) {
4456 		ret = PTR_ERR(handle);
4457 		mlog_errno(ret);
4458 		return;
4459 	}
4460 
4461 	ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4462 				   OCFS2_JOURNAL_ACCESS_WRITE);
4463 	if (ret) {
4464 		mlog_errno(ret);
4465 		goto out_commit;
4466 	}
4467 
4468 	/* Remove the old entry. */
4469 	memmove(xs->here, xs->here + 1,
4470 		(void *)last - (void *)xs->here);
4471 	memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4472 	le16_add_cpu(&xh->xh_count, -1);
4473 
4474 	ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4475 	if (ret < 0)
4476 		mlog_errno(ret);
4477 out_commit:
4478 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4479 }
4480 
4481 /*
4482  * Set the xattr name/value in the bucket specified in xs.
4483  *
4484  * As the new value in xi may be stored in the bucket or in an outside cluster,
4485  * we divide the whole process into 3 steps:
4486  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4487  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4488  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4489  * 4. If the clusters for the new outside value can't be allocated, we need
4490  *    to free the xattr we allocated in set.
4491  */
4492 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4493 				     struct ocfs2_xattr_info *xi,
4494 				     struct ocfs2_xattr_search *xs)
4495 {
4496 	int ret, local = 1, bucket_empty = 0;
4497 	size_t value_len;
4498 	char *val = (char *)xi->value;
4499 	struct ocfs2_xattr_entry *xe = xs->here;
4500 	u32 name_hash = ocfs2_xattr_hash_by_name(inode,
4501 						 xi->name_index, xi->name);
4502 
4503 	if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4504 		/*
4505 		 * We need to truncate the xattr storage first.
4506 		 *
4507 		 * If both the old and new value are stored to
4508 		 * outside block, we only need to truncate
4509 		 * the storage and then set the value outside.
4510 		 *
4511 		 * If the new value should be stored within block,
4512 		 * we should free all the outside block first and
4513 		 * the modification to the xattr block will be done
4514 		 * by following steps.
4515 		 */
4516 		if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4517 			value_len = xi->value_len;
4518 		else
4519 			value_len = 0;
4520 
4521 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4522 							   value_len);
4523 		if (ret)
4524 			goto out;
4525 
4526 		if (value_len)
4527 			goto set_value_outside;
4528 	}
4529 
4530 	value_len = xi->value_len;
4531 	/* So we have to handle the inside block change now. */
4532 	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4533 		/*
4534 		 * If the new value will be stored outside of block,
4535 		 * initalize a new empty value root and insert it first.
4536 		 */
4537 		local = 0;
4538 		xi->value = &def_xv;
4539 		xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4540 	}
4541 
4542 	ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash,
4543 					      local, &bucket_empty);
4544 	if (ret) {
4545 		mlog_errno(ret);
4546 		goto out;
4547 	}
4548 
4549 	if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4550 		/* allocate the space now for the outside block storage. */
4551 		ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4552 							   value_len);
4553 		if (ret) {
4554 			mlog_errno(ret);
4555 
4556 			if (xs->not_found) {
4557 				/*
4558 				 * We can't allocate enough clusters for outside
4559 				 * storage and we have allocated xattr already,
4560 				 * so need to remove it.
4561 				 */
4562 				ocfs2_xattr_bucket_remove_xs(inode, xs);
4563 			}
4564 			goto out;
4565 		}
4566 	} else {
4567 		if (bucket_empty)
4568 			ret = ocfs2_xattr_bucket_shrink(inode, xi,
4569 							xs, name_hash);
4570 		goto out;
4571 	}
4572 
4573 set_value_outside:
4574 	ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4575 out:
4576 	return ret;
4577 }
4578 
4579 /* check whether the xattr bucket is filled up with the same hash value. */
4580 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4581 					      struct ocfs2_xattr_bucket *bucket)
4582 {
4583 	struct ocfs2_xattr_header *xh = bucket->xh;
4584 
4585 	if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4586 	    xh->xh_entries[0].xe_name_hash) {
4587 		mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4588 		     "hash = %u\n",
4589 		     (unsigned long long)bucket->bhs[0]->b_blocknr,
4590 		     le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4591 		return -ENOSPC;
4592 	}
4593 
4594 	return 0;
4595 }
4596 
4597 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4598 					     struct ocfs2_xattr_info *xi,
4599 					     struct ocfs2_xattr_search *xs)
4600 {
4601 	struct ocfs2_xattr_header *xh;
4602 	struct ocfs2_xattr_entry *xe;
4603 	u16 count, header_size, xh_free_start;
4604 	int i, free, max_free, need, old;
4605 	size_t value_size = 0, name_len = strlen(xi->name);
4606 	size_t blocksize = inode->i_sb->s_blocksize;
4607 	int ret, allocation = 0;
4608 	u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4609 
4610 	mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4611 
4612 try_again:
4613 	xh = xs->header;
4614 	count = le16_to_cpu(xh->xh_count);
4615 	xh_free_start = le16_to_cpu(xh->xh_free_start);
4616 	header_size = sizeof(struct ocfs2_xattr_header) +
4617 			count * sizeof(struct ocfs2_xattr_entry);
4618 	max_free = OCFS2_XATTR_BUCKET_SIZE -
4619 		le16_to_cpu(xh->xh_name_value_len) - header_size;
4620 
4621 	mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4622 			"of %u which exceed block size\n",
4623 			(unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4624 			header_size);
4625 
4626 	if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4627 		value_size = OCFS2_XATTR_ROOT_SIZE;
4628 	else if (xi->value)
4629 		value_size = OCFS2_XATTR_SIZE(xi->value_len);
4630 
4631 	if (xs->not_found)
4632 		need = sizeof(struct ocfs2_xattr_entry) +
4633 			OCFS2_XATTR_SIZE(name_len) + value_size;
4634 	else {
4635 		need = value_size + OCFS2_XATTR_SIZE(name_len);
4636 
4637 		/*
4638 		 * We only replace the old value if the new length is smaller
4639 		 * than the old one. Otherwise we will allocate new space in the
4640 		 * bucket to store it.
4641 		 */
4642 		xe = xs->here;
4643 		if (ocfs2_xattr_is_local(xe))
4644 			old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4645 		else
4646 			old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4647 
4648 		if (old >= value_size)
4649 			need = 0;
4650 	}
4651 
4652 	free = xh_free_start - header_size;
4653 	/*
4654 	 * We need to make sure the new name/value pair
4655 	 * can exist in the same block.
4656 	 */
4657 	if (xh_free_start % blocksize < need)
4658 		free -= xh_free_start % blocksize;
4659 
4660 	mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4661 	     "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4662 	     " %u\n", xs->not_found,
4663 	     (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4664 	     free, need, max_free, le16_to_cpu(xh->xh_free_start),
4665 	     le16_to_cpu(xh->xh_name_value_len));
4666 
4667 	if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4668 		if (need <= max_free &&
4669 		    count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4670 			/*
4671 			 * We can create the space by defragment. Since only the
4672 			 * name/value will be moved, the xe shouldn't be changed
4673 			 * in xs.
4674 			 */
4675 			ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4676 			if (ret) {
4677 				mlog_errno(ret);
4678 				goto out;
4679 			}
4680 
4681 			xh_free_start = le16_to_cpu(xh->xh_free_start);
4682 			free = xh_free_start - header_size;
4683 			if (xh_free_start % blocksize < need)
4684 				free -= xh_free_start % blocksize;
4685 
4686 			if (free >= need)
4687 				goto xattr_set;
4688 
4689 			mlog(0, "Can't get enough space for xattr insert by "
4690 			     "defragment. Need %u bytes, but we have %d, so "
4691 			     "allocate new bucket for it.\n", need, free);
4692 		}
4693 
4694 		/*
4695 		 * We have to add new buckets or clusters and one
4696 		 * allocation should leave us enough space for insert.
4697 		 */
4698 		BUG_ON(allocation);
4699 
4700 		/*
4701 		 * We do not allow for overlapping ranges between buckets. And
4702 		 * the maximum number of collisions we will allow for then is
4703 		 * one bucket's worth, so check it here whether we need to
4704 		 * add a new bucket for the insert.
4705 		 */
4706 		ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
4707 		if (ret) {
4708 			mlog_errno(ret);
4709 			goto out;
4710 		}
4711 
4712 		ret = ocfs2_add_new_xattr_bucket(inode,
4713 						 xs->xattr_bh,
4714 						 xs->bucket.bhs[0]);
4715 		if (ret) {
4716 			mlog_errno(ret);
4717 			goto out;
4718 		}
4719 
4720 		for (i = 0; i < blk_per_bucket; i++)
4721 			brelse(xs->bucket.bhs[i]);
4722 
4723 		memset(&xs->bucket, 0, sizeof(xs->bucket));
4724 
4725 		ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4726 						   xi->name_index,
4727 						   xi->name, xs);
4728 		if (ret && ret != -ENODATA)
4729 			goto out;
4730 		xs->not_found = ret;
4731 		allocation = 1;
4732 		goto try_again;
4733 	}
4734 
4735 xattr_set:
4736 	ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4737 out:
4738 	mlog_exit(ret);
4739 	return ret;
4740 }
4741