xref: /openbmc/linux/fs/ocfs2/refcounttree.c (revision 8bf396de)
1f2c870e3STao Ma /* -*- mode: c; c-basic-offset: 8; -*-
2f2c870e3STao Ma  * vim: noexpandtab sw=8 ts=8 sts=0:
3f2c870e3STao Ma  *
4f2c870e3STao Ma  * refcounttree.c
5f2c870e3STao Ma  *
6f2c870e3STao Ma  * Copyright (C) 2009 Oracle.  All rights reserved.
7f2c870e3STao Ma  *
8f2c870e3STao Ma  * This program is free software; you can redistribute it and/or
9f2c870e3STao Ma  * modify it under the terms of the GNU General Public
10f2c870e3STao Ma  * License version 2 as published by the Free Software Foundation.
11f2c870e3STao Ma  *
12f2c870e3STao Ma  * This program is distributed in the hope that it will be useful,
13f2c870e3STao Ma  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14f2c870e3STao Ma  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15f2c870e3STao Ma  * General Public License for more details.
16f2c870e3STao Ma  */
17f2c870e3STao Ma 
18f2c870e3STao Ma #define MLOG_MASK_PREFIX ML_REFCOUNT
19f2c870e3STao Ma #include <cluster/masklog.h>
20f2c870e3STao Ma #include "ocfs2.h"
21f2c870e3STao Ma #include "inode.h"
22f2c870e3STao Ma #include "alloc.h"
23f2c870e3STao Ma #include "suballoc.h"
24f2c870e3STao Ma #include "journal.h"
25f2c870e3STao Ma #include "uptodate.h"
26f2c870e3STao Ma #include "super.h"
27f2c870e3STao Ma #include "buffer_head_io.h"
28f2c870e3STao Ma #include "blockcheck.h"
29c732eb16STao Ma #include "refcounttree.h"
308bf396deSTao Ma #include "sysfile.h"
31374a263eSTao Ma #include "dlmglue.h"
32c732eb16STao Ma 
33c732eb16STao Ma static inline struct ocfs2_refcount_tree *
34c732eb16STao Ma cache_info_to_refcount(struct ocfs2_caching_info *ci)
35c732eb16STao Ma {
36c732eb16STao Ma 	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
37c732eb16STao Ma }
38f2c870e3STao Ma 
39f2c870e3STao Ma static int ocfs2_validate_refcount_block(struct super_block *sb,
40f2c870e3STao Ma 					 struct buffer_head *bh)
41f2c870e3STao Ma {
42f2c870e3STao Ma 	int rc;
43f2c870e3STao Ma 	struct ocfs2_refcount_block *rb =
44f2c870e3STao Ma 		(struct ocfs2_refcount_block *)bh->b_data;
45f2c870e3STao Ma 
46f2c870e3STao Ma 	mlog(0, "Validating refcount block %llu\n",
47f2c870e3STao Ma 	     (unsigned long long)bh->b_blocknr);
48f2c870e3STao Ma 
49f2c870e3STao Ma 	BUG_ON(!buffer_uptodate(bh));
50f2c870e3STao Ma 
51f2c870e3STao Ma 	/*
52f2c870e3STao Ma 	 * If the ecc fails, we return the error but otherwise
53f2c870e3STao Ma 	 * leave the filesystem running.  We know any error is
54f2c870e3STao Ma 	 * local to this block.
55f2c870e3STao Ma 	 */
56f2c870e3STao Ma 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
57f2c870e3STao Ma 	if (rc) {
58f2c870e3STao Ma 		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
59f2c870e3STao Ma 		     (unsigned long long)bh->b_blocknr);
60f2c870e3STao Ma 		return rc;
61f2c870e3STao Ma 	}
62f2c870e3STao Ma 
63f2c870e3STao Ma 
64f2c870e3STao Ma 	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
65f2c870e3STao Ma 		ocfs2_error(sb,
66f2c870e3STao Ma 			    "Refcount block #%llu has bad signature %.*s",
67f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr, 7,
68f2c870e3STao Ma 			    rb->rf_signature);
69f2c870e3STao Ma 		return -EINVAL;
70f2c870e3STao Ma 	}
71f2c870e3STao Ma 
72f2c870e3STao Ma 	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
73f2c870e3STao Ma 		ocfs2_error(sb,
74f2c870e3STao Ma 			    "Refcount block #%llu has an invalid rf_blkno "
75f2c870e3STao Ma 			    "of %llu",
76f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr,
77f2c870e3STao Ma 			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
78f2c870e3STao Ma 		return -EINVAL;
79f2c870e3STao Ma 	}
80f2c870e3STao Ma 
81f2c870e3STao Ma 	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
82f2c870e3STao Ma 		ocfs2_error(sb,
83f2c870e3STao Ma 			    "Refcount block #%llu has an invalid "
84f2c870e3STao Ma 			    "rf_fs_generation of #%u",
85f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr,
86f2c870e3STao Ma 			    le32_to_cpu(rb->rf_fs_generation));
87f2c870e3STao Ma 		return -EINVAL;
88f2c870e3STao Ma 	}
89f2c870e3STao Ma 
90f2c870e3STao Ma 	return 0;
91f2c870e3STao Ma }
92f2c870e3STao Ma 
93f2c870e3STao Ma static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
94f2c870e3STao Ma 				     u64 rb_blkno,
95f2c870e3STao Ma 				     struct buffer_head **bh)
96f2c870e3STao Ma {
97f2c870e3STao Ma 	int rc;
98f2c870e3STao Ma 	struct buffer_head *tmp = *bh;
99f2c870e3STao Ma 
100f2c870e3STao Ma 	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
101f2c870e3STao Ma 			      ocfs2_validate_refcount_block);
102f2c870e3STao Ma 
103f2c870e3STao Ma 	/* If ocfs2_read_block() got us a new bh, pass it up. */
104f2c870e3STao Ma 	if (!rc && !*bh)
105f2c870e3STao Ma 		*bh = tmp;
106f2c870e3STao Ma 
107f2c870e3STao Ma 	return rc;
108f2c870e3STao Ma }
109c732eb16STao Ma 
110c732eb16STao Ma static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
111c732eb16STao Ma {
112c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
113c732eb16STao Ma 
114c732eb16STao Ma 	return rf->rf_blkno;
115c732eb16STao Ma }
116c732eb16STao Ma 
117c732eb16STao Ma static struct super_block *
118c732eb16STao Ma ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
119c732eb16STao Ma {
120c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
121c732eb16STao Ma 
122c732eb16STao Ma 	return rf->rf_sb;
123c732eb16STao Ma }
124c732eb16STao Ma 
125c732eb16STao Ma static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
126c732eb16STao Ma {
127c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
128c732eb16STao Ma 
129c732eb16STao Ma 	spin_lock(&rf->rf_lock);
130c732eb16STao Ma }
131c732eb16STao Ma 
132c732eb16STao Ma static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
133c732eb16STao Ma {
134c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
135c732eb16STao Ma 
136c732eb16STao Ma 	spin_unlock(&rf->rf_lock);
137c732eb16STao Ma }
138c732eb16STao Ma 
139c732eb16STao Ma static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
140c732eb16STao Ma {
141c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
142c732eb16STao Ma 
143c732eb16STao Ma 	mutex_lock(&rf->rf_io_mutex);
144c732eb16STao Ma }
145c732eb16STao Ma 
146c732eb16STao Ma static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
147c732eb16STao Ma {
148c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
149c732eb16STao Ma 
150c732eb16STao Ma 	mutex_unlock(&rf->rf_io_mutex);
151c732eb16STao Ma }
152c732eb16STao Ma 
153c732eb16STao Ma static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
154c732eb16STao Ma 	.co_owner		= ocfs2_refcount_cache_owner,
155c732eb16STao Ma 	.co_get_super		= ocfs2_refcount_cache_get_super,
156c732eb16STao Ma 	.co_cache_lock		= ocfs2_refcount_cache_lock,
157c732eb16STao Ma 	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
158c732eb16STao Ma 	.co_io_lock		= ocfs2_refcount_cache_io_lock,
159c732eb16STao Ma 	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
160c732eb16STao Ma };
161374a263eSTao Ma 
162374a263eSTao Ma static struct ocfs2_refcount_tree *
163374a263eSTao Ma ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
164374a263eSTao Ma {
165374a263eSTao Ma 	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
166374a263eSTao Ma 	struct ocfs2_refcount_tree *tree = NULL;
167374a263eSTao Ma 
168374a263eSTao Ma 	while (n) {
169374a263eSTao Ma 		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
170374a263eSTao Ma 
171374a263eSTao Ma 		if (blkno < tree->rf_blkno)
172374a263eSTao Ma 			n = n->rb_left;
173374a263eSTao Ma 		else if (blkno > tree->rf_blkno)
174374a263eSTao Ma 			n = n->rb_right;
175374a263eSTao Ma 		else
176374a263eSTao Ma 			return tree;
177374a263eSTao Ma 	}
178374a263eSTao Ma 
179374a263eSTao Ma 	return NULL;
180374a263eSTao Ma }
181374a263eSTao Ma 
182374a263eSTao Ma /* osb_lock is already locked. */
183374a263eSTao Ma static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
184374a263eSTao Ma 				       struct ocfs2_refcount_tree *new)
185374a263eSTao Ma {
186374a263eSTao Ma 	u64 rf_blkno = new->rf_blkno;
187374a263eSTao Ma 	struct rb_node *parent = NULL;
188374a263eSTao Ma 	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
189374a263eSTao Ma 	struct ocfs2_refcount_tree *tmp;
190374a263eSTao Ma 
191374a263eSTao Ma 	while (*p) {
192374a263eSTao Ma 		parent = *p;
193374a263eSTao Ma 
194374a263eSTao Ma 		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
195374a263eSTao Ma 			       rf_node);
196374a263eSTao Ma 
197374a263eSTao Ma 		if (rf_blkno < tmp->rf_blkno)
198374a263eSTao Ma 			p = &(*p)->rb_left;
199374a263eSTao Ma 		else if (rf_blkno > tmp->rf_blkno)
200374a263eSTao Ma 			p = &(*p)->rb_right;
201374a263eSTao Ma 		else {
202374a263eSTao Ma 			/* This should never happen! */
203374a263eSTao Ma 			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
204374a263eSTao Ma 			     (unsigned long long)rf_blkno);
205374a263eSTao Ma 			BUG();
206374a263eSTao Ma 		}
207374a263eSTao Ma 	}
208374a263eSTao Ma 
209374a263eSTao Ma 	rb_link_node(&new->rf_node, parent, p);
210374a263eSTao Ma 	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
211374a263eSTao Ma }
212374a263eSTao Ma 
213374a263eSTao Ma static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
214374a263eSTao Ma {
215374a263eSTao Ma 	ocfs2_metadata_cache_exit(&tree->rf_ci);
216374a263eSTao Ma 	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
217374a263eSTao Ma 	ocfs2_lock_res_free(&tree->rf_lockres);
218374a263eSTao Ma 	kfree(tree);
219374a263eSTao Ma }
220374a263eSTao Ma 
221374a263eSTao Ma static inline void
222374a263eSTao Ma ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
223374a263eSTao Ma 					struct ocfs2_refcount_tree *tree)
224374a263eSTao Ma {
225374a263eSTao Ma 	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
226374a263eSTao Ma 	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
227374a263eSTao Ma 		osb->osb_ref_tree_lru = NULL;
228374a263eSTao Ma }
229374a263eSTao Ma 
230374a263eSTao Ma static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
231374a263eSTao Ma 					struct ocfs2_refcount_tree *tree)
232374a263eSTao Ma {
233374a263eSTao Ma 	spin_lock(&osb->osb_lock);
234374a263eSTao Ma 	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
235374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
236374a263eSTao Ma }
237374a263eSTao Ma 
238374a263eSTao Ma void ocfs2_kref_remove_refcount_tree(struct kref *kref)
239374a263eSTao Ma {
240374a263eSTao Ma 	struct ocfs2_refcount_tree *tree =
241374a263eSTao Ma 		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
242374a263eSTao Ma 
243374a263eSTao Ma 	ocfs2_free_refcount_tree(tree);
244374a263eSTao Ma }
245374a263eSTao Ma 
246374a263eSTao Ma static inline void
247374a263eSTao Ma ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
248374a263eSTao Ma {
249374a263eSTao Ma 	kref_get(&tree->rf_getcnt);
250374a263eSTao Ma }
251374a263eSTao Ma 
252374a263eSTao Ma static inline void
253374a263eSTao Ma ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
254374a263eSTao Ma {
255374a263eSTao Ma 	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
256374a263eSTao Ma }
257374a263eSTao Ma 
258374a263eSTao Ma static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
259374a263eSTao Ma 					       struct super_block *sb)
260374a263eSTao Ma {
261374a263eSTao Ma 	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
262374a263eSTao Ma 	mutex_init(&new->rf_io_mutex);
263374a263eSTao Ma 	new->rf_sb = sb;
264374a263eSTao Ma 	spin_lock_init(&new->rf_lock);
265374a263eSTao Ma }
266374a263eSTao Ma 
267374a263eSTao Ma static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
268374a263eSTao Ma 					struct ocfs2_refcount_tree *new,
269374a263eSTao Ma 					u64 rf_blkno, u32 generation)
270374a263eSTao Ma {
271374a263eSTao Ma 	init_rwsem(&new->rf_sem);
272374a263eSTao Ma 	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
273374a263eSTao Ma 				     rf_blkno, generation);
274374a263eSTao Ma }
275374a263eSTao Ma 
2768bf396deSTao Ma static struct ocfs2_refcount_tree*
2778bf396deSTao Ma ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
2788bf396deSTao Ma {
2798bf396deSTao Ma 	struct ocfs2_refcount_tree *new;
2808bf396deSTao Ma 
2818bf396deSTao Ma 	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
2828bf396deSTao Ma 	if (!new)
2838bf396deSTao Ma 		return NULL;
2848bf396deSTao Ma 
2858bf396deSTao Ma 	new->rf_blkno = rf_blkno;
2868bf396deSTao Ma 	kref_init(&new->rf_getcnt);
2878bf396deSTao Ma 	ocfs2_init_refcount_tree_ci(new, osb->sb);
2888bf396deSTao Ma 
2898bf396deSTao Ma 	return new;
2908bf396deSTao Ma }
2918bf396deSTao Ma 
292374a263eSTao Ma static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
293374a263eSTao Ma 				   struct ocfs2_refcount_tree **ret_tree)
294374a263eSTao Ma {
295374a263eSTao Ma 	int ret = 0;
296374a263eSTao Ma 	struct ocfs2_refcount_tree *tree, *new = NULL;
297374a263eSTao Ma 	struct buffer_head *ref_root_bh = NULL;
298374a263eSTao Ma 	struct ocfs2_refcount_block *ref_rb;
299374a263eSTao Ma 
300374a263eSTao Ma 	spin_lock(&osb->osb_lock);
301374a263eSTao Ma 	if (osb->osb_ref_tree_lru &&
302374a263eSTao Ma 	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
303374a263eSTao Ma 		tree = osb->osb_ref_tree_lru;
304374a263eSTao Ma 	else
305374a263eSTao Ma 		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
306374a263eSTao Ma 	if (tree)
307374a263eSTao Ma 		goto out;
308374a263eSTao Ma 
309374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
310374a263eSTao Ma 
3118bf396deSTao Ma 	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
312374a263eSTao Ma 	if (!new) {
313374a263eSTao Ma 		ret = -ENOMEM;
3148bf396deSTao Ma 		mlog_errno(ret);
315374a263eSTao Ma 		return ret;
316374a263eSTao Ma 	}
317374a263eSTao Ma 	/*
318374a263eSTao Ma 	 * We need the generation to create the refcount tree lock and since
319374a263eSTao Ma 	 * it isn't changed during the tree modification, we are safe here to
320374a263eSTao Ma 	 * read without protection.
321374a263eSTao Ma 	 * We also have to purge the cache after we create the lock since the
322374a263eSTao Ma 	 * refcount block may have the stale data. It can only be trusted when
323374a263eSTao Ma 	 * we hold the refcount lock.
324374a263eSTao Ma 	 */
325374a263eSTao Ma 	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
326374a263eSTao Ma 	if (ret) {
327374a263eSTao Ma 		mlog_errno(ret);
328374a263eSTao Ma 		ocfs2_metadata_cache_exit(&new->rf_ci);
329374a263eSTao Ma 		kfree(new);
330374a263eSTao Ma 		return ret;
331374a263eSTao Ma 	}
332374a263eSTao Ma 
333374a263eSTao Ma 	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
334374a263eSTao Ma 	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
335374a263eSTao Ma 	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
336374a263eSTao Ma 				      new->rf_generation);
337374a263eSTao Ma 	ocfs2_metadata_cache_purge(&new->rf_ci);
338374a263eSTao Ma 
339374a263eSTao Ma 	spin_lock(&osb->osb_lock);
340374a263eSTao Ma 	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
341374a263eSTao Ma 	if (tree)
342374a263eSTao Ma 		goto out;
343374a263eSTao Ma 
344374a263eSTao Ma 	ocfs2_insert_refcount_tree(osb, new);
345374a263eSTao Ma 
346374a263eSTao Ma 	tree = new;
347374a263eSTao Ma 	new = NULL;
348374a263eSTao Ma 
349374a263eSTao Ma out:
350374a263eSTao Ma 	*ret_tree = tree;
351374a263eSTao Ma 
352374a263eSTao Ma 	osb->osb_ref_tree_lru = tree;
353374a263eSTao Ma 
354374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
355374a263eSTao Ma 
356374a263eSTao Ma 	if (new)
357374a263eSTao Ma 		ocfs2_free_refcount_tree(new);
358374a263eSTao Ma 
359374a263eSTao Ma 	brelse(ref_root_bh);
360374a263eSTao Ma 	return ret;
361374a263eSTao Ma }
362374a263eSTao Ma 
363374a263eSTao Ma static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
364374a263eSTao Ma {
365374a263eSTao Ma 	int ret;
366374a263eSTao Ma 	struct buffer_head *di_bh = NULL;
367374a263eSTao Ma 	struct ocfs2_dinode *di;
368374a263eSTao Ma 
369374a263eSTao Ma 	ret = ocfs2_read_inode_block(inode, &di_bh);
370374a263eSTao Ma 	if (ret) {
371374a263eSTao Ma 		mlog_errno(ret);
372374a263eSTao Ma 		goto out;
373374a263eSTao Ma 	}
374374a263eSTao Ma 
375374a263eSTao Ma 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
376374a263eSTao Ma 
377374a263eSTao Ma 	di = (struct ocfs2_dinode *)di_bh->b_data;
378374a263eSTao Ma 	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
379374a263eSTao Ma 	brelse(di_bh);
380374a263eSTao Ma out:
381374a263eSTao Ma 	return ret;
382374a263eSTao Ma }
383374a263eSTao Ma 
384374a263eSTao Ma static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
385374a263eSTao Ma 				      struct ocfs2_refcount_tree *tree, int rw)
386374a263eSTao Ma {
387374a263eSTao Ma 	int ret;
388374a263eSTao Ma 
389374a263eSTao Ma 	ret = ocfs2_refcount_lock(tree, rw);
390374a263eSTao Ma 	if (ret) {
391374a263eSTao Ma 		mlog_errno(ret);
392374a263eSTao Ma 		goto out;
393374a263eSTao Ma 	}
394374a263eSTao Ma 
395374a263eSTao Ma 	if (rw)
396374a263eSTao Ma 		down_write(&tree->rf_sem);
397374a263eSTao Ma 	else
398374a263eSTao Ma 		down_read(&tree->rf_sem);
399374a263eSTao Ma 
400374a263eSTao Ma out:
401374a263eSTao Ma 	return ret;
402374a263eSTao Ma }
403374a263eSTao Ma 
404374a263eSTao Ma /*
405374a263eSTao Ma  * Lock the refcount tree pointed by ref_blkno and return the tree.
406374a263eSTao Ma  * In most case, we lock the tree and read the refcount block.
407374a263eSTao Ma  * So read it here if the caller really needs it.
408374a263eSTao Ma  *
409374a263eSTao Ma  * If the tree has been re-created by other node, it will free the
410374a263eSTao Ma  * old one and re-create it.
411374a263eSTao Ma  */
412374a263eSTao Ma int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
413374a263eSTao Ma 			     u64 ref_blkno, int rw,
414374a263eSTao Ma 			     struct ocfs2_refcount_tree **ret_tree,
415374a263eSTao Ma 			     struct buffer_head **ref_bh)
416374a263eSTao Ma {
417374a263eSTao Ma 	int ret, delete_tree = 0;
418374a263eSTao Ma 	struct ocfs2_refcount_tree *tree = NULL;
419374a263eSTao Ma 	struct buffer_head *ref_root_bh = NULL;
420374a263eSTao Ma 	struct ocfs2_refcount_block *rb;
421374a263eSTao Ma 
422374a263eSTao Ma again:
423374a263eSTao Ma 	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
424374a263eSTao Ma 	if (ret) {
425374a263eSTao Ma 		mlog_errno(ret);
426374a263eSTao Ma 		return ret;
427374a263eSTao Ma 	}
428374a263eSTao Ma 
429374a263eSTao Ma 	ocfs2_refcount_tree_get(tree);
430374a263eSTao Ma 
431374a263eSTao Ma 	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
432374a263eSTao Ma 	if (ret) {
433374a263eSTao Ma 		mlog_errno(ret);
434374a263eSTao Ma 		ocfs2_refcount_tree_put(tree);
435374a263eSTao Ma 		goto out;
436374a263eSTao Ma 	}
437374a263eSTao Ma 
438374a263eSTao Ma 	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
439374a263eSTao Ma 					&ref_root_bh);
440374a263eSTao Ma 	if (ret) {
441374a263eSTao Ma 		mlog_errno(ret);
442374a263eSTao Ma 		ocfs2_unlock_refcount_tree(osb, tree, rw);
443374a263eSTao Ma 		ocfs2_refcount_tree_put(tree);
444374a263eSTao Ma 		goto out;
445374a263eSTao Ma 	}
446374a263eSTao Ma 
447374a263eSTao Ma 	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
448374a263eSTao Ma 	/*
449374a263eSTao Ma 	 * If the refcount block has been freed and re-created, we may need
450374a263eSTao Ma 	 * to recreate the refcount tree also.
451374a263eSTao Ma 	 *
452374a263eSTao Ma 	 * Here we just remove the tree from the rb-tree, and the last
453374a263eSTao Ma 	 * kref holder will unlock and delete this refcount_tree.
454374a263eSTao Ma 	 * Then we goto "again" and ocfs2_get_refcount_tree will create
455374a263eSTao Ma 	 * the new refcount tree for us.
456374a263eSTao Ma 	 */
457374a263eSTao Ma 	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
458374a263eSTao Ma 		if (!tree->rf_removed) {
459374a263eSTao Ma 			ocfs2_erase_refcount_tree_from_list(osb, tree);
460374a263eSTao Ma 			tree->rf_removed = 1;
461374a263eSTao Ma 			delete_tree = 1;
462374a263eSTao Ma 		}
463374a263eSTao Ma 
464374a263eSTao Ma 		ocfs2_unlock_refcount_tree(osb, tree, rw);
465374a263eSTao Ma 		/*
466374a263eSTao Ma 		 * We get an extra reference when we create the refcount
467374a263eSTao Ma 		 * tree, so another put will destroy it.
468374a263eSTao Ma 		 */
469374a263eSTao Ma 		if (delete_tree)
470374a263eSTao Ma 			ocfs2_refcount_tree_put(tree);
471374a263eSTao Ma 		brelse(ref_root_bh);
472374a263eSTao Ma 		ref_root_bh = NULL;
473374a263eSTao Ma 		goto again;
474374a263eSTao Ma 	}
475374a263eSTao Ma 
476374a263eSTao Ma 	*ret_tree = tree;
477374a263eSTao Ma 	if (ref_bh) {
478374a263eSTao Ma 		*ref_bh = ref_root_bh;
479374a263eSTao Ma 		ref_root_bh = NULL;
480374a263eSTao Ma 	}
481374a263eSTao Ma out:
482374a263eSTao Ma 	brelse(ref_root_bh);
483374a263eSTao Ma 	return ret;
484374a263eSTao Ma }
485374a263eSTao Ma 
486374a263eSTao Ma int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
487374a263eSTao Ma 				      struct ocfs2_refcount_tree **ret_tree,
488374a263eSTao Ma 				      struct buffer_head **ref_bh)
489374a263eSTao Ma {
490374a263eSTao Ma 	int ret;
491374a263eSTao Ma 	u64 ref_blkno;
492374a263eSTao Ma 
493374a263eSTao Ma 	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
494374a263eSTao Ma 	if (ret) {
495374a263eSTao Ma 		mlog_errno(ret);
496374a263eSTao Ma 		return ret;
497374a263eSTao Ma 	}
498374a263eSTao Ma 
499374a263eSTao Ma 	return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
500374a263eSTao Ma 					rw, ret_tree, ref_bh);
501374a263eSTao Ma }
502374a263eSTao Ma 
503374a263eSTao Ma void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
504374a263eSTao Ma 				struct ocfs2_refcount_tree *tree, int rw)
505374a263eSTao Ma {
506374a263eSTao Ma 	if (rw)
507374a263eSTao Ma 		up_write(&tree->rf_sem);
508374a263eSTao Ma 	else
509374a263eSTao Ma 		up_read(&tree->rf_sem);
510374a263eSTao Ma 
511374a263eSTao Ma 	ocfs2_refcount_unlock(tree, rw);
512374a263eSTao Ma 	ocfs2_refcount_tree_put(tree);
513374a263eSTao Ma }
514374a263eSTao Ma 
515374a263eSTao Ma void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
516374a263eSTao Ma {
517374a263eSTao Ma 	struct rb_node *node;
518374a263eSTao Ma 	struct ocfs2_refcount_tree *tree;
519374a263eSTao Ma 	struct rb_root *root = &osb->osb_rf_lock_tree;
520374a263eSTao Ma 
521374a263eSTao Ma 	while ((node = rb_last(root)) != NULL) {
522374a263eSTao Ma 		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
523374a263eSTao Ma 
524374a263eSTao Ma 		mlog(0, "Purge tree %llu\n",
525374a263eSTao Ma 		     (unsigned long long) tree->rf_blkno);
526374a263eSTao Ma 
527374a263eSTao Ma 		rb_erase(&tree->rf_node, root);
528374a263eSTao Ma 		ocfs2_free_refcount_tree(tree);
529374a263eSTao Ma 	}
530374a263eSTao Ma }
5318bf396deSTao Ma 
5328bf396deSTao Ma /*
5338bf396deSTao Ma  * Create a refcount tree for an inode.
5348bf396deSTao Ma  * We take for granted that the inode is already locked.
5358bf396deSTao Ma  */
5368bf396deSTao Ma static int ocfs2_create_refcount_tree(struct inode *inode,
5378bf396deSTao Ma 				      struct buffer_head *di_bh)
5388bf396deSTao Ma {
5398bf396deSTao Ma 	int ret;
5408bf396deSTao Ma 	handle_t *handle = NULL;
5418bf396deSTao Ma 	struct ocfs2_alloc_context *meta_ac = NULL;
5428bf396deSTao Ma 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5438bf396deSTao Ma 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
5448bf396deSTao Ma 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5458bf396deSTao Ma 	struct buffer_head *new_bh = NULL;
5468bf396deSTao Ma 	struct ocfs2_refcount_block *rb;
5478bf396deSTao Ma 	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
5488bf396deSTao Ma 	u16 suballoc_bit_start;
5498bf396deSTao Ma 	u32 num_got;
5508bf396deSTao Ma 	u64 first_blkno;
5518bf396deSTao Ma 
5528bf396deSTao Ma 	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
5538bf396deSTao Ma 
5548bf396deSTao Ma 	mlog(0, "create tree for inode %lu\n", inode->i_ino);
5558bf396deSTao Ma 
5568bf396deSTao Ma 	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
5578bf396deSTao Ma 	if (ret) {
5588bf396deSTao Ma 		mlog_errno(ret);
5598bf396deSTao Ma 		goto out;
5608bf396deSTao Ma 	}
5618bf396deSTao Ma 
5628bf396deSTao Ma 	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
5638bf396deSTao Ma 	if (IS_ERR(handle)) {
5648bf396deSTao Ma 		ret = PTR_ERR(handle);
5658bf396deSTao Ma 		mlog_errno(ret);
5668bf396deSTao Ma 		goto out;
5678bf396deSTao Ma 	}
5688bf396deSTao Ma 
5698bf396deSTao Ma 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
5708bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_WRITE);
5718bf396deSTao Ma 	if (ret) {
5728bf396deSTao Ma 		mlog_errno(ret);
5738bf396deSTao Ma 		goto out_commit;
5748bf396deSTao Ma 	}
5758bf396deSTao Ma 
5768bf396deSTao Ma 	ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
5778bf396deSTao Ma 				   &suballoc_bit_start, &num_got,
5788bf396deSTao Ma 				   &first_blkno);
5798bf396deSTao Ma 	if (ret) {
5808bf396deSTao Ma 		mlog_errno(ret);
5818bf396deSTao Ma 		goto out_commit;
5828bf396deSTao Ma 	}
5838bf396deSTao Ma 
5848bf396deSTao Ma 	new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
5858bf396deSTao Ma 	if (!new_tree) {
5868bf396deSTao Ma 		ret = -ENOMEM;
5878bf396deSTao Ma 		mlog_errno(ret);
5888bf396deSTao Ma 		goto out_commit;
5898bf396deSTao Ma 	}
5908bf396deSTao Ma 
5918bf396deSTao Ma 	new_bh = sb_getblk(inode->i_sb, first_blkno);
5928bf396deSTao Ma 	ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
5938bf396deSTao Ma 
5948bf396deSTao Ma 	ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
5958bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_CREATE);
5968bf396deSTao Ma 	if (ret) {
5978bf396deSTao Ma 		mlog_errno(ret);
5988bf396deSTao Ma 		goto out_commit;
5998bf396deSTao Ma 	}
6008bf396deSTao Ma 
6018bf396deSTao Ma 	/* Initialize ocfs2_refcount_block. */
6028bf396deSTao Ma 	rb = (struct ocfs2_refcount_block *)new_bh->b_data;
6038bf396deSTao Ma 	memset(rb, 0, inode->i_sb->s_blocksize);
6048bf396deSTao Ma 	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
6058bf396deSTao Ma 	rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
6068bf396deSTao Ma 	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
6078bf396deSTao Ma 	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
6088bf396deSTao Ma 	rb->rf_blkno = cpu_to_le64(first_blkno);
6098bf396deSTao Ma 	rb->rf_count = cpu_to_le32(1);
6108bf396deSTao Ma 	rb->rf_records.rl_count =
6118bf396deSTao Ma 			cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
6128bf396deSTao Ma 	spin_lock(&osb->osb_lock);
6138bf396deSTao Ma 	rb->rf_generation = osb->s_next_generation++;
6148bf396deSTao Ma 	spin_unlock(&osb->osb_lock);
6158bf396deSTao Ma 
6168bf396deSTao Ma 	ocfs2_journal_dirty(handle, new_bh);
6178bf396deSTao Ma 
6188bf396deSTao Ma 	spin_lock(&oi->ip_lock);
6198bf396deSTao Ma 	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
6208bf396deSTao Ma 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
6218bf396deSTao Ma 	di->i_refcount_loc = cpu_to_le64(first_blkno);
6228bf396deSTao Ma 	spin_unlock(&oi->ip_lock);
6238bf396deSTao Ma 
6248bf396deSTao Ma 	mlog(0, "created tree for inode %lu, refblock %llu\n",
6258bf396deSTao Ma 	     inode->i_ino, (unsigned long long)first_blkno);
6268bf396deSTao Ma 
6278bf396deSTao Ma 	ocfs2_journal_dirty(handle, di_bh);
6288bf396deSTao Ma 
6298bf396deSTao Ma 	/*
6308bf396deSTao Ma 	 * We have to init the tree lock here since it will use
6318bf396deSTao Ma 	 * the generation number to create it.
6328bf396deSTao Ma 	 */
6338bf396deSTao Ma 	new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
6348bf396deSTao Ma 	ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
6358bf396deSTao Ma 				      new_tree->rf_generation);
6368bf396deSTao Ma 
6378bf396deSTao Ma 	spin_lock(&osb->osb_lock);
6388bf396deSTao Ma 	tree = ocfs2_find_refcount_tree(osb, first_blkno);
6398bf396deSTao Ma 
6408bf396deSTao Ma 	/*
6418bf396deSTao Ma 	 * We've just created a new refcount tree in this block.  If
6428bf396deSTao Ma 	 * we found a refcount tree on the ocfs2_super, it must be
6438bf396deSTao Ma 	 * one we just deleted.  We free the old tree before
6448bf396deSTao Ma 	 * inserting the new tree.
6458bf396deSTao Ma 	 */
6468bf396deSTao Ma 	BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
6478bf396deSTao Ma 	if (tree)
6488bf396deSTao Ma 		ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
6498bf396deSTao Ma 	ocfs2_insert_refcount_tree(osb, new_tree);
6508bf396deSTao Ma 	spin_unlock(&osb->osb_lock);
6518bf396deSTao Ma 	new_tree = NULL;
6528bf396deSTao Ma 	if (tree)
6538bf396deSTao Ma 		ocfs2_refcount_tree_put(tree);
6548bf396deSTao Ma 
6558bf396deSTao Ma out_commit:
6568bf396deSTao Ma 	ocfs2_commit_trans(osb, handle);
6578bf396deSTao Ma 
6588bf396deSTao Ma out:
6598bf396deSTao Ma 	if (new_tree) {
6608bf396deSTao Ma 		ocfs2_metadata_cache_exit(&new_tree->rf_ci);
6618bf396deSTao Ma 		kfree(new_tree);
6628bf396deSTao Ma 	}
6638bf396deSTao Ma 
6648bf396deSTao Ma 	brelse(new_bh);
6658bf396deSTao Ma 	if (meta_ac)
6668bf396deSTao Ma 		ocfs2_free_alloc_context(meta_ac);
6678bf396deSTao Ma 
6688bf396deSTao Ma 	return ret;
6698bf396deSTao Ma }
6708bf396deSTao Ma 
6718bf396deSTao Ma static int ocfs2_set_refcount_tree(struct inode *inode,
6728bf396deSTao Ma 				   struct buffer_head *di_bh,
6738bf396deSTao Ma 				   u64 refcount_loc)
6748bf396deSTao Ma {
6758bf396deSTao Ma 	int ret;
6768bf396deSTao Ma 	handle_t *handle = NULL;
6778bf396deSTao Ma 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6788bf396deSTao Ma 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
6798bf396deSTao Ma 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6808bf396deSTao Ma 	struct buffer_head *ref_root_bh = NULL;
6818bf396deSTao Ma 	struct ocfs2_refcount_block *rb;
6828bf396deSTao Ma 	struct ocfs2_refcount_tree *ref_tree;
6838bf396deSTao Ma 
6848bf396deSTao Ma 	BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
6858bf396deSTao Ma 
6868bf396deSTao Ma 	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
6878bf396deSTao Ma 				       &ref_tree, &ref_root_bh);
6888bf396deSTao Ma 	if (ret) {
6898bf396deSTao Ma 		mlog_errno(ret);
6908bf396deSTao Ma 		return ret;
6918bf396deSTao Ma 	}
6928bf396deSTao Ma 
6938bf396deSTao Ma 	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
6948bf396deSTao Ma 	if (IS_ERR(handle)) {
6958bf396deSTao Ma 		ret = PTR_ERR(handle);
6968bf396deSTao Ma 		mlog_errno(ret);
6978bf396deSTao Ma 		goto out;
6988bf396deSTao Ma 	}
6998bf396deSTao Ma 
7008bf396deSTao Ma 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7018bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_WRITE);
7028bf396deSTao Ma 	if (ret) {
7038bf396deSTao Ma 		mlog_errno(ret);
7048bf396deSTao Ma 		goto out_commit;
7058bf396deSTao Ma 	}
7068bf396deSTao Ma 
7078bf396deSTao Ma 	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
7088bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_WRITE);
7098bf396deSTao Ma 	if (ret) {
7108bf396deSTao Ma 		mlog_errno(ret);
7118bf396deSTao Ma 		goto out_commit;
7128bf396deSTao Ma 	}
7138bf396deSTao Ma 
7148bf396deSTao Ma 	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
7158bf396deSTao Ma 	le32_add_cpu(&rb->rf_count, 1);
7168bf396deSTao Ma 
7178bf396deSTao Ma 	ocfs2_journal_dirty(handle, ref_root_bh);
7188bf396deSTao Ma 
7198bf396deSTao Ma 	spin_lock(&oi->ip_lock);
7208bf396deSTao Ma 	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
7218bf396deSTao Ma 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
7228bf396deSTao Ma 	di->i_refcount_loc = cpu_to_le64(refcount_loc);
7238bf396deSTao Ma 	spin_unlock(&oi->ip_lock);
7248bf396deSTao Ma 	ocfs2_journal_dirty(handle, di_bh);
7258bf396deSTao Ma 
7268bf396deSTao Ma out_commit:
7278bf396deSTao Ma 	ocfs2_commit_trans(osb, handle);
7288bf396deSTao Ma out:
7298bf396deSTao Ma 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7308bf396deSTao Ma 	brelse(ref_root_bh);
7318bf396deSTao Ma 
7328bf396deSTao Ma 	return ret;
7338bf396deSTao Ma }
7348bf396deSTao Ma 
7358bf396deSTao Ma int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
7368bf396deSTao Ma {
7378bf396deSTao Ma 	int ret, delete_tree = 0;
7388bf396deSTao Ma 	handle_t *handle = NULL;
7398bf396deSTao Ma 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7408bf396deSTao Ma 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
7418bf396deSTao Ma 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7428bf396deSTao Ma 	struct ocfs2_refcount_block *rb;
7438bf396deSTao Ma 	struct inode *alloc_inode = NULL;
7448bf396deSTao Ma 	struct buffer_head *alloc_bh = NULL;
7458bf396deSTao Ma 	struct buffer_head *blk_bh = NULL;
7468bf396deSTao Ma 	struct ocfs2_refcount_tree *ref_tree;
7478bf396deSTao Ma 	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
7488bf396deSTao Ma 	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
7498bf396deSTao Ma 	u16 bit = 0;
7508bf396deSTao Ma 
7518bf396deSTao Ma 	if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
7528bf396deSTao Ma 		return 0;
7538bf396deSTao Ma 
7548bf396deSTao Ma 	BUG_ON(!ref_blkno);
7558bf396deSTao Ma 	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
7568bf396deSTao Ma 	if (ret) {
7578bf396deSTao Ma 		mlog_errno(ret);
7588bf396deSTao Ma 		return ret;
7598bf396deSTao Ma 	}
7608bf396deSTao Ma 
7618bf396deSTao Ma 	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
7628bf396deSTao Ma 
7638bf396deSTao Ma 	/*
7648bf396deSTao Ma 	 * If we are the last user, we need to free the block.
7658bf396deSTao Ma 	 * So lock the allocator ahead.
7668bf396deSTao Ma 	 */
7678bf396deSTao Ma 	if (le32_to_cpu(rb->rf_count) == 1) {
7688bf396deSTao Ma 		blk = le64_to_cpu(rb->rf_blkno);
7698bf396deSTao Ma 		bit = le16_to_cpu(rb->rf_suballoc_bit);
7708bf396deSTao Ma 		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
7718bf396deSTao Ma 
7728bf396deSTao Ma 		alloc_inode = ocfs2_get_system_file_inode(osb,
7738bf396deSTao Ma 					EXTENT_ALLOC_SYSTEM_INODE,
7748bf396deSTao Ma 					le16_to_cpu(rb->rf_suballoc_slot));
7758bf396deSTao Ma 		if (!alloc_inode) {
7768bf396deSTao Ma 			ret = -ENOMEM;
7778bf396deSTao Ma 			mlog_errno(ret);
7788bf396deSTao Ma 			goto out;
7798bf396deSTao Ma 		}
7808bf396deSTao Ma 		mutex_lock(&alloc_inode->i_mutex);
7818bf396deSTao Ma 
7828bf396deSTao Ma 		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
7838bf396deSTao Ma 		if (ret) {
7848bf396deSTao Ma 			mlog_errno(ret);
7858bf396deSTao Ma 			goto out_mutex;
7868bf396deSTao Ma 		}
7878bf396deSTao Ma 
7888bf396deSTao Ma 		credits += OCFS2_SUBALLOC_FREE;
7898bf396deSTao Ma 	}
7908bf396deSTao Ma 
7918bf396deSTao Ma 	handle = ocfs2_start_trans(osb, credits);
7928bf396deSTao Ma 	if (IS_ERR(handle)) {
7938bf396deSTao Ma 		ret = PTR_ERR(handle);
7948bf396deSTao Ma 		mlog_errno(ret);
7958bf396deSTao Ma 		goto out_unlock;
7968bf396deSTao Ma 	}
7978bf396deSTao Ma 
7988bf396deSTao Ma 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7998bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_WRITE);
8008bf396deSTao Ma 	if (ret) {
8018bf396deSTao Ma 		mlog_errno(ret);
8028bf396deSTao Ma 		goto out_commit;
8038bf396deSTao Ma 	}
8048bf396deSTao Ma 
8058bf396deSTao Ma 	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
8068bf396deSTao Ma 				      OCFS2_JOURNAL_ACCESS_WRITE);
8078bf396deSTao Ma 	if (ret) {
8088bf396deSTao Ma 		mlog_errno(ret);
8098bf396deSTao Ma 		goto out_commit;
8108bf396deSTao Ma 	}
8118bf396deSTao Ma 
8128bf396deSTao Ma 	spin_lock(&oi->ip_lock);
8138bf396deSTao Ma 	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
8148bf396deSTao Ma 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
8158bf396deSTao Ma 	di->i_refcount_loc = 0;
8168bf396deSTao Ma 	spin_unlock(&oi->ip_lock);
8178bf396deSTao Ma 	ocfs2_journal_dirty(handle, di_bh);
8188bf396deSTao Ma 
8198bf396deSTao Ma 	le32_add_cpu(&rb->rf_count , -1);
8208bf396deSTao Ma 	ocfs2_journal_dirty(handle, blk_bh);
8218bf396deSTao Ma 
8228bf396deSTao Ma 	if (!rb->rf_count) {
8238bf396deSTao Ma 		delete_tree = 1;
8248bf396deSTao Ma 		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
8258bf396deSTao Ma 		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
8268bf396deSTao Ma 					       alloc_bh, bit, bg_blkno, 1);
8278bf396deSTao Ma 		if (ret)
8288bf396deSTao Ma 			mlog_errno(ret);
8298bf396deSTao Ma 	}
8308bf396deSTao Ma 
8318bf396deSTao Ma out_commit:
8328bf396deSTao Ma 	ocfs2_commit_trans(osb, handle);
8338bf396deSTao Ma out_unlock:
8348bf396deSTao Ma 	if (alloc_inode) {
8358bf396deSTao Ma 		ocfs2_inode_unlock(alloc_inode, 1);
8368bf396deSTao Ma 		brelse(alloc_bh);
8378bf396deSTao Ma 	}
8388bf396deSTao Ma out_mutex:
8398bf396deSTao Ma 	if (alloc_inode) {
8408bf396deSTao Ma 		mutex_unlock(&alloc_inode->i_mutex);
8418bf396deSTao Ma 		iput(alloc_inode);
8428bf396deSTao Ma 	}
8438bf396deSTao Ma out:
8448bf396deSTao Ma 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
8458bf396deSTao Ma 	if (delete_tree)
8468bf396deSTao Ma 		ocfs2_refcount_tree_put(ref_tree);
8478bf396deSTao Ma 	brelse(blk_bh);
8488bf396deSTao Ma 
8498bf396deSTao Ma 	return ret;
8508bf396deSTao Ma }
851