xref: /openbmc/linux/fs/ocfs2/refcounttree.c (revision 374a263e)
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * refcounttree.c
 *
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
17f2c870e3STao Ma 
18f2c870e3STao Ma #define MLOG_MASK_PREFIX ML_REFCOUNT
19f2c870e3STao Ma #include <cluster/masklog.h>
20f2c870e3STao Ma #include "ocfs2.h"
21f2c870e3STao Ma #include "inode.h"
22f2c870e3STao Ma #include "alloc.h"
23f2c870e3STao Ma #include "suballoc.h"
24f2c870e3STao Ma #include "journal.h"
25f2c870e3STao Ma #include "uptodate.h"
26f2c870e3STao Ma #include "super.h"
27f2c870e3STao Ma #include "buffer_head_io.h"
28f2c870e3STao Ma #include "blockcheck.h"
29c732eb16STao Ma #include "refcounttree.h"
30374a263eSTao Ma #include "dlmglue.h"
31c732eb16STao Ma 
32c732eb16STao Ma static inline struct ocfs2_refcount_tree *
33c732eb16STao Ma cache_info_to_refcount(struct ocfs2_caching_info *ci)
34c732eb16STao Ma {
35c732eb16STao Ma 	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
36c732eb16STao Ma }
37f2c870e3STao Ma 
38f2c870e3STao Ma static int ocfs2_validate_refcount_block(struct super_block *sb,
39f2c870e3STao Ma 					 struct buffer_head *bh)
40f2c870e3STao Ma {
41f2c870e3STao Ma 	int rc;
42f2c870e3STao Ma 	struct ocfs2_refcount_block *rb =
43f2c870e3STao Ma 		(struct ocfs2_refcount_block *)bh->b_data;
44f2c870e3STao Ma 
45f2c870e3STao Ma 	mlog(0, "Validating refcount block %llu\n",
46f2c870e3STao Ma 	     (unsigned long long)bh->b_blocknr);
47f2c870e3STao Ma 
48f2c870e3STao Ma 	BUG_ON(!buffer_uptodate(bh));
49f2c870e3STao Ma 
50f2c870e3STao Ma 	/*
51f2c870e3STao Ma 	 * If the ecc fails, we return the error but otherwise
52f2c870e3STao Ma 	 * leave the filesystem running.  We know any error is
53f2c870e3STao Ma 	 * local to this block.
54f2c870e3STao Ma 	 */
55f2c870e3STao Ma 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
56f2c870e3STao Ma 	if (rc) {
57f2c870e3STao Ma 		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
58f2c870e3STao Ma 		     (unsigned long long)bh->b_blocknr);
59f2c870e3STao Ma 		return rc;
60f2c870e3STao Ma 	}
61f2c870e3STao Ma 
62f2c870e3STao Ma 
63f2c870e3STao Ma 	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
64f2c870e3STao Ma 		ocfs2_error(sb,
65f2c870e3STao Ma 			    "Refcount block #%llu has bad signature %.*s",
66f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr, 7,
67f2c870e3STao Ma 			    rb->rf_signature);
68f2c870e3STao Ma 		return -EINVAL;
69f2c870e3STao Ma 	}
70f2c870e3STao Ma 
71f2c870e3STao Ma 	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
72f2c870e3STao Ma 		ocfs2_error(sb,
73f2c870e3STao Ma 			    "Refcount block #%llu has an invalid rf_blkno "
74f2c870e3STao Ma 			    "of %llu",
75f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr,
76f2c870e3STao Ma 			    (unsigned long long)le64_to_cpu(rb->rf_blkno));
77f2c870e3STao Ma 		return -EINVAL;
78f2c870e3STao Ma 	}
79f2c870e3STao Ma 
80f2c870e3STao Ma 	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
81f2c870e3STao Ma 		ocfs2_error(sb,
82f2c870e3STao Ma 			    "Refcount block #%llu has an invalid "
83f2c870e3STao Ma 			    "rf_fs_generation of #%u",
84f2c870e3STao Ma 			    (unsigned long long)bh->b_blocknr,
85f2c870e3STao Ma 			    le32_to_cpu(rb->rf_fs_generation));
86f2c870e3STao Ma 		return -EINVAL;
87f2c870e3STao Ma 	}
88f2c870e3STao Ma 
89f2c870e3STao Ma 	return 0;
90f2c870e3STao Ma }
91f2c870e3STao Ma 
92f2c870e3STao Ma static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
93f2c870e3STao Ma 				     u64 rb_blkno,
94f2c870e3STao Ma 				     struct buffer_head **bh)
95f2c870e3STao Ma {
96f2c870e3STao Ma 	int rc;
97f2c870e3STao Ma 	struct buffer_head *tmp = *bh;
98f2c870e3STao Ma 
99f2c870e3STao Ma 	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
100f2c870e3STao Ma 			      ocfs2_validate_refcount_block);
101f2c870e3STao Ma 
102f2c870e3STao Ma 	/* If ocfs2_read_block() got us a new bh, pass it up. */
103f2c870e3STao Ma 	if (!rc && !*bh)
104f2c870e3STao Ma 		*bh = tmp;
105f2c870e3STao Ma 
106f2c870e3STao Ma 	return rc;
107f2c870e3STao Ma }
108c732eb16STao Ma 
109c732eb16STao Ma static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
110c732eb16STao Ma {
111c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
112c732eb16STao Ma 
113c732eb16STao Ma 	return rf->rf_blkno;
114c732eb16STao Ma }
115c732eb16STao Ma 
116c732eb16STao Ma static struct super_block *
117c732eb16STao Ma ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
118c732eb16STao Ma {
119c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
120c732eb16STao Ma 
121c732eb16STao Ma 	return rf->rf_sb;
122c732eb16STao Ma }
123c732eb16STao Ma 
124c732eb16STao Ma static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
125c732eb16STao Ma {
126c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
127c732eb16STao Ma 
128c732eb16STao Ma 	spin_lock(&rf->rf_lock);
129c732eb16STao Ma }
130c732eb16STao Ma 
131c732eb16STao Ma static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
132c732eb16STao Ma {
133c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
134c732eb16STao Ma 
135c732eb16STao Ma 	spin_unlock(&rf->rf_lock);
136c732eb16STao Ma }
137c732eb16STao Ma 
138c732eb16STao Ma static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
139c732eb16STao Ma {
140c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
141c732eb16STao Ma 
142c732eb16STao Ma 	mutex_lock(&rf->rf_io_mutex);
143c732eb16STao Ma }
144c732eb16STao Ma 
145c732eb16STao Ma static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
146c732eb16STao Ma {
147c732eb16STao Ma 	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
148c732eb16STao Ma 
149c732eb16STao Ma 	mutex_unlock(&rf->rf_io_mutex);
150c732eb16STao Ma }
151c732eb16STao Ma 
152c732eb16STao Ma static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
153c732eb16STao Ma 	.co_owner		= ocfs2_refcount_cache_owner,
154c732eb16STao Ma 	.co_get_super		= ocfs2_refcount_cache_get_super,
155c732eb16STao Ma 	.co_cache_lock		= ocfs2_refcount_cache_lock,
156c732eb16STao Ma 	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
157c732eb16STao Ma 	.co_io_lock		= ocfs2_refcount_cache_io_lock,
158c732eb16STao Ma 	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
159c732eb16STao Ma };
160374a263eSTao Ma 
161374a263eSTao Ma static struct ocfs2_refcount_tree *
162374a263eSTao Ma ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
163374a263eSTao Ma {
164374a263eSTao Ma 	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
165374a263eSTao Ma 	struct ocfs2_refcount_tree *tree = NULL;
166374a263eSTao Ma 
167374a263eSTao Ma 	while (n) {
168374a263eSTao Ma 		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
169374a263eSTao Ma 
170374a263eSTao Ma 		if (blkno < tree->rf_blkno)
171374a263eSTao Ma 			n = n->rb_left;
172374a263eSTao Ma 		else if (blkno > tree->rf_blkno)
173374a263eSTao Ma 			n = n->rb_right;
174374a263eSTao Ma 		else
175374a263eSTao Ma 			return tree;
176374a263eSTao Ma 	}
177374a263eSTao Ma 
178374a263eSTao Ma 	return NULL;
179374a263eSTao Ma }
180374a263eSTao Ma 
181374a263eSTao Ma /* osb_lock is already locked. */
182374a263eSTao Ma static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
183374a263eSTao Ma 				       struct ocfs2_refcount_tree *new)
184374a263eSTao Ma {
185374a263eSTao Ma 	u64 rf_blkno = new->rf_blkno;
186374a263eSTao Ma 	struct rb_node *parent = NULL;
187374a263eSTao Ma 	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
188374a263eSTao Ma 	struct ocfs2_refcount_tree *tmp;
189374a263eSTao Ma 
190374a263eSTao Ma 	while (*p) {
191374a263eSTao Ma 		parent = *p;
192374a263eSTao Ma 
193374a263eSTao Ma 		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
194374a263eSTao Ma 			       rf_node);
195374a263eSTao Ma 
196374a263eSTao Ma 		if (rf_blkno < tmp->rf_blkno)
197374a263eSTao Ma 			p = &(*p)->rb_left;
198374a263eSTao Ma 		else if (rf_blkno > tmp->rf_blkno)
199374a263eSTao Ma 			p = &(*p)->rb_right;
200374a263eSTao Ma 		else {
201374a263eSTao Ma 			/* This should never happen! */
202374a263eSTao Ma 			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
203374a263eSTao Ma 			     (unsigned long long)rf_blkno);
204374a263eSTao Ma 			BUG();
205374a263eSTao Ma 		}
206374a263eSTao Ma 	}
207374a263eSTao Ma 
208374a263eSTao Ma 	rb_link_node(&new->rf_node, parent, p);
209374a263eSTao Ma 	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
210374a263eSTao Ma }
211374a263eSTao Ma 
212374a263eSTao Ma static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
213374a263eSTao Ma {
214374a263eSTao Ma 	ocfs2_metadata_cache_exit(&tree->rf_ci);
215374a263eSTao Ma 	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
216374a263eSTao Ma 	ocfs2_lock_res_free(&tree->rf_lockres);
217374a263eSTao Ma 	kfree(tree);
218374a263eSTao Ma }
219374a263eSTao Ma 
220374a263eSTao Ma static inline void
221374a263eSTao Ma ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
222374a263eSTao Ma 					struct ocfs2_refcount_tree *tree)
223374a263eSTao Ma {
224374a263eSTao Ma 	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
225374a263eSTao Ma 	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
226374a263eSTao Ma 		osb->osb_ref_tree_lru = NULL;
227374a263eSTao Ma }
228374a263eSTao Ma 
229374a263eSTao Ma static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
230374a263eSTao Ma 					struct ocfs2_refcount_tree *tree)
231374a263eSTao Ma {
232374a263eSTao Ma 	spin_lock(&osb->osb_lock);
233374a263eSTao Ma 	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
234374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
235374a263eSTao Ma }
236374a263eSTao Ma 
237374a263eSTao Ma void ocfs2_kref_remove_refcount_tree(struct kref *kref)
238374a263eSTao Ma {
239374a263eSTao Ma 	struct ocfs2_refcount_tree *tree =
240374a263eSTao Ma 		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
241374a263eSTao Ma 
242374a263eSTao Ma 	ocfs2_free_refcount_tree(tree);
243374a263eSTao Ma }
244374a263eSTao Ma 
245374a263eSTao Ma static inline void
246374a263eSTao Ma ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
247374a263eSTao Ma {
248374a263eSTao Ma 	kref_get(&tree->rf_getcnt);
249374a263eSTao Ma }
250374a263eSTao Ma 
251374a263eSTao Ma static inline void
252374a263eSTao Ma ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
253374a263eSTao Ma {
254374a263eSTao Ma 	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
255374a263eSTao Ma }
256374a263eSTao Ma 
257374a263eSTao Ma static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
258374a263eSTao Ma 					       struct super_block *sb)
259374a263eSTao Ma {
260374a263eSTao Ma 	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
261374a263eSTao Ma 	mutex_init(&new->rf_io_mutex);
262374a263eSTao Ma 	new->rf_sb = sb;
263374a263eSTao Ma 	spin_lock_init(&new->rf_lock);
264374a263eSTao Ma }
265374a263eSTao Ma 
266374a263eSTao Ma static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
267374a263eSTao Ma 					struct ocfs2_refcount_tree *new,
268374a263eSTao Ma 					u64 rf_blkno, u32 generation)
269374a263eSTao Ma {
270374a263eSTao Ma 	init_rwsem(&new->rf_sem);
271374a263eSTao Ma 	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
272374a263eSTao Ma 				     rf_blkno, generation);
273374a263eSTao Ma }
274374a263eSTao Ma 
275374a263eSTao Ma static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
276374a263eSTao Ma 				   struct ocfs2_refcount_tree **ret_tree)
277374a263eSTao Ma {
278374a263eSTao Ma 	int ret = 0;
279374a263eSTao Ma 	struct ocfs2_refcount_tree *tree, *new = NULL;
280374a263eSTao Ma 	struct buffer_head *ref_root_bh = NULL;
281374a263eSTao Ma 	struct ocfs2_refcount_block *ref_rb;
282374a263eSTao Ma 
283374a263eSTao Ma 	spin_lock(&osb->osb_lock);
284374a263eSTao Ma 	if (osb->osb_ref_tree_lru &&
285374a263eSTao Ma 	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
286374a263eSTao Ma 		tree = osb->osb_ref_tree_lru;
287374a263eSTao Ma 	else
288374a263eSTao Ma 		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
289374a263eSTao Ma 	if (tree)
290374a263eSTao Ma 		goto out;
291374a263eSTao Ma 
292374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
293374a263eSTao Ma 
294374a263eSTao Ma 	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
295374a263eSTao Ma 	if (!new) {
296374a263eSTao Ma 		ret = -ENOMEM;
297374a263eSTao Ma 		return ret;
298374a263eSTao Ma 	}
299374a263eSTao Ma 
300374a263eSTao Ma 	new->rf_blkno = rf_blkno;
301374a263eSTao Ma 	kref_init(&new->rf_getcnt);
302374a263eSTao Ma 	ocfs2_init_refcount_tree_ci(new, osb->sb);
303374a263eSTao Ma 
304374a263eSTao Ma 	/*
305374a263eSTao Ma 	 * We need the generation to create the refcount tree lock and since
306374a263eSTao Ma 	 * it isn't changed during the tree modification, we are safe here to
307374a263eSTao Ma 	 * read without protection.
308374a263eSTao Ma 	 * We also have to purge the cache after we create the lock since the
309374a263eSTao Ma 	 * refcount block may have the stale data. It can only be trusted when
310374a263eSTao Ma 	 * we hold the refcount lock.
311374a263eSTao Ma 	 */
312374a263eSTao Ma 	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
313374a263eSTao Ma 	if (ret) {
314374a263eSTao Ma 		mlog_errno(ret);
315374a263eSTao Ma 		ocfs2_metadata_cache_exit(&new->rf_ci);
316374a263eSTao Ma 		kfree(new);
317374a263eSTao Ma 		return ret;
318374a263eSTao Ma 	}
319374a263eSTao Ma 
320374a263eSTao Ma 	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
321374a263eSTao Ma 	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
322374a263eSTao Ma 	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
323374a263eSTao Ma 				      new->rf_generation);
324374a263eSTao Ma 	ocfs2_metadata_cache_purge(&new->rf_ci);
325374a263eSTao Ma 
326374a263eSTao Ma 	spin_lock(&osb->osb_lock);
327374a263eSTao Ma 	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
328374a263eSTao Ma 	if (tree)
329374a263eSTao Ma 		goto out;
330374a263eSTao Ma 
331374a263eSTao Ma 	ocfs2_insert_refcount_tree(osb, new);
332374a263eSTao Ma 
333374a263eSTao Ma 	tree = new;
334374a263eSTao Ma 	new = NULL;
335374a263eSTao Ma 
336374a263eSTao Ma out:
337374a263eSTao Ma 	*ret_tree = tree;
338374a263eSTao Ma 
339374a263eSTao Ma 	osb->osb_ref_tree_lru = tree;
340374a263eSTao Ma 
341374a263eSTao Ma 	spin_unlock(&osb->osb_lock);
342374a263eSTao Ma 
343374a263eSTao Ma 	if (new)
344374a263eSTao Ma 		ocfs2_free_refcount_tree(new);
345374a263eSTao Ma 
346374a263eSTao Ma 	brelse(ref_root_bh);
347374a263eSTao Ma 	return ret;
348374a263eSTao Ma }
349374a263eSTao Ma 
350374a263eSTao Ma static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
351374a263eSTao Ma {
352374a263eSTao Ma 	int ret;
353374a263eSTao Ma 	struct buffer_head *di_bh = NULL;
354374a263eSTao Ma 	struct ocfs2_dinode *di;
355374a263eSTao Ma 
356374a263eSTao Ma 	ret = ocfs2_read_inode_block(inode, &di_bh);
357374a263eSTao Ma 	if (ret) {
358374a263eSTao Ma 		mlog_errno(ret);
359374a263eSTao Ma 		goto out;
360374a263eSTao Ma 	}
361374a263eSTao Ma 
362374a263eSTao Ma 	BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
363374a263eSTao Ma 
364374a263eSTao Ma 	di = (struct ocfs2_dinode *)di_bh->b_data;
365374a263eSTao Ma 	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
366374a263eSTao Ma 	brelse(di_bh);
367374a263eSTao Ma out:
368374a263eSTao Ma 	return ret;
369374a263eSTao Ma }
370374a263eSTao Ma 
371374a263eSTao Ma static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
372374a263eSTao Ma 				      struct ocfs2_refcount_tree *tree, int rw)
373374a263eSTao Ma {
374374a263eSTao Ma 	int ret;
375374a263eSTao Ma 
376374a263eSTao Ma 	ret = ocfs2_refcount_lock(tree, rw);
377374a263eSTao Ma 	if (ret) {
378374a263eSTao Ma 		mlog_errno(ret);
379374a263eSTao Ma 		goto out;
380374a263eSTao Ma 	}
381374a263eSTao Ma 
382374a263eSTao Ma 	if (rw)
383374a263eSTao Ma 		down_write(&tree->rf_sem);
384374a263eSTao Ma 	else
385374a263eSTao Ma 		down_read(&tree->rf_sem);
386374a263eSTao Ma 
387374a263eSTao Ma out:
388374a263eSTao Ma 	return ret;
389374a263eSTao Ma }
390374a263eSTao Ma 
391374a263eSTao Ma /*
392374a263eSTao Ma  * Lock the refcount tree pointed by ref_blkno and return the tree.
393374a263eSTao Ma  * In most case, we lock the tree and read the refcount block.
394374a263eSTao Ma  * So read it here if the caller really needs it.
395374a263eSTao Ma  *
396374a263eSTao Ma  * If the tree has been re-created by other node, it will free the
397374a263eSTao Ma  * old one and re-create it.
398374a263eSTao Ma  */
399374a263eSTao Ma int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
400374a263eSTao Ma 			     u64 ref_blkno, int rw,
401374a263eSTao Ma 			     struct ocfs2_refcount_tree **ret_tree,
402374a263eSTao Ma 			     struct buffer_head **ref_bh)
403374a263eSTao Ma {
404374a263eSTao Ma 	int ret, delete_tree = 0;
405374a263eSTao Ma 	struct ocfs2_refcount_tree *tree = NULL;
406374a263eSTao Ma 	struct buffer_head *ref_root_bh = NULL;
407374a263eSTao Ma 	struct ocfs2_refcount_block *rb;
408374a263eSTao Ma 
409374a263eSTao Ma again:
410374a263eSTao Ma 	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
411374a263eSTao Ma 	if (ret) {
412374a263eSTao Ma 		mlog_errno(ret);
413374a263eSTao Ma 		return ret;
414374a263eSTao Ma 	}
415374a263eSTao Ma 
416374a263eSTao Ma 	ocfs2_refcount_tree_get(tree);
417374a263eSTao Ma 
418374a263eSTao Ma 	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
419374a263eSTao Ma 	if (ret) {
420374a263eSTao Ma 		mlog_errno(ret);
421374a263eSTao Ma 		ocfs2_refcount_tree_put(tree);
422374a263eSTao Ma 		goto out;
423374a263eSTao Ma 	}
424374a263eSTao Ma 
425374a263eSTao Ma 	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
426374a263eSTao Ma 					&ref_root_bh);
427374a263eSTao Ma 	if (ret) {
428374a263eSTao Ma 		mlog_errno(ret);
429374a263eSTao Ma 		ocfs2_unlock_refcount_tree(osb, tree, rw);
430374a263eSTao Ma 		ocfs2_refcount_tree_put(tree);
431374a263eSTao Ma 		goto out;
432374a263eSTao Ma 	}
433374a263eSTao Ma 
434374a263eSTao Ma 	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
435374a263eSTao Ma 	/*
436374a263eSTao Ma 	 * If the refcount block has been freed and re-created, we may need
437374a263eSTao Ma 	 * to recreate the refcount tree also.
438374a263eSTao Ma 	 *
439374a263eSTao Ma 	 * Here we just remove the tree from the rb-tree, and the last
440374a263eSTao Ma 	 * kref holder will unlock and delete this refcount_tree.
441374a263eSTao Ma 	 * Then we goto "again" and ocfs2_get_refcount_tree will create
442374a263eSTao Ma 	 * the new refcount tree for us.
443374a263eSTao Ma 	 */
444374a263eSTao Ma 	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
445374a263eSTao Ma 		if (!tree->rf_removed) {
446374a263eSTao Ma 			ocfs2_erase_refcount_tree_from_list(osb, tree);
447374a263eSTao Ma 			tree->rf_removed = 1;
448374a263eSTao Ma 			delete_tree = 1;
449374a263eSTao Ma 		}
450374a263eSTao Ma 
451374a263eSTao Ma 		ocfs2_unlock_refcount_tree(osb, tree, rw);
452374a263eSTao Ma 		/*
453374a263eSTao Ma 		 * We get an extra reference when we create the refcount
454374a263eSTao Ma 		 * tree, so another put will destroy it.
455374a263eSTao Ma 		 */
456374a263eSTao Ma 		if (delete_tree)
457374a263eSTao Ma 			ocfs2_refcount_tree_put(tree);
458374a263eSTao Ma 		brelse(ref_root_bh);
459374a263eSTao Ma 		ref_root_bh = NULL;
460374a263eSTao Ma 		goto again;
461374a263eSTao Ma 	}
462374a263eSTao Ma 
463374a263eSTao Ma 	*ret_tree = tree;
464374a263eSTao Ma 	if (ref_bh) {
465374a263eSTao Ma 		*ref_bh = ref_root_bh;
466374a263eSTao Ma 		ref_root_bh = NULL;
467374a263eSTao Ma 	}
468374a263eSTao Ma out:
469374a263eSTao Ma 	brelse(ref_root_bh);
470374a263eSTao Ma 	return ret;
471374a263eSTao Ma }
472374a263eSTao Ma 
473374a263eSTao Ma int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
474374a263eSTao Ma 				      struct ocfs2_refcount_tree **ret_tree,
475374a263eSTao Ma 				      struct buffer_head **ref_bh)
476374a263eSTao Ma {
477374a263eSTao Ma 	int ret;
478374a263eSTao Ma 	u64 ref_blkno;
479374a263eSTao Ma 
480374a263eSTao Ma 	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
481374a263eSTao Ma 	if (ret) {
482374a263eSTao Ma 		mlog_errno(ret);
483374a263eSTao Ma 		return ret;
484374a263eSTao Ma 	}
485374a263eSTao Ma 
486374a263eSTao Ma 	return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
487374a263eSTao Ma 					rw, ret_tree, ref_bh);
488374a263eSTao Ma }
489374a263eSTao Ma 
490374a263eSTao Ma void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
491374a263eSTao Ma 				struct ocfs2_refcount_tree *tree, int rw)
492374a263eSTao Ma {
493374a263eSTao Ma 	if (rw)
494374a263eSTao Ma 		up_write(&tree->rf_sem);
495374a263eSTao Ma 	else
496374a263eSTao Ma 		up_read(&tree->rf_sem);
497374a263eSTao Ma 
498374a263eSTao Ma 	ocfs2_refcount_unlock(tree, rw);
499374a263eSTao Ma 	ocfs2_refcount_tree_put(tree);
500374a263eSTao Ma }
501374a263eSTao Ma 
502374a263eSTao Ma void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
503374a263eSTao Ma {
504374a263eSTao Ma 	struct rb_node *node;
505374a263eSTao Ma 	struct ocfs2_refcount_tree *tree;
506374a263eSTao Ma 	struct rb_root *root = &osb->osb_rf_lock_tree;
507374a263eSTao Ma 
508374a263eSTao Ma 	while ((node = rb_last(root)) != NULL) {
509374a263eSTao Ma 		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
510374a263eSTao Ma 
511374a263eSTao Ma 		mlog(0, "Purge tree %llu\n",
512374a263eSTao Ma 		     (unsigned long long) tree->rf_blkno);
513374a263eSTao Ma 
514374a263eSTao Ma 		rb_erase(&tree->rf_node, root);
515374a263eSTao Ma 		ocfs2_free_refcount_tree(tree);
516374a263eSTao Ma 	}
517374a263eSTao Ma }
518