/*
 * btnode.c - NILFS B-tree node cache
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * This file was originally written by Seiji Kihara <kihara@osrg.net>
 * and fully revised by Ryusuke Konishi <ryusuke@osrg.net> for
 * stabilization and simplification.
 *
 */

#include <linux/types.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include "nilfs.h"
#include "mdt.h"
#include "dat.h"
#include "page.h"
#include "btnode.h"


void nilfs_btnode_cache_init_once(struct address_space *btnc)
{
	INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC);
	spin_lock_init(&btnc->tree_lock);
	INIT_LIST_HEAD(&btnc->private_list);
	spin_lock_init(&btnc->private_lock);

	spin_lock_init(&btnc->i_mmap_lock);
	INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap);
	INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}

static const struct address_space_operations def_btnode_aops = {
	.sync_page		= block_sync_page,
};

void nilfs_btnode_cache_init(struct address_space *btnc,
			     struct backing_dev_info *bdi)
{
	btnc->host = NULL;  /* can this safely be set to the host inode? */
	btnc->flags = 0;
	mapping_set_gfp_mask(btnc, GFP_NOFS);
	btnc->assoc_mapping = NULL;
	btnc->backing_dev_info = bdi;
	btnc->a_ops = &def_btnode_aops;
}

void nilfs_btnode_cache_clear(struct address_space *btnc)
{
	invalidate_mapping_pages(btnc, 0, -1);
	truncate_inode_pages(btnc, 0);
}
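/*
 * Lifecycle sketch (an illustrative assumption, not code from this file):
 * the owner of a btnode cache is expected to wire up the three helpers
 * above roughly as follows, where "ii" stands for a nilfs inode info
 * holding the cache and "nilfs" for the object providing the backing
 * device info; the actual call sites live elsewhere in NILFS:
 *
 *	nilfs_btnode_cache_init_once(&ii->i_btnode_cache);	(slab ctor, once)
 *	nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi);
 *	...
 *	nilfs_btnode_cache_clear(&ii->i_btnode_cache);		(at teardown)
 */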

/*
 * nilfs_btnode_submit_block() looks up or creates the buffer for the node
 * block @blocknr in the b-tree node cache.  With @newblk set, the buffer
 * is initialized as a brand-new mapped and up-to-date block.  Otherwise,
 * unless the buffer is already up to date or dirty, a read request is
 * submitted; a zero @pblocknr is first translated through the DAT (except
 * for the DAT inode itself).  Returns 0 when a read has been submitted
 * (the caller must wait on the buffer), the internal code -EEXIST when the
 * buffer is ready without I/O, or a negative error code on failure.
 */
int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
			      sector_t pblocknr, struct buffer_head **pbh,
			      int newblk)
{
	struct buffer_head *bh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	int err;

	bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node);
	if (unlikely(!bh))
		return -ENOMEM;

	err = -EEXIST; /* internal code */
	if (newblk) {
		if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) ||
			     buffer_dirty(bh))) {
			brelse(bh);
			BUG();
		}
		bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
		bh->b_blocknr = blocknr;
		set_buffer_mapped(bh);
		set_buffer_uptodate(bh);
		goto found;
	}

	if (buffer_uptodate(bh) || buffer_dirty(bh))
		goto found;

	if (pblocknr == 0) {
		pblocknr = blocknr;
		if (inode->i_ino != NILFS_DAT_INO) {
			struct inode *dat =
				nilfs_dat_inode(NILFS_I_NILFS(inode));

			/* blocknr is a virtual block number */
			err = nilfs_dat_translate(dat, blocknr, &pblocknr);
			if (unlikely(err)) {
				brelse(bh);
				goto out_locked;
			}
		}
	}
	lock_buffer(bh);
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		err = -EEXIST; /* internal code */
		goto found;
	}
	set_buffer_mapped(bh);
	bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev;
	bh->b_blocknr = pblocknr; /* set block address for read */
	bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(READ, bh);
	bh->b_blocknr = blocknr; /* set back to the given block address */
	err = 0;
found:
	*pbh = bh;

out_locked:
	unlock_page(bh->b_page);
	page_cache_release(bh->b_page);
	return err;
}

int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr,
		     sector_t pblocknr, struct buffer_head **pbh, int newblk)
{
	struct buffer_head *bh;
	int err;

	err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk);
	if (err == -EEXIST) /* internal code (cache hit) */
		return 0;
	if (unlikely(err))
		return err;

	bh = *pbh;
	wait_on_buffer(bh);
	if (!buffer_uptodate(bh)) {
		brelse(bh);
		return -EIO;
	}
	return 0;
}
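/*
 * Illustrative sketch, not part of the original NILFS code: a typical read
 * of a b-tree node block through nilfs_btnode_get().  The function name and
 * control flow are assumptions for illustration only; __maybe_unused keeps
 * the build quiet since nothing calls the sketch.
 */
static int __maybe_unused
nilfs_btnode_read_example(struct address_space *btnc, __u64 blocknr)
{
	struct buffer_head *bh;
	int err;

	/* pblocknr == 0 lets submit_block translate through the DAT */
	err = nilfs_btnode_get(btnc, blocknr, 0, &bh, 0);
	if (err)
		return err;	/* -ENOMEM, -EIO, or DAT lookup failure */

	/* ... inspect bh->b_data here; the caller owns one reference ... */

	brelse(bh);
	return 0;
}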

/**
 * nilfs_btnode_delete - delete B-tree node buffer
 * @bh: buffer to be deleted
 *
 * nilfs_btnode_delete() invalidates the specified buffer and deletes the
 * page containing the buffer if the page is no longer busy.
 */
void nilfs_btnode_delete(struct buffer_head *bh)
{
	struct address_space *mapping;
	struct page *page = bh->b_page;
	pgoff_t index = page_index(page);
	int still_dirty;

	page_cache_get(page);
	lock_page(page);
	wait_on_page_writeback(page);

	nilfs_forget_buffer(bh);
	still_dirty = PageDirty(page);
	mapping = page->mapping;
	unlock_page(page);
	page_cache_release(page);

	if (!still_dirty && mapping)
		invalidate_inode_pages2_range(mapping, index, index);
}
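/*
 * Usage note (an assumption about typical callers, not from this file): a
 * buffer obtained from nilfs_btnode_get() that has been dropped from the
 * tree is released with nilfs_btnode_delete(bh); the call consumes the
 * caller's buffer reference (see nilfs_btnode_commit_change_key() below),
 * so no extra brelse() is needed afterwards.
 */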

/**
 * nilfs_btnode_prepare_change_key
 *  Prepare to move the contents of the block keyed by the old key to the
 *  new key.  The old buffer is not removed, but it may be reused for the
 *  new buffer.  This function may return -ENOMEM on memory allocation
 *  failure, or -EIO on disk read failure.
 */
int nilfs_btnode_prepare_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh, *nbh;
	struct inode *inode = NILFS_BTNC_I(btnc);
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	int err;

	if (oldkey == newkey)
		return 0;

	obh = ctxt->bh;
	ctxt->newbh = NULL;

	if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
		lock_page(obh->b_page);
		/*
		 * We cannot call radix_tree_preload on kernels older than
		 * 2.6.23 because it is not exported to modules.
		 */
retry:
		err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
		if (err)
			goto failed_unlock;
		/* BUG_ON(oldkey != obh->b_page->index); */
		if (unlikely(oldkey != obh->b_page->index))
			NILFS_PAGE_BUG(obh->b_page,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);

		spin_lock_irq(&btnc->tree_lock);
		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
		spin_unlock_irq(&btnc->tree_lock);
		/*
		 * Note: page->index does not change to newkey until
		 * nilfs_btnode_commit_change_key() is called.  The page lock
		 * is held to protect the page in this intermediate state.
		 */
		radix_tree_preload_end();
		if (!err)
			return 0;
		else if (err != -EEXIST)
			goto failed_unlock;

		err = invalidate_inode_pages2_range(btnc, newkey, newkey);
		if (!err)
			goto retry;
		/* fallback to copy mode */
		unlock_page(obh->b_page);
	}

	err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1);
	if (likely(!err)) {
		BUG_ON(nbh == obh);
		ctxt->newbh = nbh;
	}
	return err;

 failed_unlock:
	unlock_page(obh->b_page);
	return err;
}

/**
 * nilfs_btnode_commit_change_key
 *  Commit the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_commit_change_key(struct address_space *btnc,
				    struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;
	struct page *opage;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		opage = obh->b_page;
		if (unlikely(oldkey != opage->index))
			NILFS_PAGE_BUG(opage,
				       "invalid oldkey %lld (newkey=%lld)",
				       (unsigned long long)oldkey,
				       (unsigned long long)newkey);
		if (!test_set_buffer_dirty(obh) && TestSetPageDirty(opage))
			BUG();

		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, oldkey);
		radix_tree_tag_set(&btnc->page_tree, newkey,
				   PAGECACHE_TAG_DIRTY);
		spin_unlock_irq(&btnc->tree_lock);

		opage->index = obh->b_blocknr = newkey;
		unlock_page(opage);
	} else {
		nilfs_copy_buffer(nbh, obh);
		nilfs_btnode_mark_dirty(nbh);

		nbh->b_blocknr = newkey;
		ctxt->bh = nbh;
		nilfs_btnode_delete(obh); /* will decrement bh->b_count */
	}
}

/**
 * nilfs_btnode_abort_change_key
 *  Abort the change_key operation prepared by prepare_change_key().
 */
void nilfs_btnode_abort_change_key(struct address_space *btnc,
				   struct nilfs_btnode_chkey_ctxt *ctxt)
{
	struct buffer_head *nbh = ctxt->newbh;
	__u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey;

	if (oldkey == newkey)
		return;

	if (nbh == NULL) {	/* blocksize == pagesize */
		spin_lock_irq(&btnc->tree_lock);
		radix_tree_delete(&btnc->page_tree, newkey);
		spin_unlock_irq(&btnc->tree_lock);
		unlock_page(ctxt->bh->b_page);
	} else
		brelse(nbh);
}
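/*
 * Illustrative sketch, not part of the original NILFS code: the three
 * change_key helpers above form a prepare/commit/abort protocol for moving
 * a node block from one key to another.  The function name and the "fail"
 * condition are assumptions for illustration; the real callers live in the
 * b-tree implementation.
 */
static int __maybe_unused
nilfs_btnode_change_key_example(struct address_space *btnc,
				struct buffer_head *bh,
				__u64 oldkey, __u64 newkey, int fail)
{
	struct nilfs_btnode_chkey_ctxt ctxt;
	int err;

	ctxt.oldkey = oldkey;
	ctxt.newkey = newkey;
	ctxt.bh = bh;
	err = nilfs_btnode_prepare_change_key(btnc, &ctxt);
	if (err)
		return err;	/* -ENOMEM or -EIO */

	if (fail) {
		/* roll back; the old buffer keeps its old key */
		nilfs_btnode_abort_change_key(btnc, &ctxt);
		return -EINVAL;
	}

	/* after commit, ctxt.bh refers to the buffer keyed by newkey */
	nilfs_btnode_commit_change_key(btnc, &ctxt);
	return 0;
}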