xref: /openbmc/linux/fs/nilfs2/page.c (revision 206a81c1)
1 /*
2  * page.c - buffer/page management specific to NILFS
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21  *            Seiji Kihara <kihara@osrg.net>.
22  */
23 
24 #include <linux/pagemap.h>
25 #include <linux/writeback.h>
26 #include <linux/swap.h>
27 #include <linux/bitops.h>
28 #include <linux/page-flags.h>
29 #include <linux/list.h>
30 #include <linux/highmem.h>
31 #include <linux/pagevec.h>
32 #include <linux/gfp.h>
33 #include "nilfs.h"
34 #include "page.h"
35 #include "mdt.h"
36 
37 
/*
 * Buffer state bits that are carried over from the source buffer when
 * nilfs_copy_buffer() or nilfs_copy_page() duplicates a buffer.
 */
#define NILFS_BUFFER_INHERENT_BITS					\
	((1UL << BH_NILFS_Checked) | (1UL << BH_NILFS_Volatile) |	\
	 (1UL << BH_NILFS_Node) | (1UL << BH_Mapped) |			\
	 (1UL << BH_Uptodate))
41 
42 static struct buffer_head *
43 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
44 		       int blkbits, unsigned long b_state)
45 
46 {
47 	unsigned long first_block;
48 	struct buffer_head *bh;
49 
50 	if (!page_has_buffers(page))
51 		create_empty_buffers(page, 1 << blkbits, b_state);
52 
53 	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
54 	bh = nilfs_page_get_nth_block(page, block - first_block);
55 
56 	touch_buffer(bh);
57 	wait_on_buffer(bh);
58 	return bh;
59 }
60 
61 struct buffer_head *nilfs_grab_buffer(struct inode *inode,
62 				      struct address_space *mapping,
63 				      unsigned long blkoff,
64 				      unsigned long b_state)
65 {
66 	int blkbits = inode->i_blkbits;
67 	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
68 	struct page *page;
69 	struct buffer_head *bh;
70 
71 	page = grab_cache_page(mapping, index);
72 	if (unlikely(!page))
73 		return NULL;
74 
75 	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
76 	if (unlikely(!bh)) {
77 		unlock_page(page);
78 		page_cache_release(page);
79 		return NULL;
80 	}
81 	return bh;
82 }
83 
84 /**
85  * nilfs_forget_buffer - discard dirty state
86  * @inode: owner inode of the buffer
87  * @bh: buffer head of the buffer to be discarded
88  */
89 void nilfs_forget_buffer(struct buffer_head *bh)
90 {
91 	struct page *page = bh->b_page;
92 
93 	lock_buffer(bh);
94 	clear_buffer_nilfs_volatile(bh);
95 	clear_buffer_nilfs_checked(bh);
96 	clear_buffer_nilfs_redirected(bh);
97 	clear_buffer_async_write(bh);
98 	clear_buffer_dirty(bh);
99 	if (nilfs_page_buffers_clean(page))
100 		__nilfs_clear_page_dirty(page);
101 
102 	clear_buffer_uptodate(bh);
103 	clear_buffer_mapped(bh);
104 	bh->b_blocknr = -1;
105 	ClearPageUptodate(page);
106 	ClearPageMappedToDisk(page);
107 	unlock_buffer(bh);
108 	brelse(bh);
109 }
110 
111 /**
112  * nilfs_copy_buffer -- copy buffer data and flags
113  * @dbh: destination buffer
114  * @sbh: source buffer
115  */
116 void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
117 {
118 	void *kaddr0, *kaddr1;
119 	unsigned long bits;
120 	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
121 	struct buffer_head *bh;
122 
123 	kaddr0 = kmap_atomic(spage);
124 	kaddr1 = kmap_atomic(dpage);
125 	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
126 	kunmap_atomic(kaddr1);
127 	kunmap_atomic(kaddr0);
128 
129 	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
130 	dbh->b_blocknr = sbh->b_blocknr;
131 	dbh->b_bdev = sbh->b_bdev;
132 
133 	bh = dbh;
134 	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
135 	while ((bh = bh->b_this_page) != dbh) {
136 		lock_buffer(bh);
137 		bits &= bh->b_state;
138 		unlock_buffer(bh);
139 	}
140 	if (bits & (1UL << BH_Uptodate))
141 		SetPageUptodate(dpage);
142 	else
143 		ClearPageUptodate(dpage);
144 	if (bits & (1UL << BH_Mapped))
145 		SetPageMappedToDisk(dpage);
146 	else
147 		ClearPageMappedToDisk(dpage);
148 }
149 
150 /**
151  * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
152  * @page: page to be checked
153  *
154  * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
155  * Otherwise, it returns non-zero value.
156  */
157 int nilfs_page_buffers_clean(struct page *page)
158 {
159 	struct buffer_head *bh, *head;
160 
161 	bh = head = page_buffers(page);
162 	do {
163 		if (buffer_dirty(bh))
164 			return 0;
165 		bh = bh->b_this_page;
166 	} while (bh != head);
167 	return 1;
168 }
169 
170 void nilfs_page_bug(struct page *page)
171 {
172 	struct address_space *m;
173 	unsigned long ino;
174 
175 	if (unlikely(!page)) {
176 		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
177 		return;
178 	}
179 
180 	m = page->mapping;
181 	ino = m ? m->host->i_ino : 0;
182 
183 	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
184 	       "mapping=%p ino=%lu\n",
185 	       page, atomic_read(&page->_count),
186 	       (unsigned long long)page->index, page->flags, m, ino);
187 
188 	if (page_has_buffers(page)) {
189 		struct buffer_head *bh, *head;
190 		int i = 0;
191 
192 		bh = head = page_buffers(page);
193 		do {
194 			printk(KERN_CRIT
195 			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
196 			       i++, bh, atomic_read(&bh->b_count),
197 			       (unsigned long long)bh->b_blocknr, bh->b_state);
198 			bh = bh->b_this_page;
199 		} while (bh != head);
200 	}
201 }
202 
203 /**
204  * nilfs_copy_page -- copy the page with buffers
205  * @dst: destination page
206  * @src: source page
207  * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
208  *
209  * This function is for both data pages and btnode pages.  The dirty flag
210  * should be treated by caller.  The page must not be under i/o.
211  * Both src and dst page must be locked
212  */
213 static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
214 {
215 	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
216 	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
217 
218 	BUG_ON(PageWriteback(dst));
219 
220 	sbh = sbufs = page_buffers(src);
221 	if (!page_has_buffers(dst))
222 		create_empty_buffers(dst, sbh->b_size, 0);
223 
224 	if (copy_dirty)
225 		mask |= (1UL << BH_Dirty);
226 
227 	dbh = dbufs = page_buffers(dst);
228 	do {
229 		lock_buffer(sbh);
230 		lock_buffer(dbh);
231 		dbh->b_state = sbh->b_state & mask;
232 		dbh->b_blocknr = sbh->b_blocknr;
233 		dbh->b_bdev = sbh->b_bdev;
234 		sbh = sbh->b_this_page;
235 		dbh = dbh->b_this_page;
236 	} while (dbh != dbufs);
237 
238 	copy_highpage(dst, src);
239 
240 	if (PageUptodate(src) && !PageUptodate(dst))
241 		SetPageUptodate(dst);
242 	else if (!PageUptodate(src) && PageUptodate(dst))
243 		ClearPageUptodate(dst);
244 	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
245 		SetPageMappedToDisk(dst);
246 	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
247 		ClearPageMappedToDisk(dst);
248 
249 	do {
250 		unlock_buffer(sbh);
251 		unlock_buffer(dbh);
252 		sbh = sbh->b_this_page;
253 		dbh = dbh->b_this_page;
254 	} while (dbh != dbufs);
255 }
256 
257 int nilfs_copy_dirty_pages(struct address_space *dmap,
258 			   struct address_space *smap)
259 {
260 	struct pagevec pvec;
261 	unsigned int i;
262 	pgoff_t index = 0;
263 	int err = 0;
264 
265 	pagevec_init(&pvec, 0);
266 repeat:
267 	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
268 				PAGEVEC_SIZE))
269 		return 0;
270 
271 	for (i = 0; i < pagevec_count(&pvec); i++) {
272 		struct page *page = pvec.pages[i], *dpage;
273 
274 		lock_page(page);
275 		if (unlikely(!PageDirty(page)))
276 			NILFS_PAGE_BUG(page, "inconsistent dirty state");
277 
278 		dpage = grab_cache_page(dmap, page->index);
279 		if (unlikely(!dpage)) {
280 			/* No empty page is added to the page cache */
281 			err = -ENOMEM;
282 			unlock_page(page);
283 			break;
284 		}
285 		if (unlikely(!page_has_buffers(page)))
286 			NILFS_PAGE_BUG(page,
287 				       "found empty page in dat page cache");
288 
289 		nilfs_copy_page(dpage, page, 1);
290 		__set_page_dirty_nobuffers(dpage);
291 
292 		unlock_page(dpage);
293 		page_cache_release(dpage);
294 		unlock_page(page);
295 	}
296 	pagevec_release(&pvec);
297 	cond_resched();
298 
299 	if (likely(!err))
300 		goto repeat;
301 	return err;
302 }
303 
304 /**
305  * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
306  * @dmap: destination page cache
307  * @smap: source page cache
308  *
309  * No pages must no be added to the cache during this process.
310  * This must be ensured by the caller.
311  */
312 void nilfs_copy_back_pages(struct address_space *dmap,
313 			   struct address_space *smap)
314 {
315 	struct pagevec pvec;
316 	unsigned int i, n;
317 	pgoff_t index = 0;
318 	int err;
319 
320 	pagevec_init(&pvec, 0);
321 repeat:
322 	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
323 	if (!n)
324 		return;
325 	index = pvec.pages[n - 1]->index + 1;
326 
327 	for (i = 0; i < pagevec_count(&pvec); i++) {
328 		struct page *page = pvec.pages[i], *dpage;
329 		pgoff_t offset = page->index;
330 
331 		lock_page(page);
332 		dpage = find_lock_page(dmap, offset);
333 		if (dpage) {
334 			/* override existing page on the destination cache */
335 			WARN_ON(PageDirty(dpage));
336 			nilfs_copy_page(dpage, page, 0);
337 			unlock_page(dpage);
338 			page_cache_release(dpage);
339 		} else {
340 			struct page *page2;
341 
342 			/* move the page to the destination cache */
343 			spin_lock_irq(&smap->tree_lock);
344 			page2 = radix_tree_delete(&smap->page_tree, offset);
345 			WARN_ON(page2 != page);
346 
347 			smap->nrpages--;
348 			spin_unlock_irq(&smap->tree_lock);
349 
350 			spin_lock_irq(&dmap->tree_lock);
351 			err = radix_tree_insert(&dmap->page_tree, offset, page);
352 			if (unlikely(err < 0)) {
353 				WARN_ON(err == -EEXIST);
354 				page->mapping = NULL;
355 				page_cache_release(page); /* for cache */
356 			} else {
357 				page->mapping = dmap;
358 				dmap->nrpages++;
359 				if (PageDirty(page))
360 					radix_tree_tag_set(&dmap->page_tree,
361 							   offset,
362 							   PAGECACHE_TAG_DIRTY);
363 			}
364 			spin_unlock_irq(&dmap->tree_lock);
365 		}
366 		unlock_page(page);
367 	}
368 	pagevec_release(&pvec);
369 	cond_resched();
370 
371 	goto repeat;
372 }
373 
374 /**
375  * nilfs_clear_dirty_pages - discard dirty pages in address space
376  * @mapping: address space with dirty pages for discarding
377  * @silent: suppress [true] or print [false] warning messages
378  */
379 void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
380 {
381 	struct pagevec pvec;
382 	unsigned int i;
383 	pgoff_t index = 0;
384 
385 	pagevec_init(&pvec, 0);
386 
387 	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
388 				  PAGEVEC_SIZE)) {
389 		for (i = 0; i < pagevec_count(&pvec); i++) {
390 			struct page *page = pvec.pages[i];
391 
392 			lock_page(page);
393 			nilfs_clear_dirty_page(page, silent);
394 			unlock_page(page);
395 		}
396 		pagevec_release(&pvec);
397 		cond_resched();
398 	}
399 }
400 
401 /**
402  * nilfs_clear_dirty_page - discard dirty page
403  * @page: dirty page that will be discarded
404  * @silent: suppress [true] or print [false] warning messages
405  */
406 void nilfs_clear_dirty_page(struct page *page, bool silent)
407 {
408 	struct inode *inode = page->mapping->host;
409 	struct super_block *sb = inode->i_sb;
410 
411 	BUG_ON(!PageLocked(page));
412 
413 	if (!silent) {
414 		nilfs_warning(sb, __func__,
415 				"discard page: offset %lld, ino %lu",
416 				page_offset(page), inode->i_ino);
417 	}
418 
419 	ClearPageUptodate(page);
420 	ClearPageMappedToDisk(page);
421 
422 	if (page_has_buffers(page)) {
423 		struct buffer_head *bh, *head;
424 
425 		bh = head = page_buffers(page);
426 		do {
427 			lock_buffer(bh);
428 			if (!silent) {
429 				nilfs_warning(sb, __func__,
430 					"discard block %llu, size %zu",
431 					(u64)bh->b_blocknr, bh->b_size);
432 			}
433 			clear_buffer_async_write(bh);
434 			clear_buffer_dirty(bh);
435 			clear_buffer_nilfs_volatile(bh);
436 			clear_buffer_nilfs_checked(bh);
437 			clear_buffer_nilfs_redirected(bh);
438 			clear_buffer_uptodate(bh);
439 			clear_buffer_mapped(bh);
440 			unlock_buffer(bh);
441 		} while (bh = bh->b_this_page, bh != head);
442 	}
443 
444 	__nilfs_clear_page_dirty(page);
445 }
446 
447 unsigned nilfs_page_count_clean_buffers(struct page *page,
448 					unsigned from, unsigned to)
449 {
450 	unsigned block_start, block_end;
451 	struct buffer_head *bh, *head;
452 	unsigned nc = 0;
453 
454 	for (bh = head = page_buffers(page), block_start = 0;
455 	     bh != head || !block_start;
456 	     block_start = block_end, bh = bh->b_this_page) {
457 		block_end = block_start + bh->b_size;
458 		if (block_end > from && block_start < to && !buffer_dirty(bh))
459 			nc++;
460 	}
461 	return nc;
462 }
463 
464 void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
465 			struct backing_dev_info *bdi)
466 {
467 	mapping->host = inode;
468 	mapping->flags = 0;
469 	mapping_set_gfp_mask(mapping, GFP_NOFS);
470 	mapping->private_data = NULL;
471 	mapping->backing_dev_info = bdi;
472 	mapping->a_ops = &empty_aops;
473 }
474 
475 /*
476  * NILFS2 needs clear_page_dirty() in the following two cases:
477  *
478  * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
479  *    page dirty flags when it copies back pages from the shadow cache
480  *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
481  *    (dat->{i_mapping,i_btnode_cache}).
482  *
483  * 2) Some B-tree operations like insertion or deletion may dispose buffers
484  *    in dirty state, and this needs to cancel the dirty state of their pages.
485  */
486 int __nilfs_clear_page_dirty(struct page *page)
487 {
488 	struct address_space *mapping = page->mapping;
489 
490 	if (mapping) {
491 		spin_lock_irq(&mapping->tree_lock);
492 		if (test_bit(PG_dirty, &page->flags)) {
493 			radix_tree_tag_clear(&mapping->page_tree,
494 					     page_index(page),
495 					     PAGECACHE_TAG_DIRTY);
496 			spin_unlock_irq(&mapping->tree_lock);
497 			return clear_page_dirty_for_io(page);
498 		}
499 		spin_unlock_irq(&mapping->tree_lock);
500 		return 0;
501 	}
502 	return TestClearPageDirty(page);
503 }
504 
505 /**
506  * nilfs_find_uncommitted_extent - find extent of uncommitted data
507  * @inode: inode
508  * @start_blk: start block offset (in)
509  * @blkoff: start offset of the found extent (out)
510  *
511  * This function searches an extent of buffers marked "delayed" which
512  * starts from a block offset equal to or larger than @start_blk.  If
513  * such an extent was found, this will store the start offset in
514  * @blkoff and return its length in blocks.  Otherwise, zero is
515  * returned.
516  */
517 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
518 					    sector_t start_blk,
519 					    sector_t *blkoff)
520 {
521 	unsigned int i;
522 	pgoff_t index;
523 	unsigned int nblocks_in_page;
524 	unsigned long length = 0;
525 	sector_t b;
526 	struct pagevec pvec;
527 	struct page *page;
528 
529 	if (inode->i_mapping->nrpages == 0)
530 		return 0;
531 
532 	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
533 	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
534 
535 	pagevec_init(&pvec, 0);
536 
537 repeat:
538 	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
539 					pvec.pages);
540 	if (pvec.nr == 0)
541 		return length;
542 
543 	if (length > 0 && pvec.pages[0]->index > index)
544 		goto out;
545 
546 	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
547 	i = 0;
548 	do {
549 		page = pvec.pages[i];
550 
551 		lock_page(page);
552 		if (page_has_buffers(page)) {
553 			struct buffer_head *bh, *head;
554 
555 			bh = head = page_buffers(page);
556 			do {
557 				if (b < start_blk)
558 					continue;
559 				if (buffer_delay(bh)) {
560 					if (length == 0)
561 						*blkoff = b;
562 					length++;
563 				} else if (length > 0) {
564 					goto out_locked;
565 				}
566 			} while (++b, bh = bh->b_this_page, bh != head);
567 		} else {
568 			if (length > 0)
569 				goto out_locked;
570 
571 			b += nblocks_in_page;
572 		}
573 		unlock_page(page);
574 
575 	} while (++i < pagevec_count(&pvec));
576 
577 	index = page->index + 1;
578 	pagevec_release(&pvec);
579 	cond_resched();
580 	goto repeat;
581 
582 out_locked:
583 	unlock_page(page);
584 out:
585 	pagevec_release(&pvec);
586 	return length;
587 }
588