/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS  \
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
	 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
		__set_page_dirty_nobuffers(bh->b_page);
}
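/*
 * Usage sketch (illustrative comment only, not compiled; the lookup
 * helper named below is hypothetical):
 *
 *	struct buffer_head *bh = lookup_btnode_buffer(btnc, blocknr);
 *
 *	nilfs_mark_buffer_dirty(bh);	--> safe: tags the page directly
 *	mark_buffer_dirty(bh);		--> would oops: btnc->host is NULL
 */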
struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page, *opage;
	struct buffer_head *bh, *obh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
		/*
		 * The shadow page cache uses assoc_mapping to point to its
		 * original page cache.  The following code tries the
		 * original cache if the given cache is a shadow and the
		 * lookup did not hit there.
		 */
		opage = find_lock_page(mapping->assoc_mapping, index);
		if (!opage)
			return bh;

		obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
					     b_state);
		if (buffer_uptodate(obh)) {
			nilfs_copy_buffer(bh, obh);
			if (buffer_dirty(obh)) {
				nilfs_mark_buffer_dirty(bh);
				if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
					nilfs_mdt_mark_dirty(inode);
			}
		}
		brelse(obh);
		unlock_page(opage);
		page_cache_release(opage);
	}
	return bh;
}

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_buffer(bh);
	clear_buffer_nilfs_volatile(bh);
	clear_buffer_dirty(bh);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	clear_buffer_uptodate(bh);
	clear_buffer_mapped(bh);
	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}
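/*
 * Usage sketch (illustrative comment only, not compiled): the page
 * comes back locked with elevated page and buffer counts, so a caller
 * typically does
 *
 *	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
 *	if (unlikely(!bh))
 *		return -ENOMEM;
 *	if (!buffer_uptodate(bh)) {
 *		... fill or read the block, then set it up to date ...
 *	}
 *	unlock_page(bh->b_page);
 *	page_cache_release(bh->b_page);
 *	brelse(bh);
 */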
/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage, KM_USER0);
	kaddr1 = kmap_atomic(dpage, KM_USER1);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1, KM_USER1);
	kunmap_atomic(kaddr0, KM_USER0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	bh = dbh;
	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & (1UL << BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & (1UL << BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino = 0;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	if (m) {
		struct inode *inode = NILFS_AS_I(m);

		if (inode != NULL)
			ino = inode->i_ino;
	}
	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}
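/*
 * Usage sketch (illustrative comment only, not compiled): once the
 * last dirty buffer on a page has been cleaned, the page-level dirty
 * flag can be cancelled too, mirroring nilfs_forget_buffer() above:
 *
 *	clear_buffer_dirty(bh);
 *	if (nilfs_page_buffers_clean(bh->b_page))
 *		__nilfs_clear_page_dirty(bh->b_page);
 */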
/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device to which the buffers of the page will belong
 * @size: size of each buffer in bytes
 * @state: state bits to set on each allocated buffer
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
				      unsigned long state)
{
	struct buffer_head *bh, *head, *tail;
	struct page *page;

	page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
	if (unlikely(!page))
		return NULL;

	lock_page(page);
	head = alloc_page_buffers(page, size, 0);
	if (unlikely(!head)) {
		unlock_page(page);
		__free_page(page);
		return NULL;
	}

	bh = head;
	do {
		bh->b_state = (1UL << BH_NILFS_Allocated) | state;
		tail = bh;
		bh->b_bdev = bdev;
		bh = bh->b_this_page;
	} while (bh);

	tail->b_this_page = head;
	attach_page_buffers(page, head);

	return page;
}

void nilfs_free_private_page(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping);

	if (page_has_buffers(page) && !try_to_free_buffers(page))
		NILFS_PAGE_BUG(page, "failed to free page");

	unlock_page(page);
	__free_page(page);
}

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
 *
 * This function is for both data pages and btnode pages.  The dirty flag
 * must be handled by the caller.  The page must not be under I/O.
 * Both source and destination pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= (1UL << BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}
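/*
 * Usage sketch (illustrative comment only, not compiled; reading the
 * block device from nilfs->ns_bdev is an assumption about the caller):
 *
 *	page = nilfs_alloc_private_page(nilfs->ns_bdev, blocksize, 0);
 *	if (unlikely(!page))
 *		return -ENOMEM;
 *	... use page_buffers(page) outside any page cache ...
 *	nilfs_free_private_page(page);	--> expects the page still locked
 */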
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec, 0);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
				PAGEVEC_SIZE))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		page_cache_release(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec, 0);
repeat:
	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
	if (!n)
		return;
	index = pvec.pages[n - 1]->index + 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* override existing page on the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			page_cache_release(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				page_cache_release(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}
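/*
 * Usage sketch (illustrative comment only, not compiled; the exact
 * gcdat caller is elided, but the direction of each copy follows the
 * comment below on __nilfs_clear_page_dirty()):
 *
 *	err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping);
 *	... garbage collection updates the shadow (gcdat) cache ...
 *	nilfs_copy_back_pages(dat->i_mapping, gcdat->i_mapping);
 */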
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			lock_page(page);
			ClearPageUptodate(page);
			ClearPageMappedToDisk(page);
			bh = head = page_buffers(page);
			do {
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				clear_buffer_nilfs_volatile(bh);
				clear_buffer_uptodate(bh);
				clear_buffer_mapped(bh);
				unlock_buffer(bh);
				bh = bh->b_this_page;
			} while (bh != head);

			__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

unsigned nilfs_page_count_clean_buffers(struct page *page,
					unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose of
 *    buffers in a dirty state; the dirty state of their pages then needs
 *    to be cancelled.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}
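/*
 * Usage sketch (illustrative comment only, not compiled): in a
 * write_end-style path, counting the still-clean buffers in the copied
 * range gives the number of blocks the write is about to dirty:
 *
 *	nr_dirty = nilfs_page_count_clean_buffers(page, start,
 *						  start + copied);
 */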