/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS  \
        ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
         (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
                       int blkbits, unsigned long b_state)
{
        unsigned long first_block;
        struct buffer_head *bh;

        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << blkbits, b_state);

        first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
        bh = nilfs_page_get_nth_block(page, block - first_block);

        touch_buffer(bh);
        wait_on_buffer(bh);
        return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
        if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
                __set_page_dirty_nobuffers(bh->b_page);
}

struct buffer_head *nilfs_grab_buffer(struct inode *inode,
                                      struct address_space *mapping,
                                      unsigned long blkoff,
                                      unsigned long b_state)
{
        int blkbits = inode->i_blkbits;
        pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
        struct page *page, *opage;
        struct buffer_head *bh, *obh;

        page = grab_cache_page(mapping, index);
        if (unlikely(!page))
                return NULL;

        bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
        if (unlikely(!bh)) {
                unlock_page(page);
                page_cache_release(page);
                return NULL;
        }
        if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) {
                /*
                 * Shadow page cache uses assoc_mapping to point its original
                 * page cache.  The following code tries the original cache
                 * if the given cache is a shadow and it didn't hit.
                 */
                opage = find_lock_page(mapping->assoc_mapping, index);
                if (!opage)
                        return bh;

                obh = __nilfs_get_page_block(opage, blkoff, index, blkbits,
                                             b_state);
                if (buffer_uptodate(obh)) {
                        nilfs_copy_buffer(bh, obh);
                        if (buffer_dirty(obh)) {
                                nilfs_mark_buffer_dirty(bh);
                                if (!buffer_nilfs_node(bh) && NILFS_MDT(inode))
                                        nilfs_mdt_mark_dirty(inode);
                        }
                }
                brelse(obh);
                unlock_page(opage);
                page_cache_release(opage);
        }
        return bh;
}

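/*
 * Usage sketch (illustrative only, not part of the original code): a
 * hypothetical caller of nilfs_grab_buffer() and nilfs_mark_buffer_dirty().
 * The function name nilfs_example_dirty_block() is made up; the point is
 * the reference/locking protocol: on success the buffer is returned with
 * its page locked and with references held on both the page and the
 * buffer head, so the caller must unlock the page and drop both
 * references when done.
 */
#if 0	/* example only */
static int nilfs_example_dirty_block(struct inode *inode, unsigned long blkoff)
{
        struct buffer_head *bh;

        bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
        if (unlikely(!bh))
                return -ENOMEM;

        /* ... modify the block contents through bh->b_data here ... */
        nilfs_mark_buffer_dirty(bh);

        unlock_page(bh->b_page);
        page_cache_release(bh->b_page);
        brelse(bh);
        return 0;
}
#endif
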
/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
        struct page *page = bh->b_page;

        lock_buffer(bh);
        clear_buffer_nilfs_volatile(bh);
        if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page))
                __nilfs_clear_page_dirty(page);

        clear_buffer_uptodate(bh);
        clear_buffer_mapped(bh);
        bh->b_blocknr = -1;
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        unlock_buffer(bh);
        brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
        void *kaddr0, *kaddr1;
        unsigned long bits;
        struct page *spage = sbh->b_page, *dpage = dbh->b_page;
        struct buffer_head *bh;

        kaddr0 = kmap_atomic(spage, KM_USER0);
        kaddr1 = kmap_atomic(dpage, KM_USER1);
        memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
        kunmap_atomic(kaddr1, KM_USER1);
        kunmap_atomic(kaddr0, KM_USER0);

        dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
        dbh->b_blocknr = sbh->b_blocknr;
        dbh->b_bdev = sbh->b_bdev;

        bh = dbh;
        bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
        while ((bh = bh->b_this_page) != dbh) {
                lock_buffer(bh);
                bits &= bh->b_state;
                unlock_buffer(bh);
        }
        if (bits & (1UL << BH_Uptodate))
                SetPageUptodate(dpage);
        else
                ClearPageUptodate(dpage);
        if (bits & (1UL << BH_Mapped))
                SetPageMappedToDisk(dpage);
        else
                ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check whether a page has dirty buffers
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
        struct buffer_head *bh, *head;

        bh = head = page_buffers(page);
        do {
                if (buffer_dirty(bh))
                        return 0;
                bh = bh->b_this_page;
        } while (bh != head);
        return 1;
}

void nilfs_page_bug(struct page *page)
{
        struct address_space *m;
        unsigned long ino = 0;

        if (unlikely(!page)) {
                printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
                return;
        }

        m = page->mapping;
        if (m) {
                struct inode *inode = NILFS_AS_I(m);
                if (inode != NULL)
                        ino = inode->i_ino;
        }
        printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
               "mapping=%p ino=%lu\n",
               page, atomic_read(&page->_count),
               (unsigned long long)page->index, page->flags, m, ino);

        if (page_has_buffers(page)) {
                struct buffer_head *bh, *head;
                int i = 0;

                bh = head = page_buffers(page);
                do {
                        printk(KERN_CRIT
                               " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
                               i++, bh, atomic_read(&bh->b_count),
                               (unsigned long long)bh->b_blocknr, bh->b_state);
                        bh = bh->b_this_page;
                } while (bh != head);
        }
}

/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device to which the buffers will be associated
 * @size: size of each buffer (block size) in bytes
 * @state: initial state bits of the buffers
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
                                      unsigned long state)
{
        struct buffer_head *bh, *head, *tail;
        struct page *page;

        page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
        if (unlikely(!page))
                return NULL;

        lock_page(page);
        head = alloc_page_buffers(page, size, 0);
        if (unlikely(!head)) {
                unlock_page(page);
                __free_page(page);
                return NULL;
        }

        bh = head;
        do {
                bh->b_state = (1UL << BH_NILFS_Allocated) | state;
                tail = bh;
                bh->b_bdev = bdev;
                bh = bh->b_this_page;
        } while (bh);

        tail->b_this_page = head;
        attach_page_buffers(page, head);

        return page;
}

void nilfs_free_private_page(struct page *page)
{
        BUG_ON(!PageLocked(page));
        BUG_ON(page->mapping);

        if (page_has_buffers(page) && !try_to_free_buffers(page))
                NILFS_PAGE_BUG(page, "failed to free page");

        unlock_page(page);
        __free_page(page);
}

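/*
 * Usage sketch (illustrative only, not part of the original code): the
 * intended lifecycle of a private page.  nilfs_alloc_private_page() hands
 * back a locked page with page_count == 1, no mapping, and a ring of
 * buffer heads attached; nilfs_free_private_page() expects that same
 * state and unlocks and frees the page.  The buffers must be clean,
 * unlocked and unreferenced at that point, or try_to_free_buffers()
 * fails.  The function name and the blocksize argument are placeholders.
 */
#if 0	/* example only */
static void nilfs_example_private_page(struct block_device *bdev,
                                       int blocksize)
{
        struct page *page;

        page = nilfs_alloc_private_page(bdev, blocksize, 0);
        if (unlikely(!page))
                return;

        /* ... stage block data in the buffers of page_buffers(page) ... */

        nilfs_free_private_page(page);	/* unlocks and frees the page */
}
#endif
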
/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: whether to copy dirty states of the page's buffer heads
 *
 * This function is used for both data pages and btnode pages.  The dirty
 * flag must be handled by the caller.  The page must not be under I/O.
 * Both src and dst pages must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
        struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
        unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

        BUG_ON(PageWriteback(dst));

        sbh = sbufs = page_buffers(src);
        if (!page_has_buffers(dst))
                create_empty_buffers(dst, sbh->b_size, 0);

        if (copy_dirty)
                mask |= (1UL << BH_Dirty);

        dbh = dbufs = page_buffers(dst);
        do {
                lock_buffer(sbh);
                lock_buffer(dbh);
                dbh->b_state = sbh->b_state & mask;
                dbh->b_blocknr = sbh->b_blocknr;
                dbh->b_bdev = sbh->b_bdev;
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);

        copy_highpage(dst, src);

        if (PageUptodate(src) && !PageUptodate(dst))
                SetPageUptodate(dst);
        else if (!PageUptodate(src) && PageUptodate(dst))
                ClearPageUptodate(dst);
        if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
                SetPageMappedToDisk(dst);
        else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
                ClearPageMappedToDisk(dst);

        do {
                unlock_buffer(sbh);
                unlock_buffer(dbh);
                sbh = sbh->b_this_page;
                dbh = dbh->b_this_page;
        } while (dbh != dbufs);
}

int nilfs_copy_dirty_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;
        int err = 0;

        pagevec_init(&pvec, 0);
repeat:
        if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
                                PAGEVEC_SIZE))
                return 0;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;

                lock_page(page);
                if (unlikely(!PageDirty(page)))
                        NILFS_PAGE_BUG(page, "inconsistent dirty state");

                dpage = grab_cache_page(dmap, page->index);
                if (unlikely(!dpage)) {
                        /* No empty page is added to the page cache */
                        err = -ENOMEM;
                        unlock_page(page);
                        break;
                }
                if (unlikely(!page_has_buffers(page)))
                        NILFS_PAGE_BUG(page,
                                       "found empty page in dat page cache");

                nilfs_copy_page(dpage, page, 1);
                __set_page_dirty_nobuffers(dpage);

                unlock_page(dpage);
                page_cache_release(dpage);
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        if (likely(!err))
                goto repeat;
        return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache while this process is running;
 * the caller must ensure this.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
                           struct address_space *smap)
{
        struct pagevec pvec;
        unsigned int i, n;
        pgoff_t index = 0;
        int err;

        pagevec_init(&pvec, 0);
repeat:
        n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
        if (!n)
                return;
        index = pvec.pages[n - 1]->index + 1;

        for (i = 0; i < pagevec_count(&pvec); i++) {
                struct page *page = pvec.pages[i], *dpage;
                pgoff_t offset = page->index;

                lock_page(page);
                dpage = find_lock_page(dmap, offset);
                if (dpage) {
                        /* override existing page on the destination cache */
                        WARN_ON(PageDirty(dpage));
                        nilfs_copy_page(dpage, page, 0);
                        unlock_page(dpage);
                        page_cache_release(dpage);
                } else {
                        struct page *page2;

                        /* move the page to the destination cache */
                        spin_lock_irq(&smap->tree_lock);
                        page2 = radix_tree_delete(&smap->page_tree, offset);
                        WARN_ON(page2 != page);

                        smap->nrpages--;
                        spin_unlock_irq(&smap->tree_lock);

                        spin_lock_irq(&dmap->tree_lock);
                        err = radix_tree_insert(&dmap->page_tree, offset, page);
                        if (unlikely(err < 0)) {
                                WARN_ON(err == -EEXIST);
                                page->mapping = NULL;
                                page_cache_release(page); /* for cache */
                        } else {
                                page->mapping = dmap;
                                dmap->nrpages++;
                                if (PageDirty(page))
                                        radix_tree_tag_set(&dmap->page_tree,
                                                           offset,
                                                           PAGECACHE_TAG_DIRTY);
                        }
                        spin_unlock_irq(&dmap->tree_lock);
                }
                unlock_page(page);
        }
        pagevec_release(&pvec);
        cond_resched();

        goto repeat;
}

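/*
 * Usage sketch (illustrative only, not part of the original code): how
 * nilfs_copy_dirty_pages() and nilfs_copy_back_pages() pair up for an
 * original/shadow cache couple such as the dat/gcdat page caches.  The
 * function name and the two address_space arguments are placeholders.
 */
#if 0	/* example only */
static int nilfs_example_shadow_cycle(struct address_space *orig,
                                      struct address_space *shadow)
{
        int err;

        /* duplicate the dirty pages of the original cache into the shadow */
        err = nilfs_copy_dirty_pages(shadow, orig);
        if (unlikely(err))
                return err;

        /* ... operate on the shadow cache here ... */

        /* merge the shadow pages back into the original cache */
        nilfs_copy_back_pages(orig, shadow);
        return 0;
}
#endif
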
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
        struct pagevec pvec;
        unsigned int i;
        pgoff_t index = 0;

        pagevec_init(&pvec, 0);

        while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
                                  PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        struct buffer_head *bh, *head;

                        lock_page(page);
                        ClearPageUptodate(page);
                        ClearPageMappedToDisk(page);
                        bh = head = page_buffers(page);
                        do {
                                lock_buffer(bh);
                                clear_buffer_dirty(bh);
                                clear_buffer_nilfs_volatile(bh);
                                clear_buffer_uptodate(bh);
                                clear_buffer_mapped(bh);
                                unlock_buffer(bh);
                                bh = bh->b_this_page;
                        } while (bh != head);

                        __nilfs_clear_page_dirty(page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}

unsigned nilfs_page_count_clean_buffers(struct page *page,
                                        unsigned from, unsigned to)
{
        unsigned block_start, block_end;
        struct buffer_head *bh, *head;
        unsigned nc = 0;

        for (bh = head = page_buffers(page), block_start = 0;
             bh != head || !block_start;
             block_start = block_end, bh = bh->b_this_page) {
                block_end = block_start + bh->b_size;
                if (block_end > from && block_start < to && !buffer_dirty(bh))
                        nc++;
        }
        return nc;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;

        if (mapping) {
                spin_lock_irq(&mapping->tree_lock);
                if (test_bit(PG_dirty, &page->flags)) {
                        radix_tree_tag_clear(&mapping->page_tree,
                                             page_index(page),
                                             PAGECACHE_TAG_DIRTY);
                        spin_unlock_irq(&mapping->tree_lock);
                        return clear_page_dirty_for_io(page);
                }
                spin_unlock_irq(&mapping->tree_lock);
                return 0;
        }
        return TestClearPageDirty(page);
}