1 #include <linux/bitops.h> 2 #include <linux/slab.h> 3 #include <linux/bio.h> 4 #include <linux/mm.h> 5 #include <linux/gfp.h> 6 #include <linux/pagemap.h> 7 #include <linux/page-flags.h> 8 #include <linux/module.h> 9 #include <linux/spinlock.h> 10 #include <linux/blkdev.h> 11 #include <linux/swap.h> 12 #include <linux/writeback.h> 13 #include <linux/pagevec.h> 14 #include "extent_io.h" 15 #include "extent_map.h" 16 #include "compat.h" 17 #include "ctree.h" 18 #include "btrfs_inode.h" 19 20 /* temporary define until extent_map moves out of btrfs */ 21 struct kmem_cache *btrfs_cache_create(const char *name, size_t size, 22 unsigned long extra_flags, 23 void (*ctor)(void *, struct kmem_cache *, 24 unsigned long)); 25 26 static struct kmem_cache *extent_state_cache; 27 static struct kmem_cache *extent_buffer_cache; 28 29 static LIST_HEAD(buffers); 30 static LIST_HEAD(states); 31 32 #define LEAK_DEBUG 0 33 #if LEAK_DEBUG 34 static DEFINE_SPINLOCK(leak_lock); 35 #endif 36 37 #define BUFFER_LRU_MAX 64 38 39 struct tree_entry { 40 u64 start; 41 u64 end; 42 struct rb_node rb_node; 43 }; 44 45 struct extent_page_data { 46 struct bio *bio; 47 struct extent_io_tree *tree; 48 get_extent_t *get_extent; 49 50 /* tells writepage not to lock the state bits for this range 51 * it still does the unlocking 52 */ 53 int extent_locked; 54 }; 55 56 int __init extent_io_init(void) 57 { 58 extent_state_cache = btrfs_cache_create("extent_state", 59 sizeof(struct extent_state), 0, 60 NULL); 61 if (!extent_state_cache) 62 return -ENOMEM; 63 64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 65 sizeof(struct extent_buffer), 0, 66 NULL); 67 if (!extent_buffer_cache) 68 goto free_state_cache; 69 return 0; 70 71 free_state_cache: 72 kmem_cache_destroy(extent_state_cache); 73 return -ENOMEM; 74 } 75 76 void extent_io_exit(void) 77 { 78 struct extent_state *state; 79 struct extent_buffer *eb; 80 81 while (!list_empty(&states)) { 82 state = list_entry(states.next, struct extent_state, leak_list); 83 printk(KERN_ERR "btrfs state leak: start %llu end %llu " 84 "state %lu in tree %p refs %d\n", 85 (unsigned long long)state->start, 86 (unsigned long long)state->end, 87 state->state, state->tree, atomic_read(&state->refs)); 88 list_del(&state->leak_list); 89 kmem_cache_free(extent_state_cache, state); 90 91 } 92 93 while (!list_empty(&buffers)) { 94 eb = list_entry(buffers.next, struct extent_buffer, leak_list); 95 printk(KERN_ERR "btrfs buffer leak start %llu len %lu " 96 "refs %d\n", (unsigned long long)eb->start, 97 eb->len, atomic_read(&eb->refs)); 98 list_del(&eb->leak_list); 99 kmem_cache_free(extent_buffer_cache, eb); 100 } 101 if (extent_state_cache) 102 kmem_cache_destroy(extent_state_cache); 103 if (extent_buffer_cache) 104 kmem_cache_destroy(extent_buffer_cache); 105 } 106 107 void extent_io_tree_init(struct extent_io_tree *tree, 108 struct address_space *mapping, gfp_t mask) 109 { 110 tree->state.rb_node = NULL; 111 tree->buffer.rb_node = NULL; 112 tree->ops = NULL; 113 tree->dirty_bytes = 0; 114 spin_lock_init(&tree->lock); 115 spin_lock_init(&tree->buffer_lock); 116 tree->mapping = mapping; 117 } 118 119 static struct extent_state *alloc_extent_state(gfp_t mask) 120 { 121 struct extent_state *state; 122 #if LEAK_DEBUG 123 unsigned long flags; 124 #endif 125 126 state = kmem_cache_alloc(extent_state_cache, mask); 127 if (!state) 128 return state; 129 state->state = 0; 130 state->private = 0; 131 state->tree = NULL; 132 #if LEAK_DEBUG 133 spin_lock_irqsave(&leak_lock, flags); 134 
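	/*
	 * leak debugging: while LEAK_DEBUG is enabled, every allocated
	 * extent_state is threaded onto the global 'states' list under
	 * leak_lock so extent_io_exit() can report anything left over.
	 */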
list_add(&state->leak_list, &states); 135 spin_unlock_irqrestore(&leak_lock, flags); 136 #endif 137 atomic_set(&state->refs, 1); 138 init_waitqueue_head(&state->wq); 139 return state; 140 } 141 142 static void free_extent_state(struct extent_state *state) 143 { 144 if (!state) 145 return; 146 if (atomic_dec_and_test(&state->refs)) { 147 #if LEAK_DEBUG 148 unsigned long flags; 149 #endif 150 WARN_ON(state->tree); 151 #if LEAK_DEBUG 152 spin_lock_irqsave(&leak_lock, flags); 153 list_del(&state->leak_list); 154 spin_unlock_irqrestore(&leak_lock, flags); 155 #endif 156 kmem_cache_free(extent_state_cache, state); 157 } 158 } 159 160 static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 161 struct rb_node *node) 162 { 163 struct rb_node **p = &root->rb_node; 164 struct rb_node *parent = NULL; 165 struct tree_entry *entry; 166 167 while (*p) { 168 parent = *p; 169 entry = rb_entry(parent, struct tree_entry, rb_node); 170 171 if (offset < entry->start) 172 p = &(*p)->rb_left; 173 else if (offset > entry->end) 174 p = &(*p)->rb_right; 175 else 176 return parent; 177 } 178 179 entry = rb_entry(node, struct tree_entry, rb_node); 180 rb_link_node(node, parent, p); 181 rb_insert_color(node, root); 182 return NULL; 183 } 184 185 static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, 186 struct rb_node **prev_ret, 187 struct rb_node **next_ret) 188 { 189 struct rb_root *root = &tree->state; 190 struct rb_node *n = root->rb_node; 191 struct rb_node *prev = NULL; 192 struct rb_node *orig_prev = NULL; 193 struct tree_entry *entry; 194 struct tree_entry *prev_entry = NULL; 195 196 while (n) { 197 entry = rb_entry(n, struct tree_entry, rb_node); 198 prev = n; 199 prev_entry = entry; 200 201 if (offset < entry->start) 202 n = n->rb_left; 203 else if (offset > entry->end) 204 n = n->rb_right; 205 else 206 return n; 207 } 208 209 if (prev_ret) { 210 orig_prev = prev; 211 while (prev && offset > prev_entry->end) { 212 prev = rb_next(prev); 213 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 214 } 215 *prev_ret = prev; 216 prev = orig_prev; 217 } 218 219 if (next_ret) { 220 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 221 while (prev && offset < prev_entry->start) { 222 prev = rb_prev(prev); 223 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 224 } 225 *next_ret = prev; 226 } 227 return NULL; 228 } 229 230 static inline struct rb_node *tree_search(struct extent_io_tree *tree, 231 u64 offset) 232 { 233 struct rb_node *prev = NULL; 234 struct rb_node *ret; 235 236 ret = __etree_search(tree, offset, &prev, NULL); 237 if (!ret) 238 return prev; 239 return ret; 240 } 241 242 static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree, 243 u64 offset, struct rb_node *node) 244 { 245 struct rb_root *root = &tree->buffer; 246 struct rb_node **p = &root->rb_node; 247 struct rb_node *parent = NULL; 248 struct extent_buffer *eb; 249 250 while (*p) { 251 parent = *p; 252 eb = rb_entry(parent, struct extent_buffer, rb_node); 253 254 if (offset < eb->start) 255 p = &(*p)->rb_left; 256 else if (offset > eb->start) 257 p = &(*p)->rb_right; 258 else 259 return eb; 260 } 261 262 rb_link_node(node, parent, p); 263 rb_insert_color(node, root); 264 return NULL; 265 } 266 267 static struct extent_buffer *buffer_search(struct extent_io_tree *tree, 268 u64 offset) 269 { 270 struct rb_root *root = &tree->buffer; 271 struct rb_node *n = root->rb_node; 272 struct extent_buffer *eb; 273 274 while (n) { 275 eb = rb_entry(n, struct extent_buffer, rb_node); 276 
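		/*
		 * plain rb-tree walk: extent buffers are keyed by their
		 * start offset alone, so only an exact match returns a
		 * buffer (the state tree, by contrast, matches any offset
		 * inside [start, end]).
		 */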
if (offset < eb->start) 277 n = n->rb_left; 278 else if (offset > eb->start) 279 n = n->rb_right; 280 else 281 return eb; 282 } 283 return NULL; 284 } 285 286 /* 287 * utility function to look for merge candidates inside a given range. 288 * Any extents with matching state are merged together into a single 289 * extent in the tree. Extents with EXTENT_IO in their state field 290 * are not merged because the end_io handlers need to be able to do 291 * operations on them without sleeping (or doing allocations/splits). 292 * 293 * This should be called with the tree lock held. 294 */ 295 static int merge_state(struct extent_io_tree *tree, 296 struct extent_state *state) 297 { 298 struct extent_state *other; 299 struct rb_node *other_node; 300 301 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) 302 return 0; 303 304 other_node = rb_prev(&state->rb_node); 305 if (other_node) { 306 other = rb_entry(other_node, struct extent_state, rb_node); 307 if (other->end == state->start - 1 && 308 other->state == state->state) { 309 state->start = other->start; 310 other->tree = NULL; 311 rb_erase(&other->rb_node, &tree->state); 312 free_extent_state(other); 313 } 314 } 315 other_node = rb_next(&state->rb_node); 316 if (other_node) { 317 other = rb_entry(other_node, struct extent_state, rb_node); 318 if (other->start == state->end + 1 && 319 other->state == state->state) { 320 other->start = state->start; 321 state->tree = NULL; 322 rb_erase(&state->rb_node, &tree->state); 323 free_extent_state(state); 324 } 325 } 326 return 0; 327 } 328 329 static void set_state_cb(struct extent_io_tree *tree, 330 struct extent_state *state, 331 unsigned long bits) 332 { 333 if (tree->ops && tree->ops->set_bit_hook) { 334 tree->ops->set_bit_hook(tree->mapping->host, state->start, 335 state->end, state->state, bits); 336 } 337 } 338 339 static void clear_state_cb(struct extent_io_tree *tree, 340 struct extent_state *state, 341 unsigned long bits) 342 { 343 if (tree->ops && tree->ops->clear_bit_hook) { 344 tree->ops->clear_bit_hook(tree->mapping->host, state->start, 345 state->end, state->state, bits); 346 } 347 } 348 349 /* 350 * insert an extent_state struct into the tree. 'bits' are set on the 351 * struct before it is inserted. 352 * 353 * This may return -EEXIST if the extent is already there, in which case the 354 * state struct is freed. 355 * 356 * The tree lock is not taken internally. This is a utility function and 357 * probably isn't what you want to call (see set/clear_extent_bit). 
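 *
 * A minimal call sketch (illustrative only; real callers normally go
 * through set_extent_bit(), which handles locking, allocation and retries):
 *
 *	state = alloc_extent_state(GFP_NOFS);
 *	spin_lock(&tree->lock);
 *	err = insert_state(tree, state, start, end, EXTENT_DIRTY);
 *	spin_unlock(&tree->lock);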
358 */ 359 static int insert_state(struct extent_io_tree *tree, 360 struct extent_state *state, u64 start, u64 end, 361 int bits) 362 { 363 struct rb_node *node; 364 365 if (end < start) { 366 printk(KERN_ERR "btrfs end < start %llu %llu\n", 367 (unsigned long long)end, 368 (unsigned long long)start); 369 WARN_ON(1); 370 } 371 if (bits & EXTENT_DIRTY) 372 tree->dirty_bytes += end - start + 1; 373 set_state_cb(tree, state, bits); 374 state->state |= bits; 375 state->start = start; 376 state->end = end; 377 node = tree_insert(&tree->state, end, &state->rb_node); 378 if (node) { 379 struct extent_state *found; 380 found = rb_entry(node, struct extent_state, rb_node); 381 printk(KERN_ERR "btrfs found node %llu %llu on insert of " 382 "%llu %llu\n", (unsigned long long)found->start, 383 (unsigned long long)found->end, 384 (unsigned long long)start, (unsigned long long)end); 385 free_extent_state(state); 386 return -EEXIST; 387 } 388 state->tree = tree; 389 merge_state(tree, state); 390 return 0; 391 } 392 393 /* 394 * split a given extent state struct in two, inserting the preallocated 395 * struct 'prealloc' as the newly created second half. 'split' indicates an 396 * offset inside 'orig' where it should be split. 397 * 398 * Before calling, 399 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 400 * are two extent state structs in the tree: 401 * prealloc: [orig->start, split - 1] 402 * orig: [ split, orig->end ] 403 * 404 * The tree locks are not taken by this function. They need to be held 405 * by the caller. 406 */ 407 static int split_state(struct extent_io_tree *tree, struct extent_state *orig, 408 struct extent_state *prealloc, u64 split) 409 { 410 struct rb_node *node; 411 prealloc->start = orig->start; 412 prealloc->end = split - 1; 413 prealloc->state = orig->state; 414 orig->start = split; 415 416 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); 417 if (node) { 418 free_extent_state(prealloc); 419 return -EEXIST; 420 } 421 prealloc->tree = tree; 422 return 0; 423 } 424 425 /* 426 * utility function to clear some bits in an extent state struct. 427 * it will optionally wake up any one waiting on this state (wake == 1), or 428 * forcibly remove the state from the tree (delete == 1). 429 * 430 * If no bits are set on the state struct after clearing things, the 431 * struct is freed and removed from the tree 432 */ 433 static int clear_state_bit(struct extent_io_tree *tree, 434 struct extent_state *state, int bits, int wake, 435 int delete) 436 { 437 int ret = state->state & bits; 438 439 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { 440 u64 range = state->end - state->start + 1; 441 WARN_ON(range > tree->dirty_bytes); 442 tree->dirty_bytes -= range; 443 } 444 clear_state_cb(tree, state, bits); 445 state->state &= ~bits; 446 if (wake) 447 wake_up(&state->wq); 448 if (delete || state->state == 0) { 449 if (state->tree) { 450 clear_state_cb(tree, state, state->state); 451 rb_erase(&state->rb_node, &tree->state); 452 state->tree = NULL; 453 free_extent_state(state); 454 } else { 455 WARN_ON(1); 456 } 457 } else { 458 merge_state(tree, state); 459 } 460 return ret; 461 } 462 463 /* 464 * clear some bits on a range in the tree. This may require splitting 465 * or inserting elements in the tree, so the gfp mask is used to 466 * indicate which allocations or sleeping are allowed. 467 * 468 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove 469 * the given range from the tree regardless of state (ie for truncate). 
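 *
 * For example (illustrative only), clearing the dirty and delalloc bits
 * on a range while waking any waiters looks like:
 *
 *	clear_extent_bit(tree, start, end,
 *			 EXTENT_DIRTY | EXTENT_DELALLOC, 1, 0, GFP_NOFS);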
470 * 471 * the range [start, end] is inclusive. 472 * 473 * This takes the tree lock, and returns < 0 on error, > 0 if any of the 474 * bits were already set, or zero if none of the bits were already set. 475 */ 476 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 477 int bits, int wake, int delete, gfp_t mask) 478 { 479 struct extent_state *state; 480 struct extent_state *prealloc = NULL; 481 struct rb_node *node; 482 int err; 483 int set = 0; 484 485 again: 486 if (!prealloc && (mask & __GFP_WAIT)) { 487 prealloc = alloc_extent_state(mask); 488 if (!prealloc) 489 return -ENOMEM; 490 } 491 492 spin_lock(&tree->lock); 493 /* 494 * this search will find the extents that end after 495 * our range starts 496 */ 497 node = tree_search(tree, start); 498 if (!node) 499 goto out; 500 state = rb_entry(node, struct extent_state, rb_node); 501 if (state->start > end) 502 goto out; 503 WARN_ON(state->end < start); 504 505 /* 506 * | ---- desired range ---- | 507 * | state | or 508 * | ------------- state -------------- | 509 * 510 * We need to split the extent we found, and may flip 511 * bits on second half. 512 * 513 * If the extent we found extends past our range, we 514 * just split and search again. It'll get split again 515 * the next time though. 516 * 517 * If the extent we found is inside our range, we clear 518 * the desired bit on it. 519 */ 520 521 if (state->start < start) { 522 if (!prealloc) 523 prealloc = alloc_extent_state(GFP_ATOMIC); 524 err = split_state(tree, state, prealloc, start); 525 BUG_ON(err == -EEXIST); 526 prealloc = NULL; 527 if (err) 528 goto out; 529 if (state->end <= end) { 530 start = state->end + 1; 531 set |= clear_state_bit(tree, state, bits, 532 wake, delete); 533 } else { 534 start = state->start; 535 } 536 goto search_again; 537 } 538 /* 539 * | ---- desired range ---- | 540 * | state | 541 * We need to split the extent, and clear the bit 542 * on the first half 543 */ 544 if (state->start <= end && state->end > end) { 545 if (!prealloc) 546 prealloc = alloc_extent_state(GFP_ATOMIC); 547 err = split_state(tree, state, prealloc, end + 1); 548 BUG_ON(err == -EEXIST); 549 550 if (wake) 551 wake_up(&state->wq); 552 set |= clear_state_bit(tree, prealloc, bits, 553 wake, delete); 554 prealloc = NULL; 555 goto out; 556 } 557 558 start = state->end + 1; 559 set |= clear_state_bit(tree, state, bits, wake, delete); 560 goto search_again; 561 562 out: 563 spin_unlock(&tree->lock); 564 if (prealloc) 565 free_extent_state(prealloc); 566 567 return set; 568 569 search_again: 570 if (start > end) 571 goto out; 572 spin_unlock(&tree->lock); 573 if (mask & __GFP_WAIT) 574 cond_resched(); 575 goto again; 576 } 577 578 static int wait_on_state(struct extent_io_tree *tree, 579 struct extent_state *state) 580 __releases(tree->lock) 581 __acquires(tree->lock) 582 { 583 DEFINE_WAIT(wait); 584 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 585 spin_unlock(&tree->lock); 586 schedule(); 587 spin_lock(&tree->lock); 588 finish_wait(&state->wq, &wait); 589 return 0; 590 } 591 592 /* 593 * waits for one or more bits to clear on a range in the state tree. 594 * The range [start, end] is inclusive. 
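 * Callers that need to block until writeback finishes, for example,
 * pass EXTENT_WRITEBACK here (see wait_on_extent_writeback() below).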
595 * The tree lock is taken by this function 596 */ 597 int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) 598 { 599 struct extent_state *state; 600 struct rb_node *node; 601 602 spin_lock(&tree->lock); 603 again: 604 while (1) { 605 /* 606 * this search will find all the extents that end after 607 * our range starts 608 */ 609 node = tree_search(tree, start); 610 if (!node) 611 break; 612 613 state = rb_entry(node, struct extent_state, rb_node); 614 615 if (state->start > end) 616 goto out; 617 618 if (state->state & bits) { 619 start = state->start; 620 atomic_inc(&state->refs); 621 wait_on_state(tree, state); 622 free_extent_state(state); 623 goto again; 624 } 625 start = state->end + 1; 626 627 if (start > end) 628 break; 629 630 if (need_resched()) { 631 spin_unlock(&tree->lock); 632 cond_resched(); 633 spin_lock(&tree->lock); 634 } 635 } 636 out: 637 spin_unlock(&tree->lock); 638 return 0; 639 } 640 641 static void set_state_bits(struct extent_io_tree *tree, 642 struct extent_state *state, 643 int bits) 644 { 645 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 646 u64 range = state->end - state->start + 1; 647 tree->dirty_bytes += range; 648 } 649 set_state_cb(tree, state, bits); 650 state->state |= bits; 651 } 652 653 /* 654 * set some bits on a range in the tree. This may require allocations 655 * or sleeping, so the gfp mask is used to indicate what is allowed. 656 * 657 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 658 * range already has the desired bits set. The start of the existing 659 * range is returned in failed_start in this case. 660 * 661 * [start, end] is inclusive 662 * This takes the tree lock. 663 */ 664 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 665 int bits, int exclusive, u64 *failed_start, 666 gfp_t mask) 667 { 668 struct extent_state *state; 669 struct extent_state *prealloc = NULL; 670 struct rb_node *node; 671 int err = 0; 672 int set; 673 u64 last_start; 674 u64 last_end; 675 again: 676 if (!prealloc && (mask & __GFP_WAIT)) { 677 prealloc = alloc_extent_state(mask); 678 if (!prealloc) 679 return -ENOMEM; 680 } 681 682 spin_lock(&tree->lock); 683 /* 684 * this search will find all the extents that end after 685 * our range starts. 686 */ 687 node = tree_search(tree, start); 688 if (!node) { 689 err = insert_state(tree, prealloc, start, end, bits); 690 prealloc = NULL; 691 BUG_ON(err == -EEXIST); 692 goto out; 693 } 694 695 state = rb_entry(node, struct extent_state, rb_node); 696 last_start = state->start; 697 last_end = state->end; 698 699 /* 700 * | ---- desired range ---- | 701 * | state | 702 * 703 * Just lock what we found and keep going 704 */ 705 if (state->start == start && state->end <= end) { 706 set = state->state & bits; 707 if (set && exclusive) { 708 *failed_start = state->start; 709 err = -EEXIST; 710 goto out; 711 } 712 set_state_bits(tree, state, bits); 713 start = state->end + 1; 714 merge_state(tree, state); 715 goto search_again; 716 } 717 718 /* 719 * | ---- desired range ---- | 720 * | state | 721 * or 722 * | ------------- state -------------- | 723 * 724 * We need to split the extent we found, and may flip bits on 725 * second half. 726 * 727 * If the extent we found extends past our 728 * range, we just split and search again. It'll get split 729 * again the next time though. 730 * 731 * If the extent we found is inside our range, we set the 732 * desired bit on it. 
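	 *
	 * The splits below use 'prealloc', which was allocated before the
	 * tree lock was taken, so they never have to sleep under the lock.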
733 */ 734 if (state->start < start) { 735 set = state->state & bits; 736 if (exclusive && set) { 737 *failed_start = start; 738 err = -EEXIST; 739 goto out; 740 } 741 err = split_state(tree, state, prealloc, start); 742 BUG_ON(err == -EEXIST); 743 prealloc = NULL; 744 if (err) 745 goto out; 746 if (state->end <= end) { 747 set_state_bits(tree, state, bits); 748 start = state->end + 1; 749 merge_state(tree, state); 750 } else { 751 start = state->start; 752 } 753 goto search_again; 754 } 755 /* 756 * | ---- desired range ---- | 757 * | state | or | state | 758 * 759 * There's a hole, we need to insert something in it and 760 * ignore the extent we found. 761 */ 762 if (state->start > start) { 763 u64 this_end; 764 if (end < last_start) 765 this_end = end; 766 else 767 this_end = last_start - 1; 768 err = insert_state(tree, prealloc, start, this_end, 769 bits); 770 prealloc = NULL; 771 BUG_ON(err == -EEXIST); 772 if (err) 773 goto out; 774 start = this_end + 1; 775 goto search_again; 776 } 777 /* 778 * | ---- desired range ---- | 779 * | state | 780 * We need to split the extent, and set the bit 781 * on the first half 782 */ 783 if (state->start <= end && state->end > end) { 784 set = state->state & bits; 785 if (exclusive && set) { 786 *failed_start = start; 787 err = -EEXIST; 788 goto out; 789 } 790 err = split_state(tree, state, prealloc, end + 1); 791 BUG_ON(err == -EEXIST); 792 793 set_state_bits(tree, prealloc, bits); 794 merge_state(tree, prealloc); 795 prealloc = NULL; 796 goto out; 797 } 798 799 goto search_again; 800 801 out: 802 spin_unlock(&tree->lock); 803 if (prealloc) 804 free_extent_state(prealloc); 805 806 return err; 807 808 search_again: 809 if (start > end) 810 goto out; 811 spin_unlock(&tree->lock); 812 if (mask & __GFP_WAIT) 813 cond_resched(); 814 goto again; 815 } 816 817 /* wrappers around set/clear extent bit */ 818 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 819 gfp_t mask) 820 { 821 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 822 mask); 823 } 824 825 int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 826 gfp_t mask) 827 { 828 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); 829 } 830 831 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 832 int bits, gfp_t mask) 833 { 834 return set_extent_bit(tree, start, end, bits, 0, NULL, 835 mask); 836 } 837 838 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 839 int bits, gfp_t mask) 840 { 841 return clear_extent_bit(tree, start, end, bits, 0, 0, mask); 842 } 843 844 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 845 gfp_t mask) 846 { 847 return set_extent_bit(tree, start, end, 848 EXTENT_DELALLOC | EXTENT_DIRTY, 849 0, NULL, mask); 850 } 851 852 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 853 gfp_t mask) 854 { 855 return clear_extent_bit(tree, start, end, 856 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 857 } 858 859 int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 860 gfp_t mask) 861 { 862 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); 863 } 864 865 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 866 gfp_t mask) 867 { 868 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 869 mask); 870 } 871 872 static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 873 gfp_t mask) 874 { 875 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, 
mask); 876 } 877 878 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 879 gfp_t mask) 880 { 881 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 882 mask); 883 } 884 885 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 886 u64 end, gfp_t mask) 887 { 888 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 889 } 890 891 static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, 892 gfp_t mask) 893 { 894 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, 895 0, NULL, mask); 896 } 897 898 static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, 899 u64 end, gfp_t mask) 900 { 901 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); 902 } 903 904 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 905 { 906 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); 907 } 908 909 /* 910 * either insert or lock state struct between start and end use mask to tell 911 * us if waiting is desired. 912 */ 913 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 914 { 915 int err; 916 u64 failed_start; 917 while (1) { 918 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 919 &failed_start, mask); 920 if (err == -EEXIST && (mask & __GFP_WAIT)) { 921 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 922 start = failed_start; 923 } else { 924 break; 925 } 926 WARN_ON(start > end); 927 } 928 return err; 929 } 930 931 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 932 gfp_t mask) 933 { 934 int err; 935 u64 failed_start; 936 937 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 938 &failed_start, mask); 939 if (err == -EEXIST) { 940 if (failed_start > start) 941 clear_extent_bit(tree, start, failed_start - 1, 942 EXTENT_LOCKED, 1, 0, mask); 943 return 0; 944 } 945 return 1; 946 } 947 948 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 949 gfp_t mask) 950 { 951 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 952 } 953 954 /* 955 * helper function to set pages and extents in the tree dirty 956 */ 957 int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) 958 { 959 unsigned long index = start >> PAGE_CACHE_SHIFT; 960 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 961 struct page *page; 962 963 while (index <= end_index) { 964 page = find_get_page(tree->mapping, index); 965 BUG_ON(!page); 966 __set_page_dirty_nobuffers(page); 967 page_cache_release(page); 968 index++; 969 } 970 set_extent_dirty(tree, start, end, GFP_NOFS); 971 return 0; 972 } 973 974 /* 975 * helper function to set both pages and extents in the tree writeback 976 */ 977 static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) 978 { 979 unsigned long index = start >> PAGE_CACHE_SHIFT; 980 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 981 struct page *page; 982 983 while (index <= end_index) { 984 page = find_get_page(tree->mapping, index); 985 BUG_ON(!page); 986 set_page_writeback(page); 987 page_cache_release(page); 988 index++; 989 } 990 set_extent_writeback(tree, start, end, GFP_NOFS); 991 return 0; 992 } 993 994 /* 995 * find the first offset in the io tree with 'bits' set. zero is 996 * returned if we find something, and *start_ret and *end_ret are 997 * set to reflect the state struct that was found. 
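 *
 * A typical caller (sketch only) walks matching ranges like so:
 *
 *	u64 found_start, found_end;
 *	while (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				      EXTENT_DIRTY)) {
 *		... process [found_start, found_end] ...
 *		cur = found_end + 1;
 *	}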
998 * 999 * If nothing was found, 1 is returned, < 0 on error 1000 */ 1001 int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 1002 u64 *start_ret, u64 *end_ret, int bits) 1003 { 1004 struct rb_node *node; 1005 struct extent_state *state; 1006 int ret = 1; 1007 1008 spin_lock(&tree->lock); 1009 /* 1010 * this search will find all the extents that end after 1011 * our range starts. 1012 */ 1013 node = tree_search(tree, start); 1014 if (!node) 1015 goto out; 1016 1017 while (1) { 1018 state = rb_entry(node, struct extent_state, rb_node); 1019 if (state->end >= start && (state->state & bits)) { 1020 *start_ret = state->start; 1021 *end_ret = state->end; 1022 ret = 0; 1023 break; 1024 } 1025 node = rb_next(node); 1026 if (!node) 1027 break; 1028 } 1029 out: 1030 spin_unlock(&tree->lock); 1031 return ret; 1032 } 1033 1034 /* find the first state struct with 'bits' set after 'start', and 1035 * return it. tree->lock must be held. NULL will returned if 1036 * nothing was found after 'start' 1037 */ 1038 struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, 1039 u64 start, int bits) 1040 { 1041 struct rb_node *node; 1042 struct extent_state *state; 1043 1044 /* 1045 * this search will find all the extents that end after 1046 * our range starts. 1047 */ 1048 node = tree_search(tree, start); 1049 if (!node) 1050 goto out; 1051 1052 while (1) { 1053 state = rb_entry(node, struct extent_state, rb_node); 1054 if (state->end >= start && (state->state & bits)) 1055 return state; 1056 1057 node = rb_next(node); 1058 if (!node) 1059 break; 1060 } 1061 out: 1062 return NULL; 1063 } 1064 1065 /* 1066 * find a contiguous range of bytes in the file marked as delalloc, not 1067 * more than 'max_bytes'. start and end are used to return the range, 1068 * 1069 * 1 is returned if we find something, 0 if nothing was in the tree 1070 */ 1071 static noinline u64 find_delalloc_range(struct extent_io_tree *tree, 1072 u64 *start, u64 *end, u64 max_bytes) 1073 { 1074 struct rb_node *node; 1075 struct extent_state *state; 1076 u64 cur_start = *start; 1077 u64 found = 0; 1078 u64 total_bytes = 0; 1079 1080 spin_lock(&tree->lock); 1081 1082 /* 1083 * this search will find all the extents that end after 1084 * our range starts. 
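	 * (tree_search() hands back the first state that ends at or after
	 * cur_start, or NULL if nothing in the tree ends past that point)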
1085 */ 1086 node = tree_search(tree, cur_start); 1087 if (!node) { 1088 if (!found) 1089 *end = (u64)-1; 1090 goto out; 1091 } 1092 1093 while (1) { 1094 state = rb_entry(node, struct extent_state, rb_node); 1095 if (found && (state->start != cur_start || 1096 (state->state & EXTENT_BOUNDARY))) { 1097 goto out; 1098 } 1099 if (!(state->state & EXTENT_DELALLOC)) { 1100 if (!found) 1101 *end = state->end; 1102 goto out; 1103 } 1104 if (!found) 1105 *start = state->start; 1106 found++; 1107 *end = state->end; 1108 cur_start = state->end + 1; 1109 node = rb_next(node); 1110 if (!node) 1111 break; 1112 total_bytes += state->end - state->start + 1; 1113 if (total_bytes >= max_bytes) 1114 break; 1115 } 1116 out: 1117 spin_unlock(&tree->lock); 1118 return found; 1119 } 1120 1121 static noinline int __unlock_for_delalloc(struct inode *inode, 1122 struct page *locked_page, 1123 u64 start, u64 end) 1124 { 1125 int ret; 1126 struct page *pages[16]; 1127 unsigned long index = start >> PAGE_CACHE_SHIFT; 1128 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1129 unsigned long nr_pages = end_index - index + 1; 1130 int i; 1131 1132 if (index == locked_page->index && end_index == index) 1133 return 0; 1134 1135 while (nr_pages > 0) { 1136 ret = find_get_pages_contig(inode->i_mapping, index, 1137 min_t(unsigned long, nr_pages, 1138 ARRAY_SIZE(pages)), pages); 1139 for (i = 0; i < ret; i++) { 1140 if (pages[i] != locked_page) 1141 unlock_page(pages[i]); 1142 page_cache_release(pages[i]); 1143 } 1144 nr_pages -= ret; 1145 index += ret; 1146 cond_resched(); 1147 } 1148 return 0; 1149 } 1150 1151 static noinline int lock_delalloc_pages(struct inode *inode, 1152 struct page *locked_page, 1153 u64 delalloc_start, 1154 u64 delalloc_end) 1155 { 1156 unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT; 1157 unsigned long start_index = index; 1158 unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT; 1159 unsigned long pages_locked = 0; 1160 struct page *pages[16]; 1161 unsigned long nrpages; 1162 int ret; 1163 int i; 1164 1165 /* the caller is responsible for locking the start index */ 1166 if (index == locked_page->index && index == end_index) 1167 return 0; 1168 1169 /* skip the page at the start index */ 1170 nrpages = end_index - index + 1; 1171 while (nrpages > 0) { 1172 ret = find_get_pages_contig(inode->i_mapping, index, 1173 min_t(unsigned long, 1174 nrpages, ARRAY_SIZE(pages)), pages); 1175 if (ret == 0) { 1176 ret = -EAGAIN; 1177 goto done; 1178 } 1179 /* now we have an array of pages, lock them all */ 1180 for (i = 0; i < ret; i++) { 1181 /* 1182 * the caller is taking responsibility for 1183 * locked_page 1184 */ 1185 if (pages[i] != locked_page) { 1186 lock_page(pages[i]); 1187 if (!PageDirty(pages[i]) || 1188 pages[i]->mapping != inode->i_mapping) { 1189 ret = -EAGAIN; 1190 unlock_page(pages[i]); 1191 page_cache_release(pages[i]); 1192 goto done; 1193 } 1194 } 1195 page_cache_release(pages[i]); 1196 pages_locked++; 1197 } 1198 nrpages -= ret; 1199 index += ret; 1200 cond_resched(); 1201 } 1202 ret = 0; 1203 done: 1204 if (ret && pages_locked) { 1205 __unlock_for_delalloc(inode, locked_page, 1206 delalloc_start, 1207 ((u64)(start_index + pages_locked - 1)) << 1208 PAGE_CACHE_SHIFT); 1209 } 1210 return ret; 1211 } 1212 1213 /* 1214 * find a contiguous range of bytes in the file marked as delalloc, not 1215 * more than 'max_bytes'. 
start and end are used to return the range, 1216 * 1217 * 1 is returned if we find something, 0 if nothing was in the tree 1218 */ 1219 static noinline u64 find_lock_delalloc_range(struct inode *inode, 1220 struct extent_io_tree *tree, 1221 struct page *locked_page, 1222 u64 *start, u64 *end, 1223 u64 max_bytes) 1224 { 1225 u64 delalloc_start; 1226 u64 delalloc_end; 1227 u64 found; 1228 int ret; 1229 int loops = 0; 1230 1231 again: 1232 /* step one, find a bunch of delalloc bytes starting at start */ 1233 delalloc_start = *start; 1234 delalloc_end = 0; 1235 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, 1236 max_bytes); 1237 if (!found || delalloc_end <= *start) { 1238 *start = delalloc_start; 1239 *end = delalloc_end; 1240 return found; 1241 } 1242 1243 /* 1244 * start comes from the offset of locked_page. We have to lock 1245 * pages in order, so we can't process delalloc bytes before 1246 * locked_page 1247 */ 1248 if (delalloc_start < *start) 1249 delalloc_start = *start; 1250 1251 /* 1252 * make sure to limit the number of pages we try to lock down 1253 * if we're looping. 1254 */ 1255 if (delalloc_end + 1 - delalloc_start > max_bytes && loops) 1256 delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1; 1257 1258 /* step two, lock all the pages after the page that has start */ 1259 ret = lock_delalloc_pages(inode, locked_page, 1260 delalloc_start, delalloc_end); 1261 if (ret == -EAGAIN) { 1262 /* some of the pages are gone, lets avoid looping by 1263 * shortening the size of the delalloc range we're searching 1264 */ 1265 if (!loops) { 1266 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1267 max_bytes = PAGE_CACHE_SIZE - offset; 1268 loops = 1; 1269 goto again; 1270 } else { 1271 found = 0; 1272 goto out_failed; 1273 } 1274 } 1275 BUG_ON(ret); 1276 1277 /* step three, lock the state bits for the whole range */ 1278 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1279 1280 /* then test to make sure it is all still delalloc */ 1281 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1282 EXTENT_DELALLOC, 1); 1283 if (!ret) { 1284 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1285 __unlock_for_delalloc(inode, locked_page, 1286 delalloc_start, delalloc_end); 1287 cond_resched(); 1288 goto again; 1289 } 1290 *start = delalloc_start; 1291 *end = delalloc_end; 1292 out_failed: 1293 return found; 1294 } 1295 1296 int extent_clear_unlock_delalloc(struct inode *inode, 1297 struct extent_io_tree *tree, 1298 u64 start, u64 end, struct page *locked_page, 1299 int unlock_pages, 1300 int clear_unlock, 1301 int clear_delalloc, int clear_dirty, 1302 int set_writeback, 1303 int end_writeback) 1304 { 1305 int ret; 1306 struct page *pages[16]; 1307 unsigned long index = start >> PAGE_CACHE_SHIFT; 1308 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1309 unsigned long nr_pages = end_index - index + 1; 1310 int i; 1311 int clear_bits = 0; 1312 1313 if (clear_unlock) 1314 clear_bits |= EXTENT_LOCKED; 1315 if (clear_dirty) 1316 clear_bits |= EXTENT_DIRTY; 1317 1318 if (clear_delalloc) 1319 clear_bits |= EXTENT_DELALLOC; 1320 1321 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); 1322 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1323 return 0; 1324 1325 while (nr_pages > 0) { 1326 ret = find_get_pages_contig(inode->i_mapping, index, 1327 min_t(unsigned long, 1328 nr_pages, ARRAY_SIZE(pages)), pages); 1329 for (i = 0; i < ret; i++) { 1330 if (pages[i] == locked_page) { 1331 page_cache_release(pages[i]); 1332 
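				/*
				 * the caller still owns the lock and the
				 * original reference on locked_page; only
				 * drop the extra reference taken just above
				 * by find_get_pages_contig()
				 */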
continue; 1333 } 1334 if (clear_dirty) 1335 clear_page_dirty_for_io(pages[i]); 1336 if (set_writeback) 1337 set_page_writeback(pages[i]); 1338 if (end_writeback) 1339 end_page_writeback(pages[i]); 1340 if (unlock_pages) 1341 unlock_page(pages[i]); 1342 page_cache_release(pages[i]); 1343 } 1344 nr_pages -= ret; 1345 index += ret; 1346 cond_resched(); 1347 } 1348 return 0; 1349 } 1350 1351 /* 1352 * count the number of bytes in the tree that have a given bit(s) 1353 * set. This can be fairly slow, except for EXTENT_DIRTY which is 1354 * cached. The total number found is returned. 1355 */ 1356 u64 count_range_bits(struct extent_io_tree *tree, 1357 u64 *start, u64 search_end, u64 max_bytes, 1358 unsigned long bits) 1359 { 1360 struct rb_node *node; 1361 struct extent_state *state; 1362 u64 cur_start = *start; 1363 u64 total_bytes = 0; 1364 int found = 0; 1365 1366 if (search_end <= cur_start) { 1367 WARN_ON(1); 1368 return 0; 1369 } 1370 1371 spin_lock(&tree->lock); 1372 if (cur_start == 0 && bits == EXTENT_DIRTY) { 1373 total_bytes = tree->dirty_bytes; 1374 goto out; 1375 } 1376 /* 1377 * this search will find all the extents that end after 1378 * our range starts. 1379 */ 1380 node = tree_search(tree, cur_start); 1381 if (!node) 1382 goto out; 1383 1384 while (1) { 1385 state = rb_entry(node, struct extent_state, rb_node); 1386 if (state->start > search_end) 1387 break; 1388 if (state->end >= cur_start && (state->state & bits)) { 1389 total_bytes += min(search_end, state->end) + 1 - 1390 max(cur_start, state->start); 1391 if (total_bytes >= max_bytes) 1392 break; 1393 if (!found) { 1394 *start = state->start; 1395 found = 1; 1396 } 1397 } 1398 node = rb_next(node); 1399 if (!node) 1400 break; 1401 } 1402 out: 1403 spin_unlock(&tree->lock); 1404 return total_bytes; 1405 } 1406 1407 #if 0 1408 /* 1409 * helper function to lock both pages and extents in the tree. 1410 * pages must be locked first. 1411 */ 1412 static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) 1413 { 1414 unsigned long index = start >> PAGE_CACHE_SHIFT; 1415 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1416 struct page *page; 1417 int err; 1418 1419 while (index <= end_index) { 1420 page = grab_cache_page(tree->mapping, index); 1421 if (!page) { 1422 err = -ENOMEM; 1423 goto failed; 1424 } 1425 if (IS_ERR(page)) { 1426 err = PTR_ERR(page); 1427 goto failed; 1428 } 1429 index++; 1430 } 1431 lock_extent(tree, start, end, GFP_NOFS); 1432 return 0; 1433 1434 failed: 1435 /* 1436 * we failed above in getting the page at 'index', so we undo here 1437 * up to but not including the page at 'index' 1438 */ 1439 end_index = index; 1440 index = start >> PAGE_CACHE_SHIFT; 1441 while (index < end_index) { 1442 page = find_get_page(tree->mapping, index); 1443 unlock_page(page); 1444 page_cache_release(page); 1445 index++; 1446 } 1447 return err; 1448 } 1449 1450 /* 1451 * helper function to unlock both pages and extents in the tree. 1452 */ 1453 static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) 1454 { 1455 unsigned long index = start >> PAGE_CACHE_SHIFT; 1456 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1457 struct page *page; 1458 1459 while (index <= end_index) { 1460 page = find_get_page(tree->mapping, index); 1461 unlock_page(page); 1462 page_cache_release(page); 1463 index++; 1464 } 1465 unlock_extent(tree, start, end, GFP_NOFS); 1466 return 0; 1467 } 1468 #endif 1469 1470 /* 1471 * set the private field for a given byte offset in the tree. 
If there isn't 1472 * an extent_state there already, this does nothing. 1473 */ 1474 int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) 1475 { 1476 struct rb_node *node; 1477 struct extent_state *state; 1478 int ret = 0; 1479 1480 spin_lock(&tree->lock); 1481 /* 1482 * this search will find all the extents that end after 1483 * our range starts. 1484 */ 1485 node = tree_search(tree, start); 1486 if (!node) { 1487 ret = -ENOENT; 1488 goto out; 1489 } 1490 state = rb_entry(node, struct extent_state, rb_node); 1491 if (state->start != start) { 1492 ret = -ENOENT; 1493 goto out; 1494 } 1495 state->private = private; 1496 out: 1497 spin_unlock(&tree->lock); 1498 return ret; 1499 } 1500 1501 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) 1502 { 1503 struct rb_node *node; 1504 struct extent_state *state; 1505 int ret = 0; 1506 1507 spin_lock(&tree->lock); 1508 /* 1509 * this search will find all the extents that end after 1510 * our range starts. 1511 */ 1512 node = tree_search(tree, start); 1513 if (!node) { 1514 ret = -ENOENT; 1515 goto out; 1516 } 1517 state = rb_entry(node, struct extent_state, rb_node); 1518 if (state->start != start) { 1519 ret = -ENOENT; 1520 goto out; 1521 } 1522 *private = state->private; 1523 out: 1524 spin_unlock(&tree->lock); 1525 return ret; 1526 } 1527 1528 /* 1529 * searches a range in the state tree for a given mask. 1530 * If 'filled' == 1, this returns 1 only if every extent in the tree 1531 * has the bits set. Otherwise, 1 is returned if any bit in the 1532 * range is found set. 1533 */ 1534 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1535 int bits, int filled) 1536 { 1537 struct extent_state *state = NULL; 1538 struct rb_node *node; 1539 int bitset = 0; 1540 1541 spin_lock(&tree->lock); 1542 node = tree_search(tree, start); 1543 while (node && start <= end) { 1544 state = rb_entry(node, struct extent_state, rb_node); 1545 1546 if (filled && state->start > start) { 1547 bitset = 0; 1548 break; 1549 } 1550 1551 if (state->start > end) 1552 break; 1553 1554 if (state->state & bits) { 1555 bitset = 1; 1556 if (!filled) 1557 break; 1558 } else if (filled) { 1559 bitset = 0; 1560 break; 1561 } 1562 start = state->end + 1; 1563 if (start > end) 1564 break; 1565 node = rb_next(node); 1566 if (!node) { 1567 if (filled) 1568 bitset = 0; 1569 break; 1570 } 1571 } 1572 spin_unlock(&tree->lock); 1573 return bitset; 1574 } 1575 1576 /* 1577 * helper function to set a given page up to date if all the 1578 * extents in the tree for that page are up to date 1579 */ 1580 static int check_page_uptodate(struct extent_io_tree *tree, 1581 struct page *page) 1582 { 1583 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1584 u64 end = start + PAGE_CACHE_SIZE - 1; 1585 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1586 SetPageUptodate(page); 1587 return 0; 1588 } 1589 1590 /* 1591 * helper function to unlock a page if all the extents in the tree 1592 * for that page are unlocked 1593 */ 1594 static int check_page_locked(struct extent_io_tree *tree, 1595 struct page *page) 1596 { 1597 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1598 u64 end = start + PAGE_CACHE_SIZE - 1; 1599 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1600 unlock_page(page); 1601 return 0; 1602 } 1603 1604 /* 1605 * helper function to end page writeback if all the extents 1606 * in the tree for that page are done with writeback 1607 */ 1608 static int check_page_writeback(struct extent_io_tree *tree, 1609 struct 
page *page) 1610 { 1611 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1612 u64 end = start + PAGE_CACHE_SIZE - 1; 1613 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) 1614 end_page_writeback(page); 1615 return 0; 1616 } 1617 1618 /* lots and lots of room for performance fixes in the end_bio funcs */ 1619 1620 /* 1621 * after a writepage IO is done, we need to: 1622 * clear the uptodate bits on error 1623 * clear the writeback bits in the extent tree for this IO 1624 * end_page_writeback if the page has no more pending IO 1625 * 1626 * Scheduling is not allowed, so the extent state tree is expected 1627 * to have one and only one object corresponding to this IO. 1628 */ 1629 static void end_bio_extent_writepage(struct bio *bio, int err) 1630 { 1631 int uptodate = err == 0; 1632 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1633 struct extent_io_tree *tree; 1634 u64 start; 1635 u64 end; 1636 int whole_page; 1637 int ret; 1638 1639 do { 1640 struct page *page = bvec->bv_page; 1641 tree = &BTRFS_I(page->mapping->host)->io_tree; 1642 1643 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1644 bvec->bv_offset; 1645 end = start + bvec->bv_len - 1; 1646 1647 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1648 whole_page = 1; 1649 else 1650 whole_page = 0; 1651 1652 if (--bvec >= bio->bi_io_vec) 1653 prefetchw(&bvec->bv_page->flags); 1654 if (tree->ops && tree->ops->writepage_end_io_hook) { 1655 ret = tree->ops->writepage_end_io_hook(page, start, 1656 end, NULL, uptodate); 1657 if (ret) 1658 uptodate = 0; 1659 } 1660 1661 if (!uptodate && tree->ops && 1662 tree->ops->writepage_io_failed_hook) { 1663 ret = tree->ops->writepage_io_failed_hook(bio, page, 1664 start, end, NULL); 1665 if (ret == 0) { 1666 uptodate = (err == 0); 1667 continue; 1668 } 1669 } 1670 1671 if (!uptodate) { 1672 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1673 ClearPageUptodate(page); 1674 SetPageError(page); 1675 } 1676 1677 clear_extent_writeback(tree, start, end, GFP_ATOMIC); 1678 1679 if (whole_page) 1680 end_page_writeback(page); 1681 else 1682 check_page_writeback(tree, page); 1683 } while (bvec >= bio->bi_io_vec); 1684 1685 bio_put(bio); 1686 } 1687 1688 /* 1689 * after a readpage IO is done, we need to: 1690 * clear the uptodate bits on error 1691 * set the uptodate bits if things worked 1692 * set the page up to date if all extents in the tree are uptodate 1693 * clear the lock bit in the extent tree 1694 * unlock the page if there are no other extents locked for it 1695 * 1696 * Scheduling is not allowed, so the extent state tree is expected 1697 * to have one and only one object corresponding to this IO. 
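 *
 * This runs from bio completion (possibly in interrupt context), which is
 * why every extent tree operation below uses GFP_ATOMIC.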
1698 */ 1699 static void end_bio_extent_readpage(struct bio *bio, int err) 1700 { 1701 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1702 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1703 struct extent_io_tree *tree; 1704 u64 start; 1705 u64 end; 1706 int whole_page; 1707 int ret; 1708 1709 if (err) 1710 uptodate = 0; 1711 1712 do { 1713 struct page *page = bvec->bv_page; 1714 tree = &BTRFS_I(page->mapping->host)->io_tree; 1715 1716 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1717 bvec->bv_offset; 1718 end = start + bvec->bv_len - 1; 1719 1720 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1721 whole_page = 1; 1722 else 1723 whole_page = 0; 1724 1725 if (--bvec >= bio->bi_io_vec) 1726 prefetchw(&bvec->bv_page->flags); 1727 1728 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1729 ret = tree->ops->readpage_end_io_hook(page, start, end, 1730 NULL); 1731 if (ret) 1732 uptodate = 0; 1733 } 1734 if (!uptodate && tree->ops && 1735 tree->ops->readpage_io_failed_hook) { 1736 ret = tree->ops->readpage_io_failed_hook(bio, page, 1737 start, end, NULL); 1738 if (ret == 0) { 1739 uptodate = 1740 test_bit(BIO_UPTODATE, &bio->bi_flags); 1741 if (err) 1742 uptodate = 0; 1743 continue; 1744 } 1745 } 1746 1747 if (uptodate) { 1748 set_extent_uptodate(tree, start, end, 1749 GFP_ATOMIC); 1750 } 1751 unlock_extent(tree, start, end, GFP_ATOMIC); 1752 1753 if (whole_page) { 1754 if (uptodate) { 1755 SetPageUptodate(page); 1756 } else { 1757 ClearPageUptodate(page); 1758 SetPageError(page); 1759 } 1760 unlock_page(page); 1761 } else { 1762 if (uptodate) { 1763 check_page_uptodate(tree, page); 1764 } else { 1765 ClearPageUptodate(page); 1766 SetPageError(page); 1767 } 1768 check_page_locked(tree, page); 1769 } 1770 } while (bvec >= bio->bi_io_vec); 1771 1772 bio_put(bio); 1773 } 1774 1775 /* 1776 * IO done from prepare_write is pretty simple, we just unlock 1777 * the structs in the extent tree when done, and set the uptodate bits 1778 * as appropriate. 
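 * As with the other end_io handlers, only GFP_ATOMIC is used here because
 * sleeping is not allowed in this context.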
1779 */ 1780 static void end_bio_extent_preparewrite(struct bio *bio, int err) 1781 { 1782 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1783 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1784 struct extent_io_tree *tree; 1785 u64 start; 1786 u64 end; 1787 1788 do { 1789 struct page *page = bvec->bv_page; 1790 tree = &BTRFS_I(page->mapping->host)->io_tree; 1791 1792 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1793 bvec->bv_offset; 1794 end = start + bvec->bv_len - 1; 1795 1796 if (--bvec >= bio->bi_io_vec) 1797 prefetchw(&bvec->bv_page->flags); 1798 1799 if (uptodate) { 1800 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1801 } else { 1802 ClearPageUptodate(page); 1803 SetPageError(page); 1804 } 1805 1806 unlock_extent(tree, start, end, GFP_ATOMIC); 1807 1808 } while (bvec >= bio->bi_io_vec); 1809 1810 bio_put(bio); 1811 } 1812 1813 static struct bio * 1814 extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1815 gfp_t gfp_flags) 1816 { 1817 struct bio *bio; 1818 1819 bio = bio_alloc(gfp_flags, nr_vecs); 1820 1821 if (bio == NULL && (current->flags & PF_MEMALLOC)) { 1822 while (!bio && (nr_vecs /= 2)) 1823 bio = bio_alloc(gfp_flags, nr_vecs); 1824 } 1825 1826 if (bio) { 1827 bio->bi_size = 0; 1828 bio->bi_bdev = bdev; 1829 bio->bi_sector = first_sector; 1830 } 1831 return bio; 1832 } 1833 1834 static int submit_one_bio(int rw, struct bio *bio, int mirror_num, 1835 unsigned long bio_flags) 1836 { 1837 int ret = 0; 1838 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1839 struct page *page = bvec->bv_page; 1840 struct extent_io_tree *tree = bio->bi_private; 1841 u64 start; 1842 u64 end; 1843 1844 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1845 end = start + bvec->bv_len - 1; 1846 1847 bio->bi_private = NULL; 1848 1849 bio_get(bio); 1850 1851 if (tree->ops && tree->ops->submit_bio_hook) 1852 tree->ops->submit_bio_hook(page->mapping->host, rw, bio, 1853 mirror_num, bio_flags); 1854 else 1855 submit_bio(rw, bio); 1856 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 1857 ret = -EOPNOTSUPP; 1858 bio_put(bio); 1859 return ret; 1860 } 1861 1862 static int submit_extent_page(int rw, struct extent_io_tree *tree, 1863 struct page *page, sector_t sector, 1864 size_t size, unsigned long offset, 1865 struct block_device *bdev, 1866 struct bio **bio_ret, 1867 unsigned long max_pages, 1868 bio_end_io_t end_io_func, 1869 int mirror_num, 1870 unsigned long prev_bio_flags, 1871 unsigned long bio_flags) 1872 { 1873 int ret = 0; 1874 struct bio *bio; 1875 int nr; 1876 int contig = 0; 1877 int this_compressed = bio_flags & EXTENT_BIO_COMPRESSED; 1878 int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; 1879 size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE); 1880 1881 if (bio_ret && *bio_ret) { 1882 bio = *bio_ret; 1883 if (old_compressed) 1884 contig = bio->bi_sector == sector; 1885 else 1886 contig = bio->bi_sector + (bio->bi_size >> 9) == 1887 sector; 1888 1889 if (prev_bio_flags != bio_flags || !contig || 1890 (tree->ops && tree->ops->merge_bio_hook && 1891 tree->ops->merge_bio_hook(page, offset, page_size, bio, 1892 bio_flags)) || 1893 bio_add_page(bio, page, page_size, offset) < page_size) { 1894 ret = submit_one_bio(rw, bio, mirror_num, 1895 prev_bio_flags); 1896 bio = NULL; 1897 } else { 1898 return 0; 1899 } 1900 } 1901 if (this_compressed) 1902 nr = BIO_MAX_PAGES; 1903 else 1904 nr = bio_get_nr_vecs(bdev); 1905 1906 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1907 1908 bio_add_page(bio, 
page, page_size, offset); 1909 bio->bi_end_io = end_io_func; 1910 bio->bi_private = tree; 1911 1912 if (bio_ret) 1913 *bio_ret = bio; 1914 else 1915 ret = submit_one_bio(rw, bio, mirror_num, bio_flags); 1916 1917 return ret; 1918 } 1919 1920 void set_page_extent_mapped(struct page *page) 1921 { 1922 if (!PagePrivate(page)) { 1923 SetPagePrivate(page); 1924 page_cache_get(page); 1925 set_page_private(page, EXTENT_PAGE_PRIVATE); 1926 } 1927 } 1928 1929 static void set_page_extent_head(struct page *page, unsigned long len) 1930 { 1931 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); 1932 } 1933 1934 /* 1935 * basic readpage implementation. Locked extent state structs are inserted 1936 * into the tree that are removed when the IO is done (by the end_io 1937 * handlers) 1938 */ 1939 static int __extent_read_full_page(struct extent_io_tree *tree, 1940 struct page *page, 1941 get_extent_t *get_extent, 1942 struct bio **bio, int mirror_num, 1943 unsigned long *bio_flags) 1944 { 1945 struct inode *inode = page->mapping->host; 1946 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1947 u64 page_end = start + PAGE_CACHE_SIZE - 1; 1948 u64 end; 1949 u64 cur = start; 1950 u64 extent_offset; 1951 u64 last_byte = i_size_read(inode); 1952 u64 block_start; 1953 u64 cur_end; 1954 sector_t sector; 1955 struct extent_map *em; 1956 struct block_device *bdev; 1957 int ret; 1958 int nr = 0; 1959 size_t page_offset = 0; 1960 size_t iosize; 1961 size_t disk_io_size; 1962 size_t blocksize = inode->i_sb->s_blocksize; 1963 unsigned long this_bio_flag = 0; 1964 1965 set_page_extent_mapped(page); 1966 1967 end = page_end; 1968 lock_extent(tree, start, end, GFP_NOFS); 1969 1970 if (page->index == last_byte >> PAGE_CACHE_SHIFT) { 1971 char *userpage; 1972 size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1); 1973 1974 if (zero_offset) { 1975 iosize = PAGE_CACHE_SIZE - zero_offset; 1976 userpage = kmap_atomic(page, KM_USER0); 1977 memset(userpage + zero_offset, 0, iosize); 1978 flush_dcache_page(page); 1979 kunmap_atomic(userpage, KM_USER0); 1980 } 1981 } 1982 while (cur <= end) { 1983 if (cur >= last_byte) { 1984 char *userpage; 1985 iosize = PAGE_CACHE_SIZE - page_offset; 1986 userpage = kmap_atomic(page, KM_USER0); 1987 memset(userpage + page_offset, 0, iosize); 1988 flush_dcache_page(page); 1989 kunmap_atomic(userpage, KM_USER0); 1990 set_extent_uptodate(tree, cur, cur + iosize - 1, 1991 GFP_NOFS); 1992 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1993 break; 1994 } 1995 em = get_extent(inode, page, page_offset, cur, 1996 end - cur + 1, 0); 1997 if (IS_ERR(em) || !em) { 1998 SetPageError(page); 1999 unlock_extent(tree, cur, end, GFP_NOFS); 2000 break; 2001 } 2002 extent_offset = cur - em->start; 2003 BUG_ON(extent_map_end(em) <= cur); 2004 BUG_ON(end < cur); 2005 2006 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2007 this_bio_flag = EXTENT_BIO_COMPRESSED; 2008 2009 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2010 cur_end = min(extent_map_end(em) - 1, end); 2011 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 2012 if (this_bio_flag & EXTENT_BIO_COMPRESSED) { 2013 disk_io_size = em->block_len; 2014 sector = em->block_start >> 9; 2015 } else { 2016 sector = (em->block_start + extent_offset) >> 9; 2017 disk_io_size = iosize; 2018 } 2019 bdev = em->bdev; 2020 block_start = em->block_start; 2021 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 2022 block_start = EXTENT_MAP_HOLE; 2023 free_extent_map(em); 2024 em = NULL; 2025 2026 /* we've found a hole, just zero and go 
on */ 2027 if (block_start == EXTENT_MAP_HOLE) { 2028 char *userpage; 2029 userpage = kmap_atomic(page, KM_USER0); 2030 memset(userpage + page_offset, 0, iosize); 2031 flush_dcache_page(page); 2032 kunmap_atomic(userpage, KM_USER0); 2033 2034 set_extent_uptodate(tree, cur, cur + iosize - 1, 2035 GFP_NOFS); 2036 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2037 cur = cur + iosize; 2038 page_offset += iosize; 2039 continue; 2040 } 2041 /* the get_extent function already copied into the page */ 2042 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 2043 check_page_uptodate(tree, page); 2044 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2045 cur = cur + iosize; 2046 page_offset += iosize; 2047 continue; 2048 } 2049 /* we have an inline extent but it didn't get marked up 2050 * to date. Error out 2051 */ 2052 if (block_start == EXTENT_MAP_INLINE) { 2053 SetPageError(page); 2054 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2055 cur = cur + iosize; 2056 page_offset += iosize; 2057 continue; 2058 } 2059 2060 ret = 0; 2061 if (tree->ops && tree->ops->readpage_io_hook) { 2062 ret = tree->ops->readpage_io_hook(page, cur, 2063 cur + iosize - 1); 2064 } 2065 if (!ret) { 2066 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; 2067 pnr -= page->index; 2068 ret = submit_extent_page(READ, tree, page, 2069 sector, disk_io_size, page_offset, 2070 bdev, bio, pnr, 2071 end_bio_extent_readpage, mirror_num, 2072 *bio_flags, 2073 this_bio_flag); 2074 nr++; 2075 *bio_flags = this_bio_flag; 2076 } 2077 if (ret) 2078 SetPageError(page); 2079 cur = cur + iosize; 2080 page_offset += iosize; 2081 } 2082 if (!nr) { 2083 if (!PageError(page)) 2084 SetPageUptodate(page); 2085 unlock_page(page); 2086 } 2087 return 0; 2088 } 2089 2090 int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 2091 get_extent_t *get_extent) 2092 { 2093 struct bio *bio = NULL; 2094 unsigned long bio_flags = 0; 2095 int ret; 2096 2097 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0, 2098 &bio_flags); 2099 if (bio) 2100 submit_one_bio(READ, bio, 0, bio_flags); 2101 return ret; 2102 } 2103 2104 /* 2105 * the writepage semantics are similar to regular writepage. extent 2106 * records are inserted to lock ranges in the tree, and as dirty areas 2107 * are found, they are marked writeback. 
Then the lock bits are removed 2108 * and the end_io handler clears the writeback ranges 2109 */ 2110 static int __extent_writepage(struct page *page, struct writeback_control *wbc, 2111 void *data) 2112 { 2113 struct inode *inode = page->mapping->host; 2114 struct extent_page_data *epd = data; 2115 struct extent_io_tree *tree = epd->tree; 2116 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2117 u64 delalloc_start; 2118 u64 page_end = start + PAGE_CACHE_SIZE - 1; 2119 u64 end; 2120 u64 cur = start; 2121 u64 extent_offset; 2122 u64 last_byte = i_size_read(inode); 2123 u64 block_start; 2124 u64 iosize; 2125 u64 unlock_start; 2126 sector_t sector; 2127 struct extent_map *em; 2128 struct block_device *bdev; 2129 int ret; 2130 int nr = 0; 2131 size_t pg_offset = 0; 2132 size_t blocksize; 2133 loff_t i_size = i_size_read(inode); 2134 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; 2135 u64 nr_delalloc; 2136 u64 delalloc_end; 2137 int page_started; 2138 int compressed; 2139 unsigned long nr_written = 0; 2140 2141 WARN_ON(!PageLocked(page)); 2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2143 if (page->index > end_index || 2144 (page->index == end_index && !pg_offset)) { 2145 page->mapping->a_ops->invalidatepage(page, 0); 2146 unlock_page(page); 2147 return 0; 2148 } 2149 2150 if (page->index == end_index) { 2151 char *userpage; 2152 2153 userpage = kmap_atomic(page, KM_USER0); 2154 memset(userpage + pg_offset, 0, 2155 PAGE_CACHE_SIZE - pg_offset); 2156 kunmap_atomic(userpage, KM_USER0); 2157 flush_dcache_page(page); 2158 } 2159 pg_offset = 0; 2160 2161 set_page_extent_mapped(page); 2162 2163 delalloc_start = start; 2164 delalloc_end = 0; 2165 page_started = 0; 2166 if (!epd->extent_locked) { 2167 while (delalloc_end < page_end) { 2168 nr_delalloc = find_lock_delalloc_range(inode, tree, 2169 page, 2170 &delalloc_start, 2171 &delalloc_end, 2172 128 * 1024 * 1024); 2173 if (nr_delalloc == 0) { 2174 delalloc_start = delalloc_end + 1; 2175 continue; 2176 } 2177 tree->ops->fill_delalloc(inode, page, delalloc_start, 2178 delalloc_end, &page_started, 2179 &nr_written); 2180 delalloc_start = delalloc_end + 1; 2181 } 2182 2183 /* did the fill delalloc function already unlock and start 2184 * the IO? 
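 * If so, the delalloc code has taken ownership of the page and its IO,
 * and all that is left for us below is to account nr_written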
2185 */ 2186 if (page_started) { 2187 ret = 0; 2188 goto update_nr_written; 2189 } 2190 } 2191 lock_extent(tree, start, page_end, GFP_NOFS); 2192 2193 unlock_start = start; 2194 2195 if (tree->ops && tree->ops->writepage_start_hook) { 2196 ret = tree->ops->writepage_start_hook(page, start, 2197 page_end); 2198 if (ret == -EAGAIN) { 2199 unlock_extent(tree, start, page_end, GFP_NOFS); 2200 redirty_page_for_writepage(wbc, page); 2201 unlock_page(page); 2202 ret = 0; 2203 goto update_nr_written; 2204 } 2205 } 2206 2207 nr_written++; 2208 2209 end = page_end; 2210 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2211 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); 2212 2213 if (last_byte <= start) { 2214 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 2215 unlock_extent(tree, start, page_end, GFP_NOFS); 2216 if (tree->ops && tree->ops->writepage_end_io_hook) 2217 tree->ops->writepage_end_io_hook(page, start, 2218 page_end, NULL, 1); 2219 unlock_start = page_end + 1; 2220 goto done; 2221 } 2222 2223 set_extent_uptodate(tree, start, page_end, GFP_NOFS); 2224 blocksize = inode->i_sb->s_blocksize; 2225 2226 while (cur <= end) { 2227 if (cur >= last_byte) { 2228 clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 2229 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2230 if (tree->ops && tree->ops->writepage_end_io_hook) 2231 tree->ops->writepage_end_io_hook(page, cur, 2232 page_end, NULL, 1); 2233 unlock_start = page_end + 1; 2234 break; 2235 } 2236 em = epd->get_extent(inode, page, pg_offset, cur, 2237 end - cur + 1, 1); 2238 if (IS_ERR(em) || !em) { 2239 SetPageError(page); 2240 break; 2241 } 2242 2243 extent_offset = cur - em->start; 2244 BUG_ON(extent_map_end(em) <= cur); 2245 BUG_ON(end < cur); 2246 iosize = min(extent_map_end(em) - cur, end - cur + 1); 2247 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 2248 sector = (em->block_start + extent_offset) >> 9; 2249 bdev = em->bdev; 2250 block_start = em->block_start; 2251 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 2252 free_extent_map(em); 2253 em = NULL; 2254 2255 /* 2256 * compressed and inline extents are written through other 2257 * paths in the FS 2258 */ 2259 if (compressed || block_start == EXTENT_MAP_HOLE || 2260 block_start == EXTENT_MAP_INLINE) { 2261 clear_extent_dirty(tree, cur, 2262 cur + iosize - 1, GFP_NOFS); 2263 2264 unlock_extent(tree, unlock_start, cur + iosize - 1, 2265 GFP_NOFS); 2266 2267 /* 2268 * end_io notification does not happen here for 2269 * compressed extents 2270 */ 2271 if (!compressed && tree->ops && 2272 tree->ops->writepage_end_io_hook) 2273 tree->ops->writepage_end_io_hook(page, cur, 2274 cur + iosize - 1, 2275 NULL, 1); 2276 else if (compressed) { 2277 /* we don't want to end_page_writeback on 2278 * a compressed extent. 
this happens 2279 * elsewhere 2280 */ 2281 nr++; 2282 } 2283 2284 cur += iosize; 2285 pg_offset += iosize; 2286 unlock_start = cur; 2287 continue; 2288 } 2289 /* leave this out until we have a page_mkwrite call */ 2290 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2291 EXTENT_DIRTY, 0)) { 2292 cur = cur + iosize; 2293 pg_offset += iosize; 2294 continue; 2295 } 2296 2297 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); 2298 if (tree->ops && tree->ops->writepage_io_hook) { 2299 ret = tree->ops->writepage_io_hook(page, cur, 2300 cur + iosize - 1); 2301 } else { 2302 ret = 0; 2303 } 2304 if (ret) { 2305 SetPageError(page); 2306 } else { 2307 unsigned long max_nr = end_index + 1; 2308 2309 set_range_writeback(tree, cur, cur + iosize - 1); 2310 if (!PageWriteback(page)) { 2311 printk(KERN_ERR "btrfs warning page %lu not " 2312 "writeback, cur %llu end %llu\n", 2313 page->index, (unsigned long long)cur, 2314 (unsigned long long)end); 2315 } 2316 2317 ret = submit_extent_page(WRITE, tree, page, sector, 2318 iosize, pg_offset, bdev, 2319 &epd->bio, max_nr, 2320 end_bio_extent_writepage, 2321 0, 0, 0); 2322 if (ret) 2323 SetPageError(page); 2324 } 2325 cur = cur + iosize; 2326 pg_offset += iosize; 2327 nr++; 2328 } 2329 done: 2330 if (nr == 0) { 2331 /* make sure the mapping tag for page dirty gets cleared */ 2332 set_page_writeback(page); 2333 end_page_writeback(page); 2334 } 2335 if (unlock_start <= page_end) 2336 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2337 unlock_page(page); 2338 2339 update_nr_written: 2340 wbc->nr_to_write -= nr_written; 2341 if (wbc->range_cyclic || (wbc->nr_to_write > 0 && 2342 wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) 2343 page->mapping->writeback_index = page->index + nr_written; 2344 return 0; 2345 } 2346 2347 /** 2348 * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. 2349 * @mapping: address space structure to write 2350 * @wbc: subtract the number of written pages from *@wbc->nr_to_write 2351 * @writepage: function called for each page 2352 * @data: data passed to writepage function 2353 * 2354 * If a page is already under I/O, write_cache_pages() skips it, even 2355 * if it's dirty. This is desirable behaviour for memory-cleaning writeback, 2356 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() 2357 * and msync() need to guarantee that all the data which was dirty at the time 2358 * the call was made get new I/O started against them. If wbc->sync_mode is 2359 * WB_SYNC_ALL then we were called for data integrity and we must wait for 2360 * existing IO to complete. 
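 *
 * This closely follows write_cache_pages() in mm/page-writeback.c; the
 * extra flush_fn callback lets the caller submit any pending extent bio
 * before we block waiting on a page that is already under writeback.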
2361 */ 2362 static int extent_write_cache_pages(struct extent_io_tree *tree, 2363 struct address_space *mapping, 2364 struct writeback_control *wbc, 2365 writepage_t writepage, void *data, 2366 void (*flush_fn)(void *)) 2367 { 2368 struct backing_dev_info *bdi = mapping->backing_dev_info; 2369 int ret = 0; 2370 int done = 0; 2371 struct pagevec pvec; 2372 int nr_pages; 2373 pgoff_t index; 2374 pgoff_t end; /* Inclusive */ 2375 int scanned = 0; 2376 int range_whole = 0; 2377 2378 pagevec_init(&pvec, 0); 2379 if (wbc->range_cyclic) { 2380 index = mapping->writeback_index; /* Start from prev offset */ 2381 end = -1; 2382 } else { 2383 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2384 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2385 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2386 range_whole = 1; 2387 scanned = 1; 2388 } 2389 retry: 2390 while (!done && (index <= end) && 2391 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 2392 PAGECACHE_TAG_DIRTY, min(end - index, 2393 (pgoff_t)PAGEVEC_SIZE-1) + 1))) { 2394 unsigned i; 2395 2396 scanned = 1; 2397 for (i = 0; i < nr_pages; i++) { 2398 struct page *page = pvec.pages[i]; 2399 2400 /* 2401 * At this point we hold neither mapping->tree_lock nor 2402 * lock on the page itself: the page may be truncated or 2403 * invalidated (changing page->mapping to NULL), or even 2404 * swizzled back from swapper_space to tmpfs file 2405 * mapping 2406 */ 2407 if (tree->ops && tree->ops->write_cache_pages_lock_hook) 2408 tree->ops->write_cache_pages_lock_hook(page); 2409 else 2410 lock_page(page); 2411 2412 if (unlikely(page->mapping != mapping)) { 2413 unlock_page(page); 2414 continue; 2415 } 2416 2417 if (!wbc->range_cyclic && page->index > end) { 2418 done = 1; 2419 unlock_page(page); 2420 continue; 2421 } 2422 2423 if (wbc->sync_mode != WB_SYNC_NONE) { 2424 if (PageWriteback(page)) 2425 flush_fn(data); 2426 wait_on_page_writeback(page); 2427 } 2428 2429 if (PageWriteback(page) || 2430 !clear_page_dirty_for_io(page)) { 2431 unlock_page(page); 2432 continue; 2433 } 2434 2435 ret = (*writepage)(page, wbc, data); 2436 2437 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { 2438 unlock_page(page); 2439 ret = 0; 2440 } 2441 if (ret || wbc->nr_to_write <= 0) 2442 done = 1; 2443 if (wbc->nonblocking && bdi_write_congested(bdi)) { 2444 wbc->encountered_congestion = 1; 2445 done = 1; 2446 } 2447 } 2448 pagevec_release(&pvec); 2449 cond_resched(); 2450 } 2451 if (!scanned && !done) { 2452 /* 2453 * We hit the last page and there is more work to be done: wrap 2454 * back to the start of the file 2455 */ 2456 scanned = 1; 2457 index = 0; 2458 goto retry; 2459 } 2460 return ret; 2461 } 2462 2463 static noinline void flush_write_bio(void *data) 2464 { 2465 struct extent_page_data *epd = data; 2466 if (epd->bio) { 2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2468 epd->bio = NULL; 2469 } 2470 } 2471 2472 int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2473 get_extent_t *get_extent, 2474 struct writeback_control *wbc) 2475 { 2476 int ret; 2477 struct address_space *mapping = page->mapping; 2478 struct extent_page_data epd = { 2479 .bio = NULL, 2480 .tree = tree, 2481 .get_extent = get_extent, 2482 .extent_locked = 0, 2483 }; 2484 struct writeback_control wbc_writepages = { 2485 .bdi = wbc->bdi, 2486 .sync_mode = WB_SYNC_NONE, 2487 .older_than_this = NULL, 2488 .nr_to_write = 64, 2489 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2490 .range_end = (loff_t)-1, 2491 }; 2492 2493 2494 ret = __extent_writepage(page, wbc, &epd); 2495 
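	/*
	 * the page we were asked for is written above; wbc_writepages
	 * starts at the following page and uses WB_SYNC_NONE with a
	 * small nr_to_write, so we opportunistically push out nearby
	 * dirty pages while the bio we are building is still open
	 */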
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2497 __extent_writepage, &epd, flush_write_bio); 2498 if (epd.bio) 2499 submit_one_bio(WRITE, epd.bio, 0, 0); 2500 return ret; 2501 } 2502 2503 int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, 2504 u64 start, u64 end, get_extent_t *get_extent, 2505 int mode) 2506 { 2507 int ret = 0; 2508 struct address_space *mapping = inode->i_mapping; 2509 struct page *page; 2510 unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >> 2511 PAGE_CACHE_SHIFT; 2512 2513 struct extent_page_data epd = { 2514 .bio = NULL, 2515 .tree = tree, 2516 .get_extent = get_extent, 2517 .extent_locked = 1, 2518 }; 2519 struct writeback_control wbc_writepages = { 2520 .bdi = inode->i_mapping->backing_dev_info, 2521 .sync_mode = mode, 2522 .older_than_this = NULL, 2523 .nr_to_write = nr_pages * 2, 2524 .range_start = start, 2525 .range_end = end + 1, 2526 }; 2527 2528 while (start <= end) { 2529 page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 2530 if (clear_page_dirty_for_io(page)) 2531 ret = __extent_writepage(page, &wbc_writepages, &epd); 2532 else { 2533 if (tree->ops && tree->ops->writepage_end_io_hook) 2534 tree->ops->writepage_end_io_hook(page, start, 2535 start + PAGE_CACHE_SIZE - 1, 2536 NULL, 1); 2537 unlock_page(page); 2538 } 2539 page_cache_release(page); 2540 start += PAGE_CACHE_SIZE; 2541 } 2542 2543 if (epd.bio) 2544 submit_one_bio(WRITE, epd.bio, 0, 0); 2545 return ret; 2546 } 2547 2548 int extent_writepages(struct extent_io_tree *tree, 2549 struct address_space *mapping, 2550 get_extent_t *get_extent, 2551 struct writeback_control *wbc) 2552 { 2553 int ret = 0; 2554 struct extent_page_data epd = { 2555 .bio = NULL, 2556 .tree = tree, 2557 .get_extent = get_extent, 2558 .extent_locked = 0, 2559 }; 2560 2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2562 __extent_writepage, &epd, 2563 flush_write_bio); 2564 if (epd.bio) 2565 submit_one_bio(WRITE, epd.bio, 0, 0); 2566 return ret; 2567 } 2568 2569 int extent_readpages(struct extent_io_tree *tree, 2570 struct address_space *mapping, 2571 struct list_head *pages, unsigned nr_pages, 2572 get_extent_t get_extent) 2573 { 2574 struct bio *bio = NULL; 2575 unsigned page_idx; 2576 struct pagevec pvec; 2577 unsigned long bio_flags = 0; 2578 2579 pagevec_init(&pvec, 0); 2580 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 2581 struct page *page = list_entry(pages->prev, struct page, lru); 2582 2583 prefetchw(&page->flags); 2584 list_del(&page->lru); 2585 /* 2586 * what we want to do here is call add_to_page_cache_lru, 2587 * but that isn't exported, so we reproduce it here 2588 */ 2589 if (!add_to_page_cache(page, mapping, 2590 page->index, GFP_KERNEL)) { 2591 2592 /* open coding of lru_cache_add, also not exported */ 2593 page_cache_get(page); 2594 if (!pagevec_add(&pvec, page)) 2595 __pagevec_lru_add_file(&pvec); 2596 __extent_read_full_page(tree, page, get_extent, 2597 &bio, 0, &bio_flags); 2598 } 2599 page_cache_release(page); 2600 } 2601 if (pagevec_count(&pvec)) 2602 __pagevec_lru_add_file(&pvec); 2603 BUG_ON(!list_empty(pages)); 2604 if (bio) 2605 submit_one_bio(READ, bio, 0, bio_flags); 2606 return 0; 2607 } 2608 2609 /* 2610 * basic invalidatepage code, this waits on any locked or writeback 2611 * ranges corresponding to the page, and then deletes any extent state 2612 * records from the tree 2613 */ 2614 int extent_invalidatepage(struct extent_io_tree *tree, 2615 struct page *page, unsigned long offset) 2616 { 2617 u64 start = 
((u64)page->index << PAGE_CACHE_SHIFT); 2618 u64 end = start + PAGE_CACHE_SIZE - 1; 2619 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 2620 2621 start += (offset + blocksize - 1) & ~(blocksize - 1); 2622 if (start > end) 2623 return 0; 2624 2625 lock_extent(tree, start, end, GFP_NOFS); 2626 wait_on_extent_writeback(tree, start, end); 2627 clear_extent_bit(tree, start, end, 2628 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2629 1, 1, GFP_NOFS); 2630 return 0; 2631 } 2632 2633 /* 2634 * simple commit_write call, set_range_dirty is used to mark both 2635 * the pages and the extent records as dirty 2636 */ 2637 int extent_commit_write(struct extent_io_tree *tree, 2638 struct inode *inode, struct page *page, 2639 unsigned from, unsigned to) 2640 { 2641 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2642 2643 set_page_extent_mapped(page); 2644 set_page_dirty(page); 2645 2646 if (pos > inode->i_size) { 2647 i_size_write(inode, pos); 2648 mark_inode_dirty(inode); 2649 } 2650 return 0; 2651 } 2652 2653 int extent_prepare_write(struct extent_io_tree *tree, 2654 struct inode *inode, struct page *page, 2655 unsigned from, unsigned to, get_extent_t *get_extent) 2656 { 2657 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2658 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 2659 u64 block_start; 2660 u64 orig_block_start; 2661 u64 block_end; 2662 u64 cur_end; 2663 struct extent_map *em; 2664 unsigned blocksize = 1 << inode->i_blkbits; 2665 size_t page_offset = 0; 2666 size_t block_off_start; 2667 size_t block_off_end; 2668 int err = 0; 2669 int iocount = 0; 2670 int ret = 0; 2671 int isnew; 2672 2673 set_page_extent_mapped(page); 2674 2675 block_start = (page_start + from) & ~((u64)blocksize - 1); 2676 block_end = (page_start + to - 1) | (blocksize - 1); 2677 orig_block_start = block_start; 2678 2679 lock_extent(tree, page_start, page_end, GFP_NOFS); 2680 while (block_start <= block_end) { 2681 em = get_extent(inode, page, page_offset, block_start, 2682 block_end - block_start + 1, 1); 2683 if (IS_ERR(em) || !em) 2684 goto err; 2685 2686 cur_end = min(block_end, extent_map_end(em) - 1); 2687 block_off_start = block_start & (PAGE_CACHE_SIZE - 1); 2688 block_off_end = block_off_start + blocksize; 2689 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); 2690 2691 if (!PageUptodate(page) && isnew && 2692 (block_off_end > to || block_off_start < from)) { 2693 void *kaddr; 2694 2695 kaddr = kmap_atomic(page, KM_USER0); 2696 if (block_off_end > to) 2697 memset(kaddr + to, 0, block_off_end - to); 2698 if (block_off_start < from) 2699 memset(kaddr + block_off_start, 0, 2700 from - block_off_start); 2701 flush_dcache_page(page); 2702 kunmap_atomic(kaddr, KM_USER0); 2703 } 2704 if ((em->block_start != EXTENT_MAP_HOLE && 2705 em->block_start != EXTENT_MAP_INLINE) && 2706 !isnew && !PageUptodate(page) && 2707 (block_off_end > to || block_off_start < from) && 2708 !test_range_bit(tree, block_start, cur_end, 2709 EXTENT_UPTODATE, 1)) { 2710 u64 sector; 2711 u64 extent_offset = block_start - em->start; 2712 size_t iosize; 2713 sector = (em->block_start + extent_offset) >> 9; 2714 iosize = (cur_end - block_start + blocksize) & 2715 ~((u64)blocksize - 1); 2716 /* 2717 * we've already got the extent locked, but we 2718 * need to split the state such that our end_bio 2719 * handler can clear the lock. 
2720 */ 2721 set_extent_bit(tree, block_start, 2722 block_start + iosize - 1, 2723 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2724 ret = submit_extent_page(READ, tree, page, 2725 sector, iosize, page_offset, em->bdev, 2726 NULL, 1, 2727 end_bio_extent_preparewrite, 0, 2728 0, 0); 2729 iocount++; 2730 block_start = block_start + iosize; 2731 } else { 2732 set_extent_uptodate(tree, block_start, cur_end, 2733 GFP_NOFS); 2734 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2735 block_start = cur_end + 1; 2736 } 2737 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2738 free_extent_map(em); 2739 } 2740 if (iocount) { 2741 wait_extent_bit(tree, orig_block_start, 2742 block_end, EXTENT_LOCKED); 2743 } 2744 check_page_uptodate(tree, page); 2745 err: 2746 /* FIXME, zero out newly allocated blocks on error */ 2747 return err; 2748 } 2749 2750 /* 2751 * a helper for releasepage, this tests for areas of the page that 2752 * are locked or under IO and drops the related state bits if it is safe 2753 * to drop the page. 2754 */ 2755 int try_release_extent_state(struct extent_map_tree *map, 2756 struct extent_io_tree *tree, struct page *page, 2757 gfp_t mask) 2758 { 2759 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2760 u64 end = start + PAGE_CACHE_SIZE - 1; 2761 int ret = 1; 2762 2763 if (test_range_bit(tree, start, end, 2764 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2765 ret = 0; 2766 else { 2767 if ((mask & GFP_NOFS) == GFP_NOFS) 2768 mask = GFP_NOFS; 2769 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2770 1, 1, mask); 2771 } 2772 return ret; 2773 } 2774 2775 /* 2776 * a helper for releasepage. As long as there are no locked extents 2777 * in the range corresponding to the page, both state records and extent 2778 * map records are removed 2779 */ 2780 int try_release_extent_mapping(struct extent_map_tree *map, 2781 struct extent_io_tree *tree, struct page *page, 2782 gfp_t mask) 2783 { 2784 struct extent_map *em; 2785 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2786 u64 end = start + PAGE_CACHE_SIZE - 1; 2787 2788 if ((mask & __GFP_WAIT) && 2789 page->mapping->host->i_size > 16 * 1024 * 1024) { 2790 u64 len; 2791 while (start <= end) { 2792 len = end - start + 1; 2793 spin_lock(&map->lock); 2794 em = lookup_extent_mapping(map, start, len); 2795 if (!em || IS_ERR(em)) { 2796 spin_unlock(&map->lock); 2797 break; 2798 } 2799 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2800 em->start != start) { 2801 spin_unlock(&map->lock); 2802 free_extent_map(em); 2803 break; 2804 } 2805 if (!test_range_bit(tree, em->start, 2806 extent_map_end(em) - 1, 2807 EXTENT_LOCKED | EXTENT_WRITEBACK | 2808 EXTENT_ORDERED, 2809 0)) { 2810 remove_extent_mapping(map, em); 2811 /* once for the rb tree */ 2812 free_extent_map(em); 2813 } 2814 start = extent_map_end(em); 2815 spin_unlock(&map->lock); 2816 2817 /* once for us */ 2818 free_extent_map(em); 2819 } 2820 } 2821 return try_release_extent_state(map, tree, page, mask); 2822 } 2823 2824 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 2825 get_extent_t *get_extent) 2826 { 2827 struct inode *inode = mapping->host; 2828 u64 start = iblock << inode->i_blkbits; 2829 sector_t sector = 0; 2830 size_t blksize = (1 << inode->i_blkbits); 2831 struct extent_map *em; 2832 2833 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2834 GFP_NOFS); 2835 em = get_extent(inode, NULL, 0, start, blksize, 0); 2836 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2837 GFP_NOFS); 2838 if (!em || IS_ERR(em)) 2839 return 0; 2840 2841 if 
(em->block_start > EXTENT_MAP_LAST_BYTE) 2842 goto out; 2843 2844 sector = (em->block_start + start - em->start) >> inode->i_blkbits; 2845 out: 2846 free_extent_map(em); 2847 return sector; 2848 } 2849 2850 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2851 __u64 start, __u64 len, get_extent_t *get_extent) 2852 { 2853 int ret; 2854 u64 off = start; 2855 u64 max = start + len; 2856 u32 flags = 0; 2857 u64 disko = 0; 2858 struct extent_map *em = NULL; 2859 int end = 0; 2860 u64 em_start = 0, em_len = 0; 2861 unsigned long emflags; 2862 ret = 0; 2863 2864 if (len == 0) 2865 return -EINVAL; 2866 2867 lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2868 GFP_NOFS); 2869 em = get_extent(inode, NULL, 0, off, max - off, 0); 2870 if (!em) 2871 goto out; 2872 if (IS_ERR(em)) { 2873 ret = PTR_ERR(em); 2874 goto out; 2875 } 2876 while (!end) { 2877 off = em->start + em->len; 2878 if (off >= max) 2879 end = 1; 2880 2881 em_start = em->start; 2882 em_len = em->len; 2883 2884 disko = 0; 2885 flags = 0; 2886 2887 switch (em->block_start) { 2888 case EXTENT_MAP_LAST_BYTE: 2889 end = 1; 2890 flags |= FIEMAP_EXTENT_LAST; 2891 break; 2892 case EXTENT_MAP_HOLE: 2893 flags |= FIEMAP_EXTENT_UNWRITTEN; 2894 break; 2895 case EXTENT_MAP_INLINE: 2896 flags |= (FIEMAP_EXTENT_DATA_INLINE | 2897 FIEMAP_EXTENT_NOT_ALIGNED); 2898 break; 2899 case EXTENT_MAP_DELALLOC: 2900 flags |= (FIEMAP_EXTENT_DELALLOC | 2901 FIEMAP_EXTENT_UNKNOWN); 2902 break; 2903 default: 2904 disko = em->block_start; 2905 break; 2906 } 2907 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2908 flags |= FIEMAP_EXTENT_ENCODED; 2909 2910 emflags = em->flags; 2911 free_extent_map(em); 2912 em = NULL; 2913 2914 if (!end) { 2915 em = get_extent(inode, NULL, 0, off, max - off, 0); 2916 if (!em) 2917 goto out; 2918 if (IS_ERR(em)) { 2919 ret = PTR_ERR(em); 2920 goto out; 2921 } 2922 emflags = em->flags; 2923 } 2924 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 2925 flags |= FIEMAP_EXTENT_LAST; 2926 end = 1; 2927 } 2928 2929 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 2930 em_len, flags); 2931 if (ret) 2932 goto out_free; 2933 } 2934 out_free: 2935 free_extent_map(em); 2936 out: 2937 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2938 GFP_NOFS); 2939 return ret; 2940 } 2941 2942 static inline struct page *extent_buffer_page(struct extent_buffer *eb, 2943 unsigned long i) 2944 { 2945 struct page *p; 2946 struct address_space *mapping; 2947 2948 if (i == 0) 2949 return eb->first_page; 2950 i += eb->start >> PAGE_CACHE_SHIFT; 2951 mapping = eb->first_page->mapping; 2952 if (!mapping) 2953 return NULL; 2954 2955 /* 2956 * extent_buffer_page is only called after pinning the page 2957 * by increasing the reference count. So we know the page must 2958 * be in the radix tree. 
2959 */ 2960 rcu_read_lock(); 2961 p = radix_tree_lookup(&mapping->page_tree, i); 2962 rcu_read_unlock(); 2963 2964 return p; 2965 } 2966 2967 static inline unsigned long num_extent_pages(u64 start, u64 len) 2968 { 2969 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - 2970 (start >> PAGE_CACHE_SHIFT); 2971 } 2972 2973 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, 2974 u64 start, 2975 unsigned long len, 2976 gfp_t mask) 2977 { 2978 struct extent_buffer *eb = NULL; 2979 #if LEAK_DEBUG 2980 unsigned long flags; 2981 #endif 2982 2983 eb = kmem_cache_zalloc(extent_buffer_cache, mask); 2984 eb->start = start; 2985 eb->len = len; 2986 spin_lock_init(&eb->lock); 2987 init_waitqueue_head(&eb->lock_wq); 2988 2989 #if LEAK_DEBUG 2990 spin_lock_irqsave(&leak_lock, flags); 2991 list_add(&eb->leak_list, &buffers); 2992 spin_unlock_irqrestore(&leak_lock, flags); 2993 #endif 2994 atomic_set(&eb->refs, 1); 2995 2996 return eb; 2997 } 2998 2999 static void __free_extent_buffer(struct extent_buffer *eb) 3000 { 3001 #if LEAK_DEBUG 3002 unsigned long flags; 3003 spin_lock_irqsave(&leak_lock, flags); 3004 list_del(&eb->leak_list); 3005 spin_unlock_irqrestore(&leak_lock, flags); 3006 #endif 3007 kmem_cache_free(extent_buffer_cache, eb); 3008 } 3009 3010 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3011 u64 start, unsigned long len, 3012 struct page *page0, 3013 gfp_t mask) 3014 { 3015 unsigned long num_pages = num_extent_pages(start, len); 3016 unsigned long i; 3017 unsigned long index = start >> PAGE_CACHE_SHIFT; 3018 struct extent_buffer *eb; 3019 struct extent_buffer *exists = NULL; 3020 struct page *p; 3021 struct address_space *mapping = tree->mapping; 3022 int uptodate = 1; 3023 3024 spin_lock(&tree->buffer_lock); 3025 eb = buffer_search(tree, start); 3026 if (eb) { 3027 atomic_inc(&eb->refs); 3028 spin_unlock(&tree->buffer_lock); 3029 mark_page_accessed(eb->first_page); 3030 return eb; 3031 } 3032 spin_unlock(&tree->buffer_lock); 3033 3034 eb = __alloc_extent_buffer(tree, start, len, mask); 3035 if (!eb) 3036 return NULL; 3037 3038 if (page0) { 3039 eb->first_page = page0; 3040 i = 1; 3041 index++; 3042 page_cache_get(page0); 3043 mark_page_accessed(page0); 3044 set_page_extent_mapped(page0); 3045 set_page_extent_head(page0, len); 3046 uptodate = PageUptodate(page0); 3047 } else { 3048 i = 0; 3049 } 3050 for (; i < num_pages; i++, index++) { 3051 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 3052 if (!p) { 3053 WARN_ON(1); 3054 goto free_eb; 3055 } 3056 set_page_extent_mapped(p); 3057 mark_page_accessed(p); 3058 if (i == 0) { 3059 eb->first_page = p; 3060 set_page_extent_head(p, len); 3061 } else { 3062 set_page_private(p, EXTENT_PAGE_PRIVATE); 3063 } 3064 if (!PageUptodate(p)) 3065 uptodate = 0; 3066 unlock_page(p); 3067 } 3068 if (uptodate) 3069 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3070 3071 spin_lock(&tree->buffer_lock); 3072 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3073 if (exists) { 3074 /* add one reference for the caller */ 3075 atomic_inc(&exists->refs); 3076 spin_unlock(&tree->buffer_lock); 3077 goto free_eb; 3078 } 3079 spin_unlock(&tree->buffer_lock); 3080 3081 /* add one reference for the tree */ 3082 atomic_inc(&eb->refs); 3083 return eb; 3084 3085 free_eb: 3086 if (!atomic_dec_and_test(&eb->refs)) 3087 return exists; 3088 for (index = 1; index < i; index++) 3089 page_cache_release(extent_buffer_page(eb, index)); 3090 page_cache_release(extent_buffer_page(eb, 0)); 3091 
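	/* every page reference taken above has been dropped; free the
	 * buffer itself and return the eb that is already in the tree
	 * (or NULL if a page allocation failed)
	 */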
__free_extent_buffer(eb); 3092 return exists; 3093 } 3094 3095 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, 3096 u64 start, unsigned long len, 3097 gfp_t mask) 3098 { 3099 struct extent_buffer *eb; 3100 3101 spin_lock(&tree->buffer_lock); 3102 eb = buffer_search(tree, start); 3103 if (eb) 3104 atomic_inc(&eb->refs); 3105 spin_unlock(&tree->buffer_lock); 3106 3107 if (eb) 3108 mark_page_accessed(eb->first_page); 3109 3110 return eb; 3111 } 3112 3113 void free_extent_buffer(struct extent_buffer *eb) 3114 { 3115 if (!eb) 3116 return; 3117 3118 if (!atomic_dec_and_test(&eb->refs)) 3119 return; 3120 3121 WARN_ON(1); 3122 } 3123 3124 int clear_extent_buffer_dirty(struct extent_io_tree *tree, 3125 struct extent_buffer *eb) 3126 { 3127 unsigned long i; 3128 unsigned long num_pages; 3129 struct page *page; 3130 3131 num_pages = num_extent_pages(eb->start, eb->len); 3132 3133 for (i = 0; i < num_pages; i++) { 3134 page = extent_buffer_page(eb, i); 3135 if (!PageDirty(page)) 3136 continue; 3137 3138 lock_page(page); 3139 if (i == 0) 3140 set_page_extent_head(page, eb->len); 3141 else 3142 set_page_private(page, EXTENT_PAGE_PRIVATE); 3143 3144 clear_page_dirty_for_io(page); 3145 spin_lock_irq(&page->mapping->tree_lock); 3146 if (!PageDirty(page)) { 3147 radix_tree_tag_clear(&page->mapping->page_tree, 3148 page_index(page), 3149 PAGECACHE_TAG_DIRTY); 3150 } 3151 spin_unlock_irq(&page->mapping->tree_lock); 3152 unlock_page(page); 3153 } 3154 return 0; 3155 } 3156 3157 int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, 3158 struct extent_buffer *eb) 3159 { 3160 return wait_on_extent_writeback(tree, eb->start, 3161 eb->start + eb->len - 1); 3162 } 3163 3164 int set_extent_buffer_dirty(struct extent_io_tree *tree, 3165 struct extent_buffer *eb) 3166 { 3167 unsigned long i; 3168 unsigned long num_pages; 3169 int was_dirty = 0; 3170 3171 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 3172 num_pages = num_extent_pages(eb->start, eb->len); 3173 for (i = 0; i < num_pages; i++) 3174 __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); 3175 return was_dirty; 3176 } 3177 3178 int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3179 struct extent_buffer *eb) 3180 { 3181 unsigned long i; 3182 struct page *page; 3183 unsigned long num_pages; 3184 3185 num_pages = num_extent_pages(eb->start, eb->len); 3186 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3187 3188 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3189 GFP_NOFS); 3190 for (i = 0; i < num_pages; i++) { 3191 page = extent_buffer_page(eb, i); 3192 if (page) 3193 ClearPageUptodate(page); 3194 } 3195 return 0; 3196 } 3197 3198 int set_extent_buffer_uptodate(struct extent_io_tree *tree, 3199 struct extent_buffer *eb) 3200 { 3201 unsigned long i; 3202 struct page *page; 3203 unsigned long num_pages; 3204 3205 num_pages = num_extent_pages(eb->start, eb->len); 3206 3207 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3208 GFP_NOFS); 3209 for (i = 0; i < num_pages; i++) { 3210 page = extent_buffer_page(eb, i); 3211 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3212 ((i == num_pages - 1) && 3213 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { 3214 check_page_uptodate(tree, page); 3215 continue; 3216 } 3217 SetPageUptodate(page); 3218 } 3219 return 0; 3220 } 3221 3222 int extent_range_uptodate(struct extent_io_tree *tree, 3223 u64 start, u64 end) 3224 { 3225 struct page *page; 3226 int ret; 3227 int pg_uptodate = 1; 3228 int uptodate; 3229 unsigned long 
index; 3230 3231 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3232 if (ret) 3233 return 1; 3234 while (start <= end) { 3235 index = start >> PAGE_CACHE_SHIFT; 3236 page = find_get_page(tree->mapping, index); 3237 uptodate = PageUptodate(page); 3238 page_cache_release(page); 3239 if (!uptodate) { 3240 pg_uptodate = 0; 3241 break; 3242 } 3243 start += PAGE_CACHE_SIZE; 3244 } 3245 return pg_uptodate; 3246 } 3247 3248 int extent_buffer_uptodate(struct extent_io_tree *tree, 3249 struct extent_buffer *eb) 3250 { 3251 int ret = 0; 3252 unsigned long num_pages; 3253 unsigned long i; 3254 struct page *page; 3255 int pg_uptodate = 1; 3256 3257 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3258 return 1; 3259 3260 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3261 EXTENT_UPTODATE, 1); 3262 if (ret) 3263 return ret; 3264 3265 num_pages = num_extent_pages(eb->start, eb->len); 3266 for (i = 0; i < num_pages; i++) { 3267 page = extent_buffer_page(eb, i); 3268 if (!PageUptodate(page)) { 3269 pg_uptodate = 0; 3270 break; 3271 } 3272 } 3273 return pg_uptodate; 3274 } 3275 3276 int read_extent_buffer_pages(struct extent_io_tree *tree, 3277 struct extent_buffer *eb, 3278 u64 start, int wait, 3279 get_extent_t *get_extent, int mirror_num) 3280 { 3281 unsigned long i; 3282 unsigned long start_i; 3283 struct page *page; 3284 int err; 3285 int ret = 0; 3286 int locked_pages = 0; 3287 int all_uptodate = 1; 3288 int inc_all_pages = 0; 3289 unsigned long num_pages; 3290 struct bio *bio = NULL; 3291 unsigned long bio_flags = 0; 3292 3293 if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) 3294 return 0; 3295 3296 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3297 EXTENT_UPTODATE, 1)) { 3298 return 0; 3299 } 3300 3301 if (start) { 3302 WARN_ON(start < eb->start); 3303 start_i = (start >> PAGE_CACHE_SHIFT) - 3304 (eb->start >> PAGE_CACHE_SHIFT); 3305 } else { 3306 start_i = 0; 3307 } 3308 3309 num_pages = num_extent_pages(eb->start, eb->len); 3310 for (i = start_i; i < num_pages; i++) { 3311 page = extent_buffer_page(eb, i); 3312 if (!wait) { 3313 if (!trylock_page(page)) 3314 goto unlock_exit; 3315 } else { 3316 lock_page(page); 3317 } 3318 locked_pages++; 3319 if (!PageUptodate(page)) 3320 all_uptodate = 0; 3321 } 3322 if (all_uptodate) { 3323 if (start_i == 0) 3324 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3325 goto unlock_exit; 3326 } 3327 3328 for (i = start_i; i < num_pages; i++) { 3329 page = extent_buffer_page(eb, i); 3330 if (inc_all_pages) 3331 page_cache_get(page); 3332 if (!PageUptodate(page)) { 3333 if (start_i == 0) 3334 inc_all_pages = 1; 3335 ClearPageError(page); 3336 err = __extent_read_full_page(tree, page, 3337 get_extent, &bio, 3338 mirror_num, &bio_flags); 3339 if (err) 3340 ret = err; 3341 } else { 3342 unlock_page(page); 3343 } 3344 } 3345 3346 if (bio) 3347 submit_one_bio(READ, bio, mirror_num, bio_flags); 3348 3349 if (ret || !wait) 3350 return ret; 3351 3352 for (i = start_i; i < num_pages; i++) { 3353 page = extent_buffer_page(eb, i); 3354 wait_on_page_locked(page); 3355 if (!PageUptodate(page)) 3356 ret = -EIO; 3357 } 3358 3359 if (!ret) 3360 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3361 return ret; 3362 3363 unlock_exit: 3364 i = start_i; 3365 while (locked_pages > 0) { 3366 page = extent_buffer_page(eb, i); 3367 i++; 3368 unlock_page(page); 3369 locked_pages--; 3370 } 3371 return ret; 3372 } 3373 3374 void read_extent_buffer(struct extent_buffer *eb, void *dstv, 3375 unsigned long start, 3376 unsigned long len) 3377 { 3378 size_t 
cur; 3379 size_t offset; 3380 struct page *page; 3381 char *kaddr; 3382 char *dst = (char *)dstv; 3383 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3384 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3385 3386 WARN_ON(start > eb->len); 3387 WARN_ON(start + len > eb->start + eb->len); 3388 3389 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3390 3391 while (len > 0) { 3392 page = extent_buffer_page(eb, i); 3393 3394 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3395 kaddr = kmap_atomic(page, KM_USER1); 3396 memcpy(dst, kaddr + offset, cur); 3397 kunmap_atomic(kaddr, KM_USER1); 3398 3399 dst += cur; 3400 len -= cur; 3401 offset = 0; 3402 i++; 3403 } 3404 } 3405 3406 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, 3407 unsigned long min_len, char **token, char **map, 3408 unsigned long *map_start, 3409 unsigned long *map_len, int km) 3410 { 3411 size_t offset = start & (PAGE_CACHE_SIZE - 1); 3412 char *kaddr; 3413 struct page *p; 3414 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3415 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3416 unsigned long end_i = (start_offset + start + min_len - 1) >> 3417 PAGE_CACHE_SHIFT; 3418 3419 if (i != end_i) 3420 return -EINVAL; 3421 3422 if (i == 0) { 3423 offset = start_offset; 3424 *map_start = 0; 3425 } else { 3426 offset = 0; 3427 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; 3428 } 3429 3430 if (start + min_len > eb->len) { 3431 printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " 3432 "wanted %lu %lu\n", (unsigned long long)eb->start, 3433 eb->len, start, min_len); 3434 WARN_ON(1); 3435 } 3436 3437 p = extent_buffer_page(eb, i); 3438 kaddr = kmap_atomic(p, km); 3439 *token = kaddr; 3440 *map = kaddr + offset; 3441 *map_len = PAGE_CACHE_SIZE - offset; 3442 return 0; 3443 } 3444 3445 int map_extent_buffer(struct extent_buffer *eb, unsigned long start, 3446 unsigned long min_len, 3447 char **token, char **map, 3448 unsigned long *map_start, 3449 unsigned long *map_len, int km) 3450 { 3451 int err; 3452 int save = 0; 3453 if (eb->map_token) { 3454 unmap_extent_buffer(eb, eb->map_token, km); 3455 eb->map_token = NULL; 3456 save = 1; 3457 } 3458 err = map_private_extent_buffer(eb, start, min_len, token, map, 3459 map_start, map_len, km); 3460 if (!err && save) { 3461 eb->map_token = *token; 3462 eb->kaddr = *map; 3463 eb->map_start = *map_start; 3464 eb->map_len = *map_len; 3465 } 3466 return err; 3467 } 3468 3469 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) 3470 { 3471 kunmap_atomic(token, km); 3472 } 3473 3474 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 3475 unsigned long start, 3476 unsigned long len) 3477 { 3478 size_t cur; 3479 size_t offset; 3480 struct page *page; 3481 char *kaddr; 3482 char *ptr = (char *)ptrv; 3483 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3484 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3485 int ret = 0; 3486 3487 WARN_ON(start > eb->len); 3488 WARN_ON(start + len > eb->start + eb->len); 3489 3490 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3491 3492 while (len > 0) { 3493 page = extent_buffer_page(eb, i); 3494 3495 cur = min(len, (PAGE_CACHE_SIZE - offset)); 3496 3497 kaddr = kmap_atomic(page, KM_USER0); 3498 ret = memcmp(ptr, kaddr + offset, cur); 3499 kunmap_atomic(kaddr, KM_USER0); 3500 if (ret) 3501 break; 3502 3503 ptr += cur; 3504 len -= cur; 3505 offset = 0; 3506 i++; 3507 
} 3508 return ret; 3509 } 3510 3511 void write_extent_buffer(struct extent_buffer *eb, const void *srcv, 3512 unsigned long start, unsigned long len) 3513 { 3514 size_t cur; 3515 size_t offset; 3516 struct page *page; 3517 char *kaddr; 3518 char *src = (char *)srcv; 3519 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3520 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3521 3522 WARN_ON(start > eb->len); 3523 WARN_ON(start + len > eb->start + eb->len); 3524 3525 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3526 3527 while (len > 0) { 3528 page = extent_buffer_page(eb, i); 3529 WARN_ON(!PageUptodate(page)); 3530 3531 cur = min(len, PAGE_CACHE_SIZE - offset); 3532 kaddr = kmap_atomic(page, KM_USER1); 3533 memcpy(kaddr + offset, src, cur); 3534 kunmap_atomic(kaddr, KM_USER1); 3535 3536 src += cur; 3537 len -= cur; 3538 offset = 0; 3539 i++; 3540 } 3541 } 3542 3543 void memset_extent_buffer(struct extent_buffer *eb, char c, 3544 unsigned long start, unsigned long len) 3545 { 3546 size_t cur; 3547 size_t offset; 3548 struct page *page; 3549 char *kaddr; 3550 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 3551 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 3552 3553 WARN_ON(start > eb->len); 3554 WARN_ON(start + len > eb->start + eb->len); 3555 3556 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); 3557 3558 while (len > 0) { 3559 page = extent_buffer_page(eb, i); 3560 WARN_ON(!PageUptodate(page)); 3561 3562 cur = min(len, PAGE_CACHE_SIZE - offset); 3563 kaddr = kmap_atomic(page, KM_USER0); 3564 memset(kaddr + offset, c, cur); 3565 kunmap_atomic(kaddr, KM_USER0); 3566 3567 len -= cur; 3568 offset = 0; 3569 i++; 3570 } 3571 } 3572 3573 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 3574 unsigned long dst_offset, unsigned long src_offset, 3575 unsigned long len) 3576 { 3577 u64 dst_len = dst->len; 3578 size_t cur; 3579 size_t offset; 3580 struct page *page; 3581 char *kaddr; 3582 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3583 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3584 3585 WARN_ON(src->len != dst_len); 3586 3587 offset = (start_offset + dst_offset) & 3588 ((unsigned long)PAGE_CACHE_SIZE - 1); 3589 3590 while (len > 0) { 3591 page = extent_buffer_page(dst, i); 3592 WARN_ON(!PageUptodate(page)); 3593 3594 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); 3595 3596 kaddr = kmap_atomic(page, KM_USER0); 3597 read_extent_buffer(src, kaddr + offset, src_offset, cur); 3598 kunmap_atomic(kaddr, KM_USER0); 3599 3600 src_offset += cur; 3601 len -= cur; 3602 offset = 0; 3603 i++; 3604 } 3605 } 3606 3607 static void move_pages(struct page *dst_page, struct page *src_page, 3608 unsigned long dst_off, unsigned long src_off, 3609 unsigned long len) 3610 { 3611 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3612 if (dst_page == src_page) { 3613 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); 3614 } else { 3615 char *src_kaddr = kmap_atomic(src_page, KM_USER1); 3616 char *p = dst_kaddr + dst_off + len; 3617 char *s = src_kaddr + src_off + len; 3618 3619 while (len--) 3620 *--p = *--s; 3621 3622 kunmap_atomic(src_kaddr, KM_USER1); 3623 } 3624 kunmap_atomic(dst_kaddr, KM_USER0); 3625 } 3626 3627 static void copy_pages(struct page *dst_page, struct page *src_page, 3628 unsigned long dst_off, unsigned long src_off, 3629 unsigned long len) 3630 { 3631 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3632 char 
*src_kaddr; 3633 3634 if (dst_page != src_page) 3635 src_kaddr = kmap_atomic(src_page, KM_USER1); 3636 else 3637 src_kaddr = dst_kaddr; 3638 3639 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3640 kunmap_atomic(dst_kaddr, KM_USER0); 3641 if (dst_page != src_page) 3642 kunmap_atomic(src_kaddr, KM_USER1); 3643 } 3644 3645 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3646 unsigned long src_offset, unsigned long len) 3647 { 3648 size_t cur; 3649 size_t dst_off_in_page; 3650 size_t src_off_in_page; 3651 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3652 unsigned long dst_i; 3653 unsigned long src_i; 3654 3655 if (src_offset + len > dst->len) { 3656 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3657 "len %lu dst len %lu\n", src_offset, len, dst->len); 3658 BUG_ON(1); 3659 } 3660 if (dst_offset + len > dst->len) { 3661 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3662 "len %lu dst len %lu\n", dst_offset, len, dst->len); 3663 BUG_ON(1); 3664 } 3665 3666 while (len > 0) { 3667 dst_off_in_page = (start_offset + dst_offset) & 3668 ((unsigned long)PAGE_CACHE_SIZE - 1); 3669 src_off_in_page = (start_offset + src_offset) & 3670 ((unsigned long)PAGE_CACHE_SIZE - 1); 3671 3672 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 3673 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 3674 3675 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - 3676 src_off_in_page)); 3677 cur = min_t(unsigned long, cur, 3678 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); 3679 3680 copy_pages(extent_buffer_page(dst, dst_i), 3681 extent_buffer_page(dst, src_i), 3682 dst_off_in_page, src_off_in_page, cur); 3683 3684 src_offset += cur; 3685 dst_offset += cur; 3686 len -= cur; 3687 } 3688 } 3689 3690 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 3691 unsigned long src_offset, unsigned long len) 3692 { 3693 size_t cur; 3694 size_t dst_off_in_page; 3695 size_t src_off_in_page; 3696 unsigned long dst_end = dst_offset + len - 1; 3697 unsigned long src_end = src_offset + len - 1; 3698 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 3699 unsigned long dst_i; 3700 unsigned long src_i; 3701 3702 if (src_offset + len > dst->len) { 3703 printk(KERN_ERR "btrfs memmove bogus src_offset %lu move " 3704 "len %lu len %lu\n", src_offset, len, dst->len); 3705 BUG_ON(1); 3706 } 3707 if (dst_offset + len > dst->len) { 3708 printk(KERN_ERR "btrfs memmove bogus dst_offset %lu move " 3709 "len %lu len %lu\n", dst_offset, len, dst->len); 3710 BUG_ON(1); 3711 } 3712 if (dst_offset < src_offset) { 3713 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 3714 return; 3715 } 3716 while (len > 0) { 3717 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; 3718 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 3719 3720 dst_off_in_page = (start_offset + dst_end) & 3721 ((unsigned long)PAGE_CACHE_SIZE - 1); 3722 src_off_in_page = (start_offset + src_end) & 3723 ((unsigned long)PAGE_CACHE_SIZE - 1); 3724 3725 cur = min_t(unsigned long, len, src_off_in_page + 1); 3726 cur = min(cur, dst_off_in_page + 1); 3727 move_pages(extent_buffer_page(dst, dst_i), 3728 extent_buffer_page(dst, src_i), 3729 dst_off_in_page - cur + 1, 3730 src_off_in_page - cur + 1, cur); 3731 3732 dst_end -= cur; 3733 src_end -= cur; 3734 len -= cur; 3735 } 3736 } 3737 3738 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3739 { 3740 u64 start = page_offset(page); 3741 struct extent_buffer *eb; 3742 int 
ret = 1; 3743 unsigned long i; 3744 unsigned long num_pages; 3745 3746 spin_lock(&tree->buffer_lock); 3747 eb = buffer_search(tree, start); 3748 if (!eb) 3749 goto out; 3750 3751 if (atomic_read(&eb->refs) > 1) { 3752 ret = 0; 3753 goto out; 3754 } 3755 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3756 ret = 0; 3757 goto out; 3758 } 3759 /* at this point we can safely release the extent buffer */ 3760 num_pages = num_extent_pages(eb->start, eb->len); 3761 for (i = 0; i < num_pages; i++) 3762 page_cache_release(extent_buffer_page(eb, i)); 3763 rb_erase(&eb->rb_node, &tree->buffer); 3764 __free_extent_buffer(eb); 3765 out: 3766 spin_unlock(&tree->buffer_lock); 3767 return ret; 3768 } 3769
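
/*
 * Illustrative sketch only, not part of this file's interface: a caller
 * that wants bytes out of an extent_buffer can let read_extent_buffer()
 * handle the page boundaries, or use map_extent_buffer() and
 * unmap_extent_buffer() when it needs a direct mapping of a range that
 * fits inside a single page.  Here 'eb' and 'offset' stand in for the
 * caller's buffer and byte offset:
 *
 *	char buf[32];
 *	char *token;
 *	char *kaddr;
 *	unsigned long map_start;
 *	unsigned long map_len;
 *
 *	read_extent_buffer(eb, buf, offset, sizeof(buf));
 *
 *	if (!map_extent_buffer(eb, offset, sizeof(buf), &token, &kaddr,
 *			       &map_start, &map_len, KM_USER0)) {
 *		memcpy(buf, kaddr,
 *		       min_t(unsigned long, sizeof(buf), map_len));
 *		unmap_extent_buffer(eb, token, KM_USER0);
 *	}
 */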