#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/version.h>
#include <linux/writeback.h>
#include "extent_map.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				      unsigned long extra_flags,
				      void (*ctor)(void *, struct kmem_cache *,
						   unsigned long));

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);

static DEFINE_SPINLOCK(state_lock);
#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_map_tree *tree;
	get_extent_t *get_extent;
};

void __init extent_map_init(void)
{
	extent_map_cache = btrfs_cache_create("extent_map",
					      sizeof(struct extent_map), 0,
					      NULL);
	extent_state_cache = btrfs_cache_create("extent_state",
						sizeof(struct extent_state), 0,
						NULL);
	extent_buffer_cache = btrfs_cache_create("extent_buffers",
						 sizeof(struct extent_buffer), 0,
						 NULL);
}

void __exit extent_map_exit(void)
{
	struct extent_state *state;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, list);
		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n",
		       state->start, state->end, state->state, state->in_tree,
		       atomic_read(&state->refs));
		list_del(&state->list);
		kmem_cache_free(extent_state_cache, state);
	}

	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	rwlock_init(&tree->lock);
	spin_lock_init(&tree->lru_lock);
	tree->mapping = mapping;
	INIT_LIST_HEAD(&tree->buffer_lru);
	tree->lru_size = 0;
}
EXPORT_SYMBOL(extent_map_tree_init);

void extent_map_tree_empty_lru(struct extent_map_tree *tree)
{
	struct extent_buffer *eb;

	while (!list_empty(&tree->buffer_lru)) {
		eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
				lru);
		list_del(&eb->lru);
		free_extent_buffer(eb);
	}
}
EXPORT_SYMBOL(extent_map_tree_empty_lru);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;

	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);
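
/*
 * Illustrative sketch, not part of the original file: extent_map
 * objects are reference counted.  The allocation above hands back a
 * struct with refs == 1, and the final free_extent_map() drops that
 * reference and returns the struct to the slab cache.
 */
static void __attribute__((unused)) example_extent_map_refcount(gfp_t mask)
{
	struct extent_map *em = alloc_extent_map(mask);

	if (!em || IS_ERR(em))
		return;
	/* em->refs is 1 here; this free drops the last ref */
	free_extent_map(em);
}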

struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	unsigned long flags;

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;

	spin_lock_irqsave(&state_lock, flags);
	list_add(&state->list, &states);
	spin_unlock_irqrestore(&state_lock, flags);

	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	unsigned long flags;

	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		spin_lock_irqsave(&state_lock, flags);
		list_del(&state->list);
		spin_unlock_irqrestore(&state_lock, flags);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while (prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;

	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}
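
/*
 * Illustrative note, not part of the original file: with entries
 * covering [0, 4095] and [8192, 12287], tree_search(root, 5000) finds
 * no entry containing offset 5000 and falls forward to the
 * [8192, 12287] node, i.e. it returns the first entry whose end is at
 * or after the search offset, or NULL when no such entry exists.
 */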

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == EXTENT_MAP_HOLE &&
		      prev->block_start == EXTENT_MAP_HOLE) ||
		     (em->block_start == EXTENT_MAP_INLINE &&
		      prev->block_start == EXTENT_MAP_INLINE) ||
		     (em->block_start == EXTENT_MAP_DELALLOC &&
		      prev->block_start == EXTENT_MAP_DELALLOC) ||
		     (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
		      em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use.
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);
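
/*
 * Illustrative sketch, not part of the original file: a caller fills
 * in an extent_map, inserts it, and later looks it up by range.  The
 * values below are arbitrary; lookup takes its own reference, and
 * after remove_extent_mapping() the tree's reference must be dropped
 * too, mirroring try_release_extent_mapping() below.
 */
static int __attribute__((unused)) example_extent_mapping(struct extent_map_tree *tree)
{
	struct extent_map *em = alloc_extent_map(GFP_NOFS);
	struct extent_map *found;
	int ret;

	if (!em || IS_ERR(em))
		return -ENOMEM;
	em->start = 0;
	em->end = 4095;
	em->block_start = EXTENT_MAP_HOLE;	/* arbitrary for the sketch */
	em->block_end = EXTENT_MAP_HOLE;
	em->bdev = NULL;
	ret = add_extent_mapping(tree, em);	/* the tree takes a ref */
	free_extent_map(em);			/* drop the allocation ref */
	if (ret)
		return ret;

	found = lookup_extent_mapping(tree, 0, 4095);
	if (found) {
		remove_extent_mapping(tree, found);
		free_extent_map(found);		/* once for the rb tree */
		free_extent_map(found);		/* once for the lookup */
	}
	return 0;
}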

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case
 * the state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;

		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates
 * an offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling, there are two extent state structs in the tree:
 *	prealloc: [orig->start, split - 1]
 *	orig:     [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;

		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}
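
/*
 * Illustrative worked example, not part of the original file:
 * splitting a state covering [0, 8191] at offset 4096 leaves
 * 'prealloc' in the tree covering [0, 4095] and 'orig' covering
 * [4096, 8191].
 */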

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1),
 * or forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 * | ---- desired range ---- |
	 * | state | or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                       | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);
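
/*
 * Illustrative sketch, not part of the original file: block until
 * writeback has cleared on a byte range, then clear the dirty bit.
 * The return value follows the clear_extent_bit() conventions above.
 */
static int __attribute__((unused)) example_wait_and_clear(struct extent_map_tree *tree,
							  u64 start, u64 end)
{
	wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
	return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0,
				GFP_NOFS);
}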

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;

		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                       | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_bits);

int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_bits);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);
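
/*
 * Illustrative sketch, not part of the original file: a buffered
 * write path would mark a range delalloc + dirty, and a later
 * writeback pass clears both bits with one clear_extent_dirty() call.
 */
static void __attribute__((unused)) example_delalloc_bits(struct extent_map_tree *tree,
							  u64 start, u64 end)
{
	set_extent_delalloc(tree, start, end, GFP_NOFS);
	/* ... pages are dirtied, writeback eventually runs ... */
	clear_extent_dirty(tree, start, end, GFP_NOFS);
}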

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start, end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;

	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);
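
/*
 * Illustrative sketch, not part of the original file: the usual
 * pattern is lock_extent(), operate on the range, unlock_extent().
 * lock_extent() sleeps on conflicting locks when the mask allows it.
 */
static int __attribute__((unused)) example_locked_range(struct extent_map_tree *tree,
							u64 start, u64 end)
{
	int err = lock_extent(tree, start, end, GFP_NOFS);

	if (err)
		return err;
	/* ... the byte range is exclusively locked here ... */
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}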

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node))
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	read_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);
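
/*
 * Illustrative sketch, not part of the original file: walking every
 * dirty range from an offset with find_first_extent_bit(), which
 * returns 0 while a matching range exists.
 */
static void __attribute__((unused)) example_walk_dirty_ranges(struct extent_map_tree *tree)
{
	u64 start = 0;
	u64 found_start;
	u64 found_end;

	while (!find_first_extent_bit(tree, start, &found_start,
				      &found_end, EXTENT_DIRTY)) {
		/* ... process [found_start, found_end] ... */
		start = found_end + 1;
	}
}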

u64 find_lock_delalloc_range(struct extent_map_tree *tree,
			     u64 start, u64 lock_start, u64 *end,
			     u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = start;
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
search_again:
	node = tree_search(&tree->state, cur_start);
	if (!node || IS_ERR(node))
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start != cur_start)
			goto out;
		if (!(state->state & EXTENT_DELALLOC))
			goto out;
		if (state->start >= lock_start) {
			if (state->state & EXTENT_LOCKED) {
				DEFINE_WAIT(wait);

				atomic_inc(&state->refs);
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
				write_unlock_irq(&tree->lock);
				schedule();
				write_lock_irq(&tree->lock);
				finish_wait(&state->wq, &wait);
				free_extent_state(state);
				goto search_again;
			}
			state->state |= EXTENT_LOCKED;
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return found;
}

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	return ret;
}

int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	return ret;
}
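
/*
 * Illustrative sketch, not part of the original file: the private
 * field stashes one u64 per state record, keyed by the record's exact
 * start offset (btrfs uses this for things like per-block checksums).
 * The value below is arbitrary.
 */
static void __attribute__((unused)) example_state_private(struct extent_map_tree *tree,
							  u64 start)
{
	u64 val;

	if (!set_state_private(tree, start, 0xdeadbeef) &&
	    !get_state_private(tree, start, &val)) {
		/* val == 0xdeadbeef here */
	}
}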

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
		   int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}
EXPORT_SYMBOL(test_range_bit);
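
/*
 * Illustrative note, not part of the original file: with
 * EXTENT_UPTODATE set on a state covering [0, 2047] and a second
 * state covering [2048, 4095] without it, test_range_bit(tree, 0,
 * 4095, EXTENT_UPTODATE, 1) returns 0 (the range is not fully
 * covered), while the same call with filled == 0 returns 1.
 */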

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_writepage(struct bio *bio, int err)
#else
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
#endif
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif

	do {
		struct page *page = bvec->bv_page;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start, end);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_readpage(struct bio *bio, int err)
#else
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
#endif
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif

	do {
		struct page *page = bvec->bv_page;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			if (ret)
				uptodate = 0;
		}
		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_extent_preparewrite(struct bio *bio, int err)
#else
static int end_bio_extent_preparewrite(struct bio *bio,
				       unsigned int bytes_done, int err)
#endif
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif

	do {
		struct page *page = bvec->bv_page;

		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

static struct bio *
extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
		 gfp_t gfp_flags)
{
	struct bio *bio;

	bio = bio_alloc(gfp_flags, nr_vecs);

	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
		while (!bio && (nr_vecs /= 2))
			bio = bio_alloc(gfp_flags, nr_vecs);
	}

	if (bio) {
		bio->bi_bdev = bdev;
		bio->bi_sector = first_sector;
	}
	return bio;
}

static int submit_one_bio(int rw, struct bio *bio)
{
	int ret = 0;

	bio_get(bio);
	submit_bio(rw, bio);
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	bio_put(bio);
	return ret;
}

static int submit_extent_page(int rw, struct extent_map_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      struct bio **bio_ret,
			      int max_pages,
			      bio_end_io_t end_io_func)
{
	int ret = 0;
	struct bio *bio;
	int nr;

	if (bio_ret && *bio_ret) {
		bio = *bio_ret;
		if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
		    bio_add_page(bio, page, size, offset) < size) {
			ret = submit_one_bio(rw, bio);
			bio = NULL;
		} else {
			return 0;
		}
	}
	nr = min(max_pages, bio_get_nr_vecs(bdev));
	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
	if (!bio) {
		printk("failed to allocate bio nr %d\n", nr);
	}
	bio_add_page(bio, page, size, offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;
	if (bio_ret) {
		*bio_ret = bio;
	} else {
		ret = submit_one_bio(rw, bio);
	}

	return ret;
}
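
/*
 * Illustrative note, not part of the original file: write paths batch
 * many pages into one bio by handing the same bio pointer to each
 * submit_extent_page() call, roughly:
 *
 *	struct bio *bio = NULL;
 *	for (each dirty page)
 *		submit_extent_page(WRITE, tree, page, sector, size,
 *				   offset, bdev, &bio, max_pages,
 *				   end_bio_extent_writepage);
 *	if (bio)
 *		submit_one_bio(WRITE, bio);
 *
 * extent_write_full_page() below follows exactly this pattern via
 * struct extent_page_data.
 */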

void set_page_extent_mapped(struct page *page)
{
	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		set_page_private(page, EXTENT_PAGE_PRIVATE);
		page_cache_get(page);
	}
}

/*
 * basic readpage implementation.  Locked extent state structs are
 * inserted into the tree; they are removed when the IO is done (by
 * the end_io handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct inode *inode = page->mapping->host;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize = inode->i_sb->s_blocksize;

	set_page_extent_mapped(page);

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	while (cur <= end) {
		if (cur >= last_byte) {
			iosize = PAGE_CACHE_SIZE - page_offset;
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);

		iosize = min(em->end - cur, end - cur) + 1;
		cur_end = min(em->end, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 bdev, NULL, 1,
						 end_bio_extent_readpage);
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(extent_read_full_page);
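
/*
 * Illustrative sketch, not part of the original file: a filesystem
 * wires extent_read_full_page() into its address_space_operations,
 * roughly:
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_map_tree *tree = myfs_tree(page->mapping->host);
 *		return extent_read_full_page(tree, page, myfs_get_extent);
 *	}
 *
 * myfs_tree() and myfs_get_extent() are hypothetical stand-ins for
 * the filesystem's own inode glue and get_extent_t callback.
 */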

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			      void *data)
{
	struct inode *inode = page->mapping->host;
	struct extent_page_data *epd = data;
	struct extent_map_tree *tree = epd->tree;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 iosize;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	if (page->index > end_index) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		unlock_page(page);
		return 0;
	}

	if (page->index == end_index) {
		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);

		zero_user_page(page, offset,
			       PAGE_CACHE_SIZE - offset, KM_USER0);
	}

	set_page_extent_mapped(page);

	lock_extent(tree, start, page_end, GFP_NOFS);
	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
					       &delalloc_end,
					       128 * 1024 * 1024);
	if (nr_delalloc) {
		tree->ops->fill_delalloc(inode, start, delalloc_end);
		if (delalloc_end >= page_end + 1) {
			clear_extent_bit(tree, page_end + 1, delalloc_end,
					 EXTENT_LOCKED | EXTENT_DELALLOC,
					 1, 0, GFP_NOFS);
		}
		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
				 0, 0, GFP_NOFS);
		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
			printk("found delalloc bits after clear extent_bit\n");
		}
	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after find_delalloc_range returns 0\n");
	}

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after lock_extent\n");
	}

	if (last_byte <= start) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		goto done;
	}

	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
			break;
		}
		em = epd->get_extent(inode, page, page_offset, cur, end, 1);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		if (block_start == EXTENT_MAP_HOLE ||
		    block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
							   cur + iosize - 1);
		} else {
			ret = 0;
		}
		if (ret)
			SetPageError(page);
		else {
			unsigned long nr = end_index + 1;

			set_range_writeback(tree, cur, cur + iosize - 1);

			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 &epd->bio, nr,
						 end_bio_extent_writepage);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
done:
	unlock_extent(tree, start, page_end, GFP_NOFS);
	unlock_page(page);
	return 0;
}

int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
{
	int ret;
	struct extent_page_data epd = {
		.bio = NULL,
		.tree = tree,
		.get_extent = get_extent,
	};

	ret = __extent_writepage(page, wbc, &epd);
	if (epd.bio)
		submit_one_bio(WRITE, epd.bio);
	return ret;
}
EXPORT_SYMBOL(extent_write_full_page);

int extent_writepages(struct extent_map_tree *tree,
		      struct address_space *mapping,
		      get_extent_t *get_extent,
		      struct writeback_control *wbc)
{
	int ret;
	struct extent_page_data epd = {
		.bio = NULL,
		.tree = tree,
		.get_extent = get_extent,
	};

	ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
	if (epd.bio)
		submit_one_bio(WRITE, epd.bio);
	return ret;
}
EXPORT_SYMBOL(extent_writepages);
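
/*
 * Illustrative sketch, not part of the original file: the write side
 * slots into address_space_operations the same way as the readpage
 * sketch above, roughly:
 *
 *	static int myfs_writepages(struct address_space *mapping,
 *				   struct writeback_control *wbc)
 *	{
 *		struct extent_map_tree *tree = myfs_tree(mapping->host);
 *		return extent_writepages(tree, mapping, myfs_get_extent, wbc);
 *	}
 *
 * myfs_tree() and myfs_get_extent() are the same hypothetical helpers
 * as in the readpage sketch.
 */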

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
			  struct page *page, unsigned long offset)
{
	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);
	if (start > end)
		return 0;

	lock_extent(tree, start, end, GFP_NOFS);
	wait_on_extent_writeback(tree, start, end);
	clear_extent_bit(tree, start, end,
			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
			 1, 1, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, set_range_dirty is used to mark both
 * the pages and the extent records as dirty
 */
int extent_commit_write(struct extent_map_tree *tree,
			struct inode *inode, struct page *page,
			unsigned from, unsigned to)
{
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	set_page_extent_mapped(page);
	set_page_dirty(page);

	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
	return 0;
}
EXPORT_SYMBOL(extent_commit_write);

int extent_prepare_write(struct extent_map_tree *tree,
			 struct inode *inode, struct page *page,
			 unsigned from, unsigned to, get_extent_t *get_extent)
{
	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 block_start;
	u64 orig_block_start;
	u64 block_end;
	u64 cur_end;
	struct extent_map *em;
	unsigned blocksize = 1 << inode->i_blkbits;
	size_t page_offset = 0;
	size_t block_off_start;
	size_t block_off_end;
	int err = 0;
	int iocount = 0;
	int ret = 0;
	int isnew;

	set_page_extent_mapped(page);

	block_start = (page_start + from) & ~((u64)blocksize - 1);
	block_end = (page_start + to - 1) | (blocksize - 1);
	orig_block_start = block_start;

	lock_extent(tree, page_start, page_end, GFP_NOFS);
	while (block_start <= block_end) {
		em = get_extent(inode, page, page_offset, block_start,
				block_end, 1);
		if (IS_ERR(em) || !em)
			goto err;
		cur_end = min(block_end, em->end);
		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
		block_off_end = block_off_start + blocksize;
		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

		if (!PageUptodate(page) && isnew &&
		    (block_off_end > to || block_off_start < from)) {
			void *kaddr;

			kaddr = kmap_atomic(page, KM_USER0);
			if (block_off_end > to)
				memset(kaddr + to, 0, block_off_end - to);
			if (block_off_start < from)
				memset(kaddr + block_off_start, 0,
				       from - block_off_start);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (!isnew && !PageUptodate(page) &&
		    (block_off_end > to || block_off_start < from) &&
		    !test_range_bit(tree, block_start, cur_end,
				    EXTENT_UPTODATE, 1)) {
			u64 sector;
			u64 extent_offset = block_start - em->start;
			size_t iosize;

			sector = (em->block_start + extent_offset) >> 9;
			iosize = (cur_end - block_start + blocksize - 1) &
				 ~((u64)blocksize - 1);
			/*
			 * we've already got the extent locked, but we
			 * need to split the state such that our end_bio
			 * handler can clear the lock.
			 */
			set_extent_bit(tree, block_start,
				       block_start + iosize - 1,
				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 em->bdev, NULL, 1,
						 end_bio_extent_preparewrite);
			iocount++;
			block_start = block_start + iosize;
		} else {
			set_extent_uptodate(tree, block_start, cur_end,
					    GFP_NOFS);
			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
		free_extent_map(em);
	}
	if (iocount) {
		wait_extent_bit(tree, orig_block_start,
				block_end, EXTENT_LOCKED);
	}
	check_page_uptodate(tree, page);
err:
	/* FIXME, zero out newly allocated blocks on error */
	return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
	struct extent_map *em;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	u64 orig_start = start;
	int ret = 1;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		if (!test_range_bit(tree, em->start, em->end,
				    EXTENT_LOCKED, 0)) {
			remove_extent_mapping(tree, em);
			/* once for the rb tree */
			free_extent_map(em);
		}
		start = em->end + 1;
		/* once for us */
		free_extent_map(em);
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
		ret = 0;
	else
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);

sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
		     get_extent_t *get_extent)
{
	struct inode *inode = mapping->host;
	u64 start = iblock << inode->i_blkbits;
	u64 end = start + (1 << inode->i_blkbits) - 1;
	sector_t sector = 0;
	struct extent_map *em;

	em = get_extent(inode, NULL, 0, start, end, 0);
	if (!em || IS_ERR(em))
		return 0;

	if (em->block_start == EXTENT_MAP_INLINE ||
	    em->block_start == EXTENT_MAP_HOLE)
		goto out;

	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
out:
	free_extent_map(em);
	return sector;
}

static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
{
	if (list_empty(&eb->lru)) {
		extent_buffer_get(eb);
		list_add(&eb->lru, &tree->buffer_lru);
		tree->lru_size++;
		if (tree->lru_size >= BUFFER_LRU_MAX) {
			struct extent_buffer *rm;

			rm = list_entry(tree->buffer_lru.prev,
					struct extent_buffer, lru);
			tree->lru_size--;
			list_del_init(&rm->lru);
			free_extent_buffer(rm);
		}
	} else {
		list_move(&eb->lru, &tree->buffer_lru);
	}
	return 0;
}

static struct extent_buffer *find_lru(struct extent_map_tree *tree,
				      u64 start, unsigned long len)
{
	struct list_head *lru = &tree->buffer_lru;
	struct list_head *cur = lru->next;
	struct extent_buffer *eb;

	if (list_empty(lru))
		return NULL;

	do {
		eb = list_entry(cur, struct extent_buffer, lru);
		if (eb->start == start && eb->len == len) {
			extent_buffer_get(eb);
			return eb;
		}
		cur = cur->next;
	} while (cur != lru);
	return NULL;
}

static inline unsigned long num_extent_pages(u64 start, u64 len)
{
	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
		(start >> PAGE_CACHE_SHIFT);
}
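
/*
 * Illustrative worked example, not part of the original file: with 4K
 * pages, a buffer at start 6144 with len 4096 covers bytes
 * [6144, 10239], so num_extent_pages() returns
 * ((10240 + 4095) >> 12) - (6144 >> 12) = 3 - 1 = 2 pages.
 */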

static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
{
	if (list_empty(&eb->lru)) {
		extent_buffer_get(eb);
		list_add(&eb->lru, &tree->buffer_lru);
		tree->lru_size++;
		if (tree->lru_size >= BUFFER_LRU_MAX) {
			struct extent_buffer *rm;
			rm = list_entry(tree->buffer_lru.prev,
					struct extent_buffer, lru);
			tree->lru_size--;
			list_del_init(&rm->lru);
			free_extent_buffer(rm);
		}
	} else
		list_move(&eb->lru, &tree->buffer_lru);
	return 0;
}

static struct extent_buffer *find_lru(struct extent_map_tree *tree,
				      u64 start, unsigned long len)
{
	struct list_head *lru = &tree->buffer_lru;
	struct list_head *cur = lru->next;
	struct extent_buffer *eb;

	if (list_empty(lru))
		return NULL;

	do {
		eb = list_entry(cur, struct extent_buffer, lru);
		if (eb->start == start && eb->len == len) {
			extent_buffer_get(eb);
			return eb;
		}
		cur = cur->next;
	} while (cur != lru);
	return NULL;
}

static inline unsigned long num_extent_pages(u64 start, u64 len)
{
	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
		(start >> PAGE_CACHE_SHIFT);
}

static inline struct page *extent_buffer_page(struct extent_buffer *eb,
					      unsigned long i)
{
	struct page *p;
	struct address_space *mapping;

	if (i == 0)
		return eb->first_page;
	i += eb->start >> PAGE_CACHE_SHIFT;
	mapping = eb->first_page->mapping;
	read_lock_irq(&mapping->tree_lock);
	p = radix_tree_lookup(&mapping->page_tree, i);
	read_unlock_irq(&mapping->tree_lock);
	return p;
}

static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
						   u64 start,
						   unsigned long len,
						   gfp_t mask)
{
	struct extent_buffer *eb = NULL;

	spin_lock(&tree->lru_lock);
	eb = find_lru(tree, start, len);
	spin_unlock(&tree->lru_lock);
	if (eb)
		return eb;

	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
	if (!eb)
		return NULL;
	INIT_LIST_HEAD(&eb->lru);
	eb->start = start;
	eb->len = len;
	atomic_set(&eb->refs, 1);

	return eb;
}

static void __free_extent_buffer(struct extent_buffer *eb)
{
	kmem_cache_free(extent_buffer_cache, eb);
}

struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
					  u64 start, unsigned long len,
					  struct page *page0,
					  gfp_t mask)
{
	unsigned long num_pages = num_extent_pages(start, len);
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;
	int uptodate = 1;

	eb = __alloc_extent_buffer(tree, start, len, mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	if (eb->flags & EXTENT_BUFFER_FILLED)
		goto lru_add;

	if (page0) {
		eb->first_page = page0;
		i = 1;
		index++;
		page_cache_get(page0);
		mark_page_accessed(page0);
		set_page_extent_mapped(page0);
		set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
				 len << 2);
	} else {
		i = 0;
	}
	for (; i < num_pages; i++, index++) {
		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
		if (!p) {
			WARN_ON(1);
			goto fail;
		}
		set_page_extent_mapped(p);
		mark_page_accessed(p);
		if (i == 0) {
			eb->first_page = p;
			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
					 len << 2);
		} else {
			set_page_private(p, EXTENT_PAGE_PRIVATE);
		}
		if (!PageUptodate(p))
			uptodate = 0;
		unlock_page(p);
	}
	if (uptodate)
		eb->flags |= EXTENT_UPTODATE;
	eb->flags |= EXTENT_BUFFER_FILLED;

lru_add:
	spin_lock(&tree->lru_lock);
	add_lru(tree, eb);
	spin_unlock(&tree->lru_lock);
	return eb;

fail:
	spin_lock(&tree->lru_lock);
	list_del_init(&eb->lru);
	spin_unlock(&tree->lru_lock);
	if (!atomic_dec_and_test(&eb->refs))
		return NULL;
	for (index = 0; index < i; index++) {
		page_cache_release(extent_buffer_page(eb, index));
	}
	__free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(alloc_extent_buffer);
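
/*
 * Illustrative sketch, not part of this file: a typical caller
 * allocates a buffer for a metadata block, reads it, and drops its
 * reference on failure.  example_read_block() is hypothetical, and
 * blocknr/blocksize are values the caller would supply.
 */
#if 0
static struct extent_buffer *example_read_block(struct extent_map_tree *tree,
						u64 blocknr, u32 blocksize)
{
	struct extent_buffer *eb;

	eb = alloc_extent_buffer(tree, blocknr, blocksize, NULL, GFP_NOFS);
	if (!eb)
		return NULL;
	/* block until every page backing the buffer is up to date */
	if (read_extent_buffer_pages(tree, eb, 0, 1)) {
		free_extent_buffer(eb);
		return NULL;
	}
	return eb;
}
#endif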

struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
					 u64 start, unsigned long len,
					 gfp_t mask)
{
	unsigned long num_pages = num_extent_pages(start, len);
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;
	int uptodate = 1;

	eb = __alloc_extent_buffer(tree, start, len, mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	if (eb->flags & EXTENT_BUFFER_FILLED)
		goto lru_add;

	for (i = 0; i < num_pages; i++, index++) {
		p = find_lock_page(mapping, index);
		if (!p)
			goto fail;
		set_page_extent_mapped(p);
		mark_page_accessed(p);

		if (i == 0) {
			eb->first_page = p;
			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
					 len << 2);
		} else {
			set_page_private(p, EXTENT_PAGE_PRIVATE);
		}

		if (!PageUptodate(p))
			uptodate = 0;
		unlock_page(p);
	}
	if (uptodate)
		eb->flags |= EXTENT_UPTODATE;
	eb->flags |= EXTENT_BUFFER_FILLED;

lru_add:
	spin_lock(&tree->lru_lock);
	add_lru(tree, eb);
	spin_unlock(&tree->lru_lock);
	return eb;
fail:
	spin_lock(&tree->lru_lock);
	list_del_init(&eb->lru);
	spin_unlock(&tree->lru_lock);
	if (!atomic_dec_and_test(&eb->refs))
		return NULL;
	for (index = 0; index < i; index++) {
		page_cache_release(extent_buffer_page(eb, index));
	}
	__free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(find_extent_buffer);

void free_extent_buffer(struct extent_buffer *eb)
{
	unsigned long i;
	unsigned long num_pages;

	if (!eb)
		return;

	if (!atomic_dec_and_test(&eb->refs))
		return;

	num_pages = num_extent_pages(eb->start, eb->len);

	for (i = 0; i < num_pages; i++) {
		page_cache_release(extent_buffer_page(eb, i));
	}
	__free_extent_buffer(eb);
}
EXPORT_SYMBOL(free_extent_buffer);

int clear_extent_buffer_dirty(struct extent_map_tree *tree,
			      struct extent_buffer *eb)
{
	int set;
	unsigned long i;
	unsigned long num_pages;
	struct page *page;

	u64 start = eb->start;
	u64 end = start + eb->len - 1;

	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
	num_pages = num_extent_pages(eb->start, eb->len);

	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		lock_page(page);
		/*
		 * if we're on the first or last page and the block isn't
		 * aligned to a page boundary, do extra checks to make sure
		 * we don't clean a page that is still partially dirty
		 */
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
			start = (u64)page->index << PAGE_CACHE_SHIFT;
			end = start + PAGE_CACHE_SIZE - 1;
			if (test_range_bit(tree, start, end,
					   EXTENT_DIRTY, 0)) {
				unlock_page(page);
				continue;
			}
		}
		clear_page_dirty_for_io(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(clear_extent_buffer_dirty);

int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
				    struct extent_buffer *eb)
{
	return wait_on_extent_writeback(tree, eb->start,
					eb->start + eb->len - 1);
}
EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
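
/*
 * Illustrative sketch, not part of this file: the dirty-tracking
 * helpers above are meant to be used in a cycle like the one below
 * when a caller rewrites a buffer in place.  example_rewrite() is
 * hypothetical; zeroing the buffer merely stands in for whatever
 * modification the caller actually makes.
 */
#if 0
static int example_rewrite(struct extent_map_tree *tree,
			   struct extent_buffer *eb)
{
	/* wait for any IO still in flight against the old contents */
	wait_on_extent_buffer_writeback(tree, eb);

	/* modify the buffer, then mark both pages and state dirty */
	memset_extent_buffer(eb, 0, 0, eb->len);
	return set_extent_buffer_dirty(tree, eb);
}
#endif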

int set_extent_buffer_dirty(struct extent_map_tree *tree,
			    struct extent_buffer *eb)
{
	unsigned long i;
	unsigned long num_pages;

	num_pages = num_extent_pages(eb->start, eb->len);
	for (i = 0; i < num_pages; i++) {
		struct page *page = extent_buffer_page(eb, i);
		/* writepage may need to do something special for the
		 * first page, so we have to make sure page->private is
		 * properly set.  releasepage may drop page->private
		 * on us if the page isn't already dirty.
		 */
		if (i == 0) {
			lock_page(page);
			set_page_private(page,
					 EXTENT_PAGE_PRIVATE_FIRST_PAGE |
					 eb->len << 2);
		}
		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
		if (i == 0)
			unlock_page(page);
	}
	return set_extent_dirty(tree, eb->start,
				eb->start + eb->len - 1, GFP_NOFS);
}
EXPORT_SYMBOL(set_extent_buffer_dirty);

int set_extent_buffer_uptodate(struct extent_map_tree *tree,
			       struct extent_buffer *eb)
{
	unsigned long i;
	struct page *page;
	unsigned long num_pages;

	num_pages = num_extent_pages(eb->start, eb->len);

	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
			    GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
			check_page_uptodate(tree, page);
			continue;
		}
		SetPageUptodate(page);
	}
	return 0;
}
EXPORT_SYMBOL(set_extent_buffer_uptodate);

int extent_buffer_uptodate(struct extent_map_tree *tree,
			   struct extent_buffer *eb)
{
	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
			      EXTENT_UPTODATE, 1);
}
EXPORT_SYMBOL(extent_buffer_uptodate);

int read_extent_buffer_pages(struct extent_map_tree *tree,
			     struct extent_buffer *eb,
			     u64 start,
			     int wait)
{
	unsigned long i;
	unsigned long start_i;
	struct page *page;
	int err;
	int ret = 0;
	unsigned long num_pages;

	if (eb->flags & EXTENT_UPTODATE)
		return 0;

	/* this range-bit shortcut is deliberately compiled out */
	if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
				EXTENT_UPTODATE, 1)) {
		return 0;
	}
	if (start) {
		WARN_ON(start < eb->start);
		start_i = (start >> PAGE_CACHE_SHIFT) -
			(eb->start >> PAGE_CACHE_SHIFT);
	} else {
		start_i = 0;
	}

	num_pages = num_extent_pages(eb->start, eb->len);
	for (i = start_i; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if (PageUptodate(page))
			continue;
		if (!wait) {
			if (TestSetPageLocked(page))
				continue;
		} else {
			lock_page(page);
		}
		if (!PageUptodate(page)) {
			err = page->mapping->a_ops->readpage(NULL, page);
			if (err)
				ret = err;
		} else {
			unlock_page(page);
		}
	}

	if (ret || !wait)
		return ret;

	for (i = start_i; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			ret = -EIO;
	}
	if (!ret)
		eb->flags |= EXTENT_UPTODATE;
	return ret;
}
EXPORT_SYMBOL(read_extent_buffer_pages);
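
/*
 * Illustrative sketch, not part of this file: read_extent_buffer_pages()
 * supports a non-blocking mode, so a caller can kick off readahead for
 * a buffer and only block on the pages when it actually needs the data.
 * example_readahead_then_read() is a hypothetical caller.
 */
#if 0
static int example_readahead_then_read(struct extent_map_tree *tree,
				       struct extent_buffer *eb)
{
	/* start IO on whatever pages we can lock without sleeping */
	read_extent_buffer_pages(tree, eb, 0, 0);

	/* ... do other work while the IO is in flight ... */

	/* now block until every page is up to date */
	return read_extent_buffer_pages(tree, eb, 0, 1);
}
#endif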

void read_extent_buffer(struct extent_buffer *eb, void *dstv,
			unsigned long start,
			unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long num_pages = num_extent_pages(eb->start, eb->len);

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		page = extent_buffer_page(eb, i);
		if (!PageUptodate(page)) {
			printk(KERN_ERR "page %lu not up to date, i %lu, "
			       "total %lu, len %lu\n",
			       page->index, i, num_pages, eb->len);
			WARN_ON(1);
		}

		cur = min(len, (PAGE_CACHE_SIZE - offset));
		/* KM_USER1 here, so callers holding a KM_USER0 map are safe */
		kaddr = kmap_atomic(page, KM_USER1);
		memcpy(dst, kaddr + offset, cur);
		kunmap_atomic(kaddr, KM_USER1);

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(read_extent_buffer);

int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
			      unsigned long min_len, char **token, char **map,
			      unsigned long *map_start,
			      unsigned long *map_len, int km)
{
	size_t offset;
	char *kaddr;
	struct page *p;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len - 1) >>
		PAGE_CACHE_SHIFT;

	/* the requested range must fit inside a single page */
	if (i != end_i)
		return -EINVAL;

	if (i == 0) {
		offset = start_offset;
		*map_start = 0;
	} else {
		offset = 0;
		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
	}
	if (start + min_len > eb->len) {
		printk(KERN_ERR "bad mapping eb start %Lu len %lu, "
		       "wanted %lu %lu\n", eb->start, eb->len, start, min_len);
		WARN_ON(1);
	}

	p = extent_buffer_page(eb, i);
	WARN_ON(!PageUptodate(p));
	kaddr = kmap_atomic(p, km);
	*token = kaddr;
	*map = kaddr + offset;
	*map_len = PAGE_CACHE_SIZE - offset;
	return 0;
}
EXPORT_SYMBOL(map_private_extent_buffer);

int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
		      unsigned long min_len,
		      char **token, char **map,
		      unsigned long *map_start,
		      unsigned long *map_len, int km)
{
	int err;
	int save = 0;

	if (eb->map_token) {
		unmap_extent_buffer(eb, eb->map_token, km);
		eb->map_token = NULL;
		save = 1;
	}
	err = map_private_extent_buffer(eb, start, min_len, token, map,
					map_start, map_len, km);
	if (!err && save) {
		eb->map_token = *token;
		eb->kaddr = *map;
		eb->map_start = *map_start;
		eb->map_len = *map_len;
	}
	return err;
}
EXPORT_SYMBOL(map_extent_buffer);

void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
{
	kunmap_atomic(token, km);
}
EXPORT_SYMBOL(unmap_extent_buffer);

int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start,
			 unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, (PAGE_CACHE_SIZE - offset));

		kaddr = kmap_atomic(page, KM_USER0);
		ret = memcmp(ptr, kaddr + offset, cur);
		kunmap_atomic(kaddr, KM_USER0);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}
EXPORT_SYMBOL(memcmp_extent_buffer);
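
/*
 * Illustrative sketch, not part of this file:
 * map_private_extent_buffer() refuses any range that crosses a page
 * boundary, so a caller needs a fallback for straddling values.
 * example_peek_u64() is a hypothetical helper showing both paths; the
 * mapped window starts at buffer offset map_start, so byte "offset" of
 * the buffer lives at mapped + offset - map_start.
 */
#if 0
static u64 example_peek_u64(struct extent_buffer *eb, unsigned long offset)
{
	char *token;
	char *mapped;
	unsigned long map_start;
	unsigned long map_len;
	u64 val;

	if (map_private_extent_buffer(eb, offset, sizeof(val), &token,
				      &mapped, &map_start, &map_len,
				      KM_USER0)) {
		/* the value crosses a page boundary, copy it out instead */
		read_extent_buffer(eb, &val, offset, sizeof(val));
		return val;
	}
	memcpy(&val, mapped + offset - map_start, sizeof(val));
	unmap_extent_buffer(eb, token, KM_USER0);
	return val;
}
#endif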

void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		kaddr = kmap_atomic(page, KM_USER1);
		memcpy(kaddr + offset, src, cur);
		kunmap_atomic(kaddr, KM_USER1);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(write_extent_buffer);

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + offset, c, cur);
		kunmap_atomic(kaddr, KM_USER0);

		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(memset_extent_buffer);

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(src->len != dst_len);

	offset = (start_offset + dst_offset) &
		((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		page = extent_buffer_page(dst, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));

		/* read_extent_buffer() maps with KM_USER1, so holding
		 * KM_USER0 across the call is safe
		 */
		kaddr = kmap_atomic(page, KM_USER0);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);
		kunmap_atomic(kaddr, KM_USER0);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(copy_extent_buffer);
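
/*
 * Illustrative sketch, not part of this file: the write/copy helpers
 * above take buffer-relative byte offsets and handle page crossings
 * internally.  Copying a header from one buffer into another and
 * stamping a field looks roughly like this; struct example_header and
 * example_stamp() are assumptions, not types or code used here.
 */
#if 0
struct example_header {
	__le64 blocknr;
	u8 csum[32];
};

static void example_stamp(struct extent_buffer *dst,
			  struct extent_buffer *src, u64 new_blocknr)
{
	__le64 disk_blocknr = cpu_to_le64(new_blocknr);

	/* copy the whole header, then overwrite the block number */
	copy_extent_buffer(dst, src, 0, 0, sizeof(struct example_header));
	write_extent_buffer(dst, &disk_blocknr,
			    offsetof(struct example_header, blocknr),
			    sizeof(disk_blocknr));
}
#endif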

static void move_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	if (dst_page == src_page) {
		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
	} else {
		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
		char *p = dst_kaddr + dst_off + len;
		char *s = src_kaddr + src_off + len;

		/* distinct pages can't overlap; copy backwards anyway to
		 * match the memmove direction of the same-page case
		 */
		while (len--)
			*--p = *--s;

		kunmap_atomic(src_kaddr, KM_USER1);
	}
	kunmap_atomic(dst_kaddr, KM_USER0);
}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *src_kaddr;

	if (dst_page != src_page)
		src_kaddr = kmap_atomic(src_page, KM_USER1);
	else
		src_kaddr = dst_kaddr;

	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	kunmap_atomic(dst_kaddr, KM_USER0);
	if (dst_page != src_page)
		kunmap_atomic(src_kaddr, KM_USER1);
}

void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk(KERN_ERR "memcpy bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk(KERN_ERR "memcpy bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}

	while (len > 0) {
		dst_off_in_page = (start_offset + dst_offset) &
			((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = (start_offset + src_offset) &
			((unsigned long)PAGE_CACHE_SIZE - 1);

		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
					       src_off_in_page));
		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));

		copy_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}
EXPORT_SYMBOL(memcpy_extent_buffer);

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk(KERN_ERR "memmove bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk(KERN_ERR "memmove bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}
	/* when dst is below src, a forward copy can't clobber unread
	 * source bytes, so fall through to the memcpy path
	 */
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;

		dst_off_in_page = (start_offset + dst_end) &
			((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = (start_offset + src_end) &
			((unsigned long)PAGE_CACHE_SIZE - 1);

		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		move_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}
EXPORT_SYMBOL(memmove_extent_buffer);
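
/*
 * Illustrative sketch, not part of this file: memmove_extent_buffer()
 * exists so callers can shift overlapping ranges inside one buffer,
 * e.g. opening a gap in an array of fixed-size records.
 * example_insert_gap() is hypothetical and the 32-byte record size is
 * an arbitrary example value.
 */
#if 0
static void example_insert_gap(struct extent_buffer *eb,
			       unsigned long slot, unsigned long nr_slots)
{
	unsigned long rec = 32;	/* arbitrary record size */

	/* slide records [slot, nr_slots) one slot to the right */
	memmove_extent_buffer(eb, (slot + 1) * rec, slot * rec,
			      (nr_slots - slot) * rec);
	/* clear the freed slot */
	memset_extent_buffer(eb, 0, slot * rec, rec);
}
#endif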