#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				      unsigned long extra_flags,
				      void (*ctor)(void *, struct kmem_cache *,
						   unsigned long));

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static LIST_HEAD(extent_buffers);
static spinlock_t extent_buffers_lock;
static int nr_extent_buffers;
#define MAX_EXTENT_BUFFER_CACHE 128

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

void __init extent_map_init(void)
{
	extent_map_cache = btrfs_cache_create("extent_map",
					      sizeof(struct extent_map), 0,
					      NULL);
	extent_state_cache = btrfs_cache_create("extent_state",
						sizeof(struct extent_state), 0,
						NULL);
	extent_buffer_cache = btrfs_cache_create("extent_buffers",
						 sizeof(struct extent_buffer), 0,
						 NULL);
	spin_lock_init(&extent_buffers_lock);
}

void __exit extent_map_exit(void)
{
	struct extent_buffer *eb;

	while (!list_empty(&extent_buffers)) {
		eb = list_entry(extent_buffers.next,
				struct extent_buffer, list);
		list_del(&eb->list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	rwlock_init(&tree->lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;
	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);


struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
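		/*
		 * entries are keyed by [start, end]; descend until we hit an
		 * overlapping entry or the spot where the new node links in
		 */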
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while (prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;
	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == EXTENT_MAP_HOLE &&
		      prev->block_start == EXTENT_MAP_HOLE) ||
		     (em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	if ((end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", start, end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
			struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;
	if ((prealloc->end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", prealloc->start,
		       prealloc->end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start >= end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *              | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->state & bits) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			/* return the first matching extent, not the last */
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);

u64 find_lock_delalloc_range(struct extent_map_tree *tree,
			     u64 start, u64 lock_start, u64 *end,
			     u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = start;
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
search_again:
	node = tree_search(&tree->state, cur_start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start != cur_start) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			goto out;
		}
		if (state->start >= lock_start) {
			if (state->state & EXTENT_LOCKED) {
				DEFINE_WAIT(wait);
				atomic_inc(&state->refs);
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
				write_unlock_irq(&tree->lock);
				schedule();
				write_lock_irq(&tree->lock);
				finish_wait(&state->wq, &wait);
				free_extent_state(state);
				goto search_again;
			}
			state->state |= EXTENT_LOCKED;
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes = state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return found;
}

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	return ret;
}

int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the range
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
			  int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
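 * The extent tree updates below use GFP_ATOMIC because this runs from
 * bio completion context.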
 */
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start, end);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			if (ret)
				uptodate = 0;
		}
		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
static int end_bio_extent_preparewrite(struct bio *bio,
				       unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

static int submit_extent_page(int rw, struct extent_map_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      bio_end_io_t end_io_func)
{
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_io_vec[0].bv_page = page;
	bio->bi_io_vec[0].bv_len = size;
	bio->bi_io_vec[0].bv_offset = offset;

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = size;

	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}

void set_page_extent_mapped(struct page *page)
{
	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		set_page_private(page, 1);
		page_cache_get(page);
	}
}

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize = inode->i_sb->s_blocksize;

	set_page_extent_mapped(page);

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	while (cur <= end) {
		if (cur >= last_byte) {
			iosize = PAGE_CACHE_SIZE - page_offset;
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);

		iosize = min(em->end - cur, end - cur) + 1;
		cur_end = min(em->end, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 bdev, end_bio_extent_readpage);
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(extent_read_full_page);

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	if (page->index > end_index) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		unlock_page(page);
		return 0;
	}

	if (page->index == end_index) {
		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
		zero_user_page(page, offset,
			       PAGE_CACHE_SIZE - offset, KM_USER0);
	}

	set_page_extent_mapped(page);

	lock_extent(tree, start, page_end, GFP_NOFS);
	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
					       &delalloc_end,
					       128 * 1024 * 1024);
	if (nr_delalloc) {
		tree->ops->fill_delalloc(inode, start, delalloc_end);
		if (delalloc_end >= page_end + 1) {
			clear_extent_bit(tree, page_end + 1, delalloc_end,
					 EXTENT_LOCKED | EXTENT_DELALLOC,
					 1, 0, GFP_NOFS);
		}
		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
				 0, 0, GFP_NOFS);
		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
			printk("found delalloc bits after clear extent_bit\n");
		}
	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after find_delalloc_range returns 0\n");
	}

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after lock_extent\n");
	}

	if (last_byte <= start) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		goto done;
	}

	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		if (block_start == EXTENT_MAP_HOLE ||
		    block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
							   cur + iosize - 1);
		} else {
			ret = 0;
		}
		if (ret)
			SetPageError(page);
		else {
			set_range_writeback(tree, cur, cur + iosize - 1);
			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 end_bio_extent_writepage);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
done:
	unlock_extent(tree, start, page_end, GFP_NOFS);
	unlock_page(page);
	return 0;
}
EXPORT_SYMBOL(extent_write_full_page);

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
			  struct page *page, unsigned long offset)
{
	u64 start = (page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);
	if (start > end)
		return 0;

	lock_extent(tree, start, end, GFP_NOFS);
	wait_on_extent_writeback(tree, start, end);
	clear_extent_bit(tree, start, end,
			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
			 1, 1, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, set_range_dirty is used to mark both
 * the pages and the extent records as dirty
 */
int extent_commit_write(struct extent_map_tree *tree,
			struct inode *inode, struct page *page,
			unsigned from, unsigned to)
{
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	set_page_extent_mapped(page);
	set_page_dirty(page);

	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
	return 0;
}
EXPORT_SYMBOL(extent_commit_write);

int extent_prepare_write(struct extent_map_tree *tree,
			 struct inode *inode, struct page *page,
			 unsigned from, unsigned to, get_extent_t *get_extent)
{
	u64 page_start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 block_start;
	u64 orig_block_start;
	u64 block_end;
	u64 cur_end;
	struct extent_map *em;
	unsigned blocksize = 1 << inode->i_blkbits;
	size_t page_offset = 0;
	size_t block_off_start;
	size_t block_off_end;
	int err = 0;
	int iocount = 0;
	int ret = 0;
	int isnew;

	set_page_extent_mapped(page);

	block_start = (page_start + from) & ~((u64)blocksize - 1);
	block_end = (page_start + to - 1) | (blocksize - 1);
	orig_block_start = block_start;

	lock_extent(tree, page_start, page_end, GFP_NOFS);
	while (block_start <= block_end) {
		em = get_extent(inode, page, page_offset, block_start,
				block_end, 1);
		if (IS_ERR(em) || !em) {
			goto err;
		}
		cur_end = min(block_end, em->end);
		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
		block_off_end = block_off_start + blocksize;
		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

		if (!PageUptodate(page) && isnew &&
		    (block_off_end > to || block_off_start < from)) {
			void *kaddr;

			kaddr = kmap_atomic(page, KM_USER0);
			if (block_off_end > to)
				memset(kaddr + to, 0, block_off_end - to);
			if (block_off_start < from)
				memset(kaddr + block_off_start, 0,
				       from - block_off_start);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (!isnew && !PageUptodate(page) &&
		    (block_off_end > to || block_off_start < from) &&
		    !test_range_bit(tree, block_start, cur_end,
				    EXTENT_UPTODATE, 1)) {
			u64 sector;
			u64 extent_offset = block_start - em->start;
			size_t iosize;
			sector = (em->block_start + extent_offset) >> 9;
			iosize = (cur_end - block_start + blocksize - 1) &
				 ~((u64)blocksize - 1);
			/*
			 * we've already got the extent locked, but we
			 * need to split the state such that our end_bio
			 * handler can clear the lock.
			 */
			set_extent_bit(tree, block_start,
				       block_start + iosize - 1,
				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 em->bdev,
						 end_bio_extent_preparewrite);
			iocount++;
			block_start = block_start + iosize;
		} else {
			set_extent_uptodate(tree, block_start, cur_end,
					    GFP_NOFS);
			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
		free_extent_map(em);
	}
	if (iocount) {
		wait_extent_bit(tree, orig_block_start,
				block_end, EXTENT_LOCKED);
	}
	check_page_uptodate(tree, page);
err:
	/* FIXME, zero out newly allocated blocks on error */
	return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
	struct extent_map *em;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	u64 orig_start = start;
	int ret = 1;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		if (!test_range_bit(tree, em->start, em->end,
				    EXTENT_LOCKED, 0)) {
			remove_extent_mapping(tree, em);
			/* once for the rb tree */
			free_extent_map(em);
		}
		start = em->end + 1;
		/* once for us */
		free_extent_map(em);
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
		ret = 0;
	else
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);

sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
		     get_extent_t *get_extent)
{
	struct inode *inode = mapping->host;
	u64 start = iblock << inode->i_blkbits;
	u64 end = start + (1 << inode->i_blkbits) - 1;
	struct extent_map *em;

	em = get_extent(inode, NULL, 0, start, end, 0);
	if (!em || IS_ERR(em))
		return 0;

	if (em->block_start == EXTENT_MAP_INLINE ||
	    em->block_start == EXTENT_MAP_HOLE)
		return 0;

	return (em->block_start + start - em->start) >> inode->i_blkbits;
}

static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
{
	struct extent_buffer *eb = NULL;
	spin_lock(&extent_buffers_lock);
	if (!list_empty(&extent_buffers)) {
		eb = list_entry(extent_buffers.next, struct extent_buffer,
				list);
		list_del(&eb->list);
		WARN_ON(nr_extent_buffers == 0);
		nr_extent_buffers--;
	}
	spin_unlock(&extent_buffers_lock);
	if (eb) {
		memset(eb, 0, sizeof(*eb));
		return eb;
	}
	return kmem_cache_zalloc(extent_buffer_cache, mask);
}

static void __free_extent_buffer(struct extent_buffer *eb)
{
	if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
		kmem_cache_free(extent_buffer_cache, eb);
	} else {
		spin_lock(&extent_buffers_lock);
		list_add(&eb->list, &extent_buffers);
		nr_extent_buffers++;
		spin_unlock(&extent_buffers_lock);
	}
}

static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
{
	struct page *p;
	if (i == 0)
		return eb->first_page;
	i += eb->start >> PAGE_CACHE_SHIFT;
	p = find_get_page(eb->first_page->mapping, i);
	page_cache_release(p);
	return p;
}

struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
					  u64 start, unsigned long len,
					  gfp_t mask)
{
	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
				  (start >> PAGE_CACHE_SHIFT) + 1;
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;
	int uptodate = 0;

	eb = __alloc_extent_buffer(mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	eb->start = start;
	eb->len = len;
	atomic_set(&eb->refs, 1);

	for (i = 0; i < num_pages; i++, index++) {
		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
		if (!p) {
			/* make sure the free only frees the pages we've
			 * grabbed a reference on
			 */
			eb->len = i << PAGE_CACHE_SHIFT;
			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
			goto fail;
		}
		if (i == 0)
			eb->first_page = p;
		if (!PageUptodate(p))
			uptodate = 0;
		unlock_page(p);
	}
	if (uptodate)
		eb->flags |= EXTENT_UPTODATE;
	return eb;
fail:
	free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(alloc_extent_buffer);

struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
					 u64 start, unsigned long len,
					 gfp_t mask)
{
	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
				  (start >> PAGE_CACHE_SHIFT) + 1;
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;

	eb = __alloc_extent_buffer(mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	eb->start = start;
	eb->len = len;
	atomic_set(&eb->refs, 1);

	for (i = 0; i < num_pages; i++, index++) {
		p = find_get_page(mapping, index);
		if (!p) {
			/* make sure the free only frees the pages we've
			 * grabbed a reference on
			 */
			eb->len = i << PAGE_CACHE_SHIFT;
			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
			goto fail;
		}
		if (i == 0)
			eb->first_page = p;
	}
	return eb;
fail:
	free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(find_extent_buffer);

void free_extent_buffer(struct extent_buffer *eb)
{
	unsigned long i;
	unsigned long num_pages;

	if (!eb)
		return;

	if (!atomic_dec_and_test(&eb->refs))
		return;

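	/* this was the last reference, release every page the buffer holds */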
	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	if (eb->first_page)
		page_cache_release(eb->first_page);
	for (i = 1; i < num_pages; i++) {
		page_cache_release(extent_buffer_page(eb, i));
	}
	__free_extent_buffer(eb);
}
EXPORT_SYMBOL(free_extent_buffer);

int clear_extent_buffer_dirty(struct extent_map_tree *tree,
			      struct extent_buffer *eb)
{
	int set;
	unsigned long i;
	unsigned long num_pages;
	struct page *page;

	u64 start = eb->start;
	u64 end = start + eb->len - 1;

	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		lock_page(page);
		/*
		 * if we're on the last page or the first page and the
		 * block isn't aligned on a page boundary, do extra checks
		 * to make sure we don't clean page that is partially dirty
		 */
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
			start = page->index << PAGE_CACHE_SHIFT;
			end = start + PAGE_CACHE_SIZE - 1;
			if (test_range_bit(tree, start, end,
					   EXTENT_DIRTY, 0)) {
				unlock_page(page);
				continue;
			}
		}
		clear_page_dirty_for_io(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(clear_extent_buffer_dirty);

int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
				    struct extent_buffer *eb)
{
	return wait_on_extent_writeback(tree, eb->start,
					eb->start + eb->len - 1);
}
EXPORT_SYMBOL(wait_on_extent_buffer_writeback);

int set_extent_buffer_dirty(struct extent_map_tree *tree,
			    struct extent_buffer *eb)
{
	return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
}
EXPORT_SYMBOL(set_extent_buffer_dirty);

int set_extent_buffer_uptodate(struct extent_map_tree *tree,
			       struct extent_buffer *eb)
{
	unsigned long i;
	struct page *page;
	unsigned long num_pages;

	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
			    GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
			check_page_uptodate(tree, page);
			continue;
		}
		SetPageUptodate(page);
	}
	return 0;
}
EXPORT_SYMBOL(set_extent_buffer_uptodate);

int extent_buffer_uptodate(struct extent_map_tree *tree,
			   struct extent_buffer *eb)
{
	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
			      EXTENT_UPTODATE, 1);
}
EXPORT_SYMBOL(extent_buffer_uptodate);

int read_extent_buffer_pages(struct extent_map_tree *tree,
			     struct extent_buffer *eb, int wait)
{
	unsigned long i;
	struct page *page;
	int err;
	int ret = 0;
	unsigned long num_pages;

	if (eb->flags & EXTENT_UPTODATE)
		return 0;

	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
			   EXTENT_UPTODATE, 1)) {
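		/* every extent covering this buffer is already uptodate */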
		return 0;
	}

	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		(eb->start >> PAGE_CACHE_SHIFT) + 1;
	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		if (PageUptodate(page)) {
			continue;
		}
		if (!wait) {
			if (TestSetPageLocked(page)) {
				continue;
			}
		} else {
			lock_page(page);
		}
		if (!PageUptodate(page)) {
			err = page->mapping->a_ops->readpage(NULL, page);
			if (err) {
				ret = err;
			}
		} else {
			unlock_page(page);
		}
	}

	if (ret || !wait) {
		return ret;
	}

	for (i = 0; i < num_pages; i++) {
		page = extent_buffer_page(eb, i);
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			ret = -EIO;
		}
	}
	eb->flags |= EXTENT_UPTODATE;
	return ret;
}
EXPORT_SYMBOL(read_extent_buffer_pages);

void read_extent_buffer(struct extent_buffer *eb, void *dstv,
			unsigned long start,
			unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while(len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, (PAGE_CACHE_SIZE - offset));
		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(dst, kaddr + offset, cur);
		kunmap_atomic(kaddr, KM_USER0);

		dst += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(read_extent_buffer);

int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
		      unsigned long min_len,
		      char **token, char **map,
		      unsigned long *map_start,
		      unsigned long *map_len, int km)
{
	size_t offset = start & (PAGE_CACHE_SIZE - 1);
	char *kaddr;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len) >>
		PAGE_CACHE_SHIFT;

	if (i != end_i)
		return -EINVAL;

	WARN_ON(start > eb->len);

	if (i == 0) {
		offset = start_offset;
		*map_start = 0;
	} else {
		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
	}

	kaddr = kmap_atomic(extent_buffer_page(eb, i), km);
	*token = kaddr;
	*map = kaddr + offset;
	*map_len = PAGE_CACHE_SIZE - offset;
	return 0;
}
EXPORT_SYMBOL(map_extent_buffer);

void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
{
	kunmap_atomic(token, km);
}
EXPORT_SYMBOL(unmap_extent_buffer);

int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start,
			 unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;
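	/* compare one page worth of bytes at a time */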

	while(len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, (PAGE_CACHE_SIZE - offset));

		kaddr = kmap_atomic(page, KM_USER0);
		ret = memcmp(ptr, kaddr + offset, cur);
		kunmap_atomic(kaddr, KM_USER0);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}
EXPORT_SYMBOL(memcmp_extent_buffer);

void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while(len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		kaddr = kmap_atomic(page, KM_USER0);
		memcpy(kaddr + offset, src, cur);
		kunmap_atomic(kaddr, KM_USER0);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(write_extent_buffer);

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->start + eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while(len > 0) {
		page = extent_buffer_page(eb, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + offset, c, cur);
		kunmap_atomic(kaddr, KM_USER0);

		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(memset_extent_buffer);

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(src->len != dst_len);

	offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while(len > 0) {
		page = extent_buffer_page(dst, i);
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));

		kaddr = kmap_atomic(page, KM_USER1);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);
		kunmap_atomic(kaddr, KM_USER1);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(copy_extent_buffer);

static void move_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
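	/*
	 * source and destination may overlap when they live in the same
	 * page, so use memmove there; distinct pages are copied byte by
	 * byte from the end of the range
	 */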
	if (dst_page == src_page) {
		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
	} else {
		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
		char *p = dst_kaddr + dst_off + len;
		char *s = src_kaddr + src_off + len;

		while (len--)
			*--p = *--s;

		kunmap_atomic(src_kaddr, KM_USER1);
	}
	kunmap_atomic(dst_kaddr, KM_USER0);
}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *src_kaddr;

	if (dst_page != src_page)
		src_kaddr = kmap_atomic(src_page, KM_USER1);
	else
		src_kaddr = dst_kaddr;

	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	kunmap_atomic(dst_kaddr, KM_USER0);
	if (dst_page != src_page)
		kunmap_atomic(src_kaddr, KM_USER1);
}

void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}

	while(len > 0) {
		dst_off_in_page = dst_offset &
			((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = src_offset &
			((unsigned long)PAGE_CACHE_SIZE - 1);

		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;

		if (src_i == 0)
			src_off_in_page += start_offset;
		if (dst_i == 0)
			dst_off_in_page += start_offset;

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
					       src_off_in_page));
		cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
					       dst_off_in_page));

		copy_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}
EXPORT_SYMBOL(memcpy_extent_buffer);

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while(len > 0) {
		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;

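		/*
		 * dst is above src, so walk backwards from the end of the
		 * range, moving at most one page worth of data per pass
		 */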
		dst_off_in_page = dst_end &
			((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = src_end &
			((unsigned long)PAGE_CACHE_SIZE - 1);

		if (src_i == 0)
			src_off_in_page += start_offset;
		if (dst_i == 0)
			dst_off_in_page += start_offset;

		cur = min(len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);

		move_pages(extent_buffer_page(dst, dst_i),
			   extent_buffer_page(dst, src_i),
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur - 1;
		src_end -= cur - 1;
		len -= cur;
	}
}
EXPORT_SYMBOL(memmove_extent_buffer);