#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				      unsigned long extra_flags,
				      void (*ctor)(void *, struct kmem_cache *,
						   unsigned long));

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

void __init extent_map_init(void)
{
	extent_map_cache = btrfs_cache_create("extent_map",
					      sizeof(struct extent_map),
					      SLAB_DESTROY_BY_RCU,
					      NULL);
	extent_state_cache = btrfs_cache_create("extent_state",
						sizeof(struct extent_state),
						SLAB_DESTROY_BY_RCU,
						NULL);
}

void __exit extent_map_exit(void)
{
	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	rwlock_init(&tree->lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;
	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);

struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);
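
/*
 * Both extent_map and extent_state objects are reference counted.  A
 * minimal lifetime sketch (illustrative only, not called from this
 * file):
 *
 *	struct extent_map *em = alloc_extent_map(GFP_NOFS);
 *	if (!em || IS_ERR(em))
 *		return -ENOMEM;
 *	em->start = 0;
 *	em->end = 4095;
 *	add_extent_mapping(tree, em);
 *	free_extent_map(em);
 *
 * add_extent_mapping() (below) takes a reference of its own, so the
 * caller drops the allocation reference when it is done; the object
 * only goes back to the slab cache once the tree and any
 * lookup_extent_mapping() callers have dropped theirs as well.
 */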

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node ** p = &root->rb_node;
	struct rb_node * parent = NULL;
	struct tree_entry *entry;

	while(*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node * n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while(n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while(prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;
	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == EXTENT_MAP_HOLE &&
		      prev->block_start == EXTENT_MAP_HOLE) ||
		     (em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);
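
/*
 * A lookup sketch (illustrative only): the caller owns a reference on
 * any mapping returned and must drop it, and because only the first
 * intersecting mapping comes back, walking a range means advancing
 * start past each result:
 *
 *	while (start <= end) {
 *		em = lookup_extent_mapping(tree, start, end);
 *		if (!em || IS_ERR(em))
 *			break;
 *		do_something(em);
 *		start = em->end + 1;
 *		free_extent_map(em);
 *	}
 *
 * do_something() is just a placeholder for whatever the caller does
 * with each mapping.
 */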

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}
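
/*
 * A worked example of the merge above (illustrative only): if the tree
 * holds [0, 4095] and [8192, 12287], both with only EXTENT_DIRTY set,
 * and a new EXTENT_DIRTY state [4096, 8191] is inserted, the backward
 * merge folds [0, 4095] into it and the forward merge then folds the
 * result into [8192, 12287], leaving a single [0, 12287] record.  If
 * the new record carried EXTENT_LOCKED or EXTENT_WRITEBACK (the
 * EXTENT_IOBITS) it would not merge at all, and a neighbor whose bits
 * differ is never merged either.
 */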

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	if ((end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", start, end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling, there are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;
	if ((prealloc->end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", prealloc->start,
		       prealloc->end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}
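
/*
 * Split sketch (illustrative only): with 'orig' covering [0, 8191] and
 * split == 4096, after split_state() the tree holds
 *
 *	prealloc: [0, 4095]
 *	orig:     [4096, 8191]
 *
 * both carrying the original state bits.  The caller allocated
 * 'prealloc' up front precisely so this can run under the tree lock
 * without sleeping.
 */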

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);
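
/*
 * Truncate-style usage sketch (illustrative only): to throw away all
 * state records for a range regardless of which bits they carry, pass
 * wake == 1 so sleepers in wait_extent_bit() get kicked and delete == 1
 * so even records with bits still set are removed:
 *
 *	clear_extent_bit(tree, start, end,
 *			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
 *			 1, 1, GFP_NOFS);
 *
 * extent_invalidatepage() below does exactly this.
 */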

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	write_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);
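
/*
 * Buffered write sketch using the wrappers above (illustrative only):
 * a write marks its range dirty and delalloc, and writeback later
 * clears both in one call:
 *
 *	set_extent_delalloc(tree, start, end, GFP_NOFS);
 *	...
 *	clear_extent_dirty(tree, start, end, GFP_NOFS);
 *
 * clear_extent_dirty() clears EXTENT_DIRTY and EXTENT_DELALLOC
 * together, so a single record covers both transitions.
 */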

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while(1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->state & bits) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			/* stop at the first match, this is find_first */
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);
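
/*
 * Locking sketch (illustrative only): the IO paths below bracket their
 * work on a page's byte range with lock_extent()/unlock_extent():
 *
 *	u64 start = page->index << PAGE_CACHE_SHIFT;
 *	u64 end = start + PAGE_CACHE_SIZE - 1;
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	...
 *	unlock_extent(tree, start, end, GFP_NOFS);
 *
 * Because EXTENT_LOCKED is set with exclusive == 1, a second locker
 * sleeps in wait_extent_bit() until the first call unlocks.
 */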

u64 find_lock_delalloc_range(struct extent_map_tree *tree,
			     u64 start, u64 lock_start, u64 *end, u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = start;
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
search_again:
	node = tree_search(&tree->state, cur_start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while(1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start != cur_start) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			goto out;
		}
		if (state->start >= lock_start) {
			if (state->state & EXTENT_LOCKED) {
				DEFINE_WAIT(wait);
				atomic_inc(&state->refs);
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
				write_unlock_irq(&tree->lock);
				schedule();
				write_lock_irq(&tree->lock);
				finish_wait(&state->wq, &wait);
				free_extent_state(state);
				goto search_again;
			}
			state->state |= EXTENT_LOCKED;
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return found;
}

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);
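
/*
 * Sketch of the private field used below (illustrative only): a caller
 * can stash one u64 per state record, keyed by the record's exact
 * start offset, and read it back later:
 *
 *	set_state_private(tree, start, csum);
 *	...
 *	err = get_state_private(tree, start, &csum);
 *
 * 'csum' is just a placeholder name here; both calls return -ENOENT if
 * no record starts at exactly 'start'.
 */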

int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	return ret;
}

int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
			  int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}
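
/*
 * 'filled' semantics example (illustrative only): with EXTENT_UPTODATE
 * set on [0, 2047] but not on [2048, 4095],
 *
 *	test_range_bit(tree, 0, 4095, EXTENT_UPTODATE, 0) returns 1
 *	test_range_bit(tree, 0, 4095, EXTENT_UPTODATE, 1) returns 0
 *
 * which is why the check_page_* helpers above use filled == 1 for
 * "whole page up to date" tests and filled == 0 for "any byte still
 * locked or under writeback" tests.
 */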

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
		if (tree->ops && tree->ops->writepage_end_io_hook)
			tree->ops->writepage_end_io_hook(page, start, end);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			if (ret)
				uptodate = 0;
		}
		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
static int end_bio_extent_preparewrite(struct bio *bio,
				       unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

static int submit_extent_page(int rw, struct extent_map_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      bio_end_io_t end_io_func)
{
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_io_vec[0].bv_page = page;
	bio->bi_io_vec[0].bv_len = size;
	bio->bi_io_vec[0].bv_offset = offset;

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = size;

	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}
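
/*
 * Submission sketch (illustrative only): every IO below goes through
 * submit_extent_page() with one of the end_io handlers above, so the
 * handler can find the tree again via bio->bi_private:
 *
 *	ret = submit_extent_page(READ, tree, page, sector, iosize,
 *				 page_offset, bdev,
 *				 end_bio_extent_readpage);
 *
 * Each bio carries exactly one bio_vec here; batching adjacent pages
 * into larger bios is one of the performance fixes noted above.
 */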

void set_page_extent_mapped(struct page *page)
{
	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		set_page_private(page, 1);
		page_cache_get(page);
	}
}

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize = inode->i_sb->s_blocksize;

	set_page_extent_mapped(page);

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	while (cur <= end) {
		if (cur >= last_byte) {
			iosize = PAGE_CACHE_SIZE - page_offset;
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);

		iosize = min(em->end - cur, end - cur) + 1;
		cur_end = min(em->end, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == EXTENT_MAP_HOLE) {
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 bdev, end_bio_extent_readpage);
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(extent_read_full_page);
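
/*
 * Wiring sketch (illustrative only, hypothetical names): a file system
 * using this code exposes extent_read_full_page() through its
 * address_space_operations, passing its own extent lookup callback:
 *
 *	static int my_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_map_tree *tree = my_tree(page->mapping->host);
 *		return extent_read_full_page(tree, page, my_get_extent);
 *	}
 *
 * my_tree() and my_get_extent() stand in for whatever the caller uses
 * to find its per-inode tree and to map file offsets to disk blocks.
 */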

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	if (page->index > end_index) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		unlock_page(page);
		return 0;
	}

	if (page->index == end_index) {
		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
		zero_user_page(page, offset,
			       PAGE_CACHE_SIZE - offset, KM_USER0);
	}

	set_page_extent_mapped(page);

	lock_extent(tree, start, page_end, GFP_NOFS);
	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
					       &delalloc_end,
					       128 * 1024 * 1024);
	if (nr_delalloc) {
		tree->ops->fill_delalloc(inode, start, delalloc_end);
		if (delalloc_end >= page_end + 1) {
			clear_extent_bit(tree, page_end + 1, delalloc_end,
					 EXTENT_LOCKED | EXTENT_DELALLOC,
					 1, 0, GFP_NOFS);
		}
		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
				 0, 0, GFP_NOFS);
		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
			printk("found delalloc bits after clear extent_bit\n");
		}
	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after find_delalloc_range returns 0\n");
	}

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after lock_extent\n");
	}

	if (last_byte <= start) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		goto done;
	}

	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		if (block_start == EXTENT_MAP_HOLE ||
		    block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		if (tree->ops && tree->ops->writepage_io_hook) {
			ret = tree->ops->writepage_io_hook(page, cur,
							   cur + iosize - 1);
		} else {
			ret = 0;
		}
		if (ret)
			SetPageError(page);
		else {
			set_range_writeback(tree, cur, cur + iosize - 1);
			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 end_bio_extent_writepage);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
done:
	unlock_extent(tree, start, page_end, GFP_NOFS);
	unlock_page(page);
	return 0;
}
EXPORT_SYMBOL(extent_write_full_page);

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
			  struct page *page, unsigned long offset)
{
	u64 start = (page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);
	if (start > end)
		return 0;

	lock_extent(tree, start, end, GFP_NOFS);
	wait_on_extent_writeback(tree, start, end);
	clear_extent_bit(tree, start, end,
			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
			 1, 1, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, marks the page dirty and extends i_size to
 * include the newly written range
 */
int extent_commit_write(struct extent_map_tree *tree,
			struct inode *inode, struct page *page,
			unsigned from, unsigned to)
{
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	set_page_extent_mapped(page);
	set_page_dirty(page);

	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
	return 0;
}
EXPORT_SYMBOL(extent_commit_write);
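
/*
 * prepare/commit pairing sketch (illustrative only, hypothetical
 * callback name): a file system's write path calls the two helpers
 * around the actual copy from userspace:
 *
 *	extent_prepare_write(tree, inode, page, from, to, my_get_extent);
 *	...
 *	extent_commit_write(tree, inode, page, from, to);
 *
 * extent_prepare_write() below reads or zeroes the blocks around
 * [from, to) so a partial-page write doesn't expose stale data.
 */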

int extent_prepare_write(struct extent_map_tree *tree,
			 struct inode *inode, struct page *page,
			 unsigned from, unsigned to, get_extent_t *get_extent)
{
	u64 page_start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 block_start;
	u64 orig_block_start;
	u64 block_end;
	u64 cur_end;
	struct extent_map *em;
	unsigned blocksize = 1 << inode->i_blkbits;
	size_t page_offset = 0;
	size_t block_off_start;
	size_t block_off_end;
	int err = 0;
	int iocount = 0;
	int ret = 0;
	int isnew;

	set_page_extent_mapped(page);

	block_start = (page_start + from) & ~((u64)blocksize - 1);
	block_end = (page_start + to - 1) | (blocksize - 1);
	orig_block_start = block_start;

	lock_extent(tree, page_start, page_end, GFP_NOFS);
	while(block_start <= block_end) {
		em = get_extent(inode, page, page_offset, block_start,
				block_end, 1);
		if (IS_ERR(em) || !em) {
			goto err;
		}
		cur_end = min(block_end, em->end);
		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
		block_off_end = block_off_start + blocksize;
		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

		if (!PageUptodate(page) && isnew &&
		    (block_off_end > to || block_off_start < from)) {
			void *kaddr;

			kaddr = kmap_atomic(page, KM_USER0);
			if (block_off_end > to)
				memset(kaddr + to, 0, block_off_end - to);
			if (block_off_start < from)
				memset(kaddr + block_off_start, 0,
				       from - block_off_start);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (!isnew && !PageUptodate(page) &&
		    (block_off_end > to || block_off_start < from) &&
		    !test_range_bit(tree, block_start, cur_end,
				    EXTENT_UPTODATE, 1)) {
			u64 sector;
			u64 extent_offset = block_start - em->start;
			size_t iosize;
			sector = (em->block_start + extent_offset) >> 9;
			iosize = (cur_end - block_start + blocksize - 1) &
				 ~((u64)blocksize - 1);
			/*
			 * we've already got the extent locked, but we
			 * need to split the state such that our end_bio
			 * handler can clear the lock.
			 */
			set_extent_bit(tree, block_start,
				       block_start + iosize - 1,
				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
			ret = submit_extent_page(READ, tree, page,
					 sector, iosize, page_offset, em->bdev,
					 end_bio_extent_preparewrite);
			iocount++;
			block_start = block_start + iosize;
		} else {
			set_extent_uptodate(tree, block_start, cur_end,
					    GFP_NOFS);
			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
		free_extent_map(em);
	}
	if (iocount) {
		wait_extent_bit(tree, orig_block_start,
				block_end, EXTENT_LOCKED);
	}
	check_page_uptodate(tree, page);
err:
	/* FIXME, zero out newly allocated blocks on error */
	return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
	struct extent_map *em;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	u64 orig_start = start;
	int ret = 1;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		if (!test_range_bit(tree, em->start, em->end,
				    EXTENT_LOCKED, 0)) {
			remove_extent_mapping(tree, em);
			/* once for the rb tree */
			free_extent_map(em);
		}
		start = em->end + 1;
		/* once for us */
		free_extent_map(em);
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
		ret = 0;
	else
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);

sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
		     get_extent_t *get_extent)
{
	struct inode *inode = mapping->host;
	u64 start = iblock << inode->i_blkbits;
	u64 end = start + (1 << inode->i_blkbits) - 1;
	struct extent_map *em;

	em = get_extent(inode, NULL, 0, start, end, 0);
	if (!em || IS_ERR(em))
		return 0;

	// XXX(hch): block 0 is valid in some cases, e.g. XFS RT device
	if (em->block_start == EXTENT_MAP_INLINE ||
	    em->block_start == EXTENT_MAP_HOLE)
		return 0;

	return (em->block_start + start - em->start) >> inode->i_blkbits;
}

struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
					  u64 start, unsigned long len,
					  gfp_t mask)
{
	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
				  (start >> PAGE_CACHE_SHIFT) + 1;
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;
	int uptodate = 1;

	eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	eb->start = start;
	eb->len = len;
	atomic_set(&eb->refs, 1);

	for (i = 0; i < num_pages; i++, index++) {
		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
		if (!p)
			goto fail;
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;
		unlock_page(p);
	}
	if (uptodate)
		eb->flags |= EXTENT_UPTODATE;
	return eb;
fail:
	free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(alloc_extent_buffer);

struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
					 u64 start, unsigned long len,
					 gfp_t mask)
{
	unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
				  (start >> PAGE_CACHE_SHIFT) + 1;
	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	struct extent_buffer *eb;
	struct page *p;
	struct address_space *mapping = tree->mapping;

	eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
	if (!eb || IS_ERR(eb))
		return NULL;

	eb->start = start;
	eb->len = len;
	atomic_set(&eb->refs, 1);

	for (i = 0; i < num_pages; i++, index++) {
		p = find_get_page(mapping, index);
		if (!p)
			goto fail;
		eb->pages[i] = p;
	}
	return eb;
fail:
	free_extent_buffer(eb);
	return NULL;
}
EXPORT_SYMBOL(find_extent_buffer);

void free_extent_buffer(struct extent_buffer *eb)
{
	unsigned long i;
	unsigned long num_pages;

	if (!eb)
		return;

	if (!atomic_dec_and_test(&eb->refs))
		return;

	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	for (i = 0; i < num_pages; i++) {
		if (eb->pages[i])
			page_cache_release(eb->pages[i]);
	}
	kfree(eb);
}
EXPORT_SYMBOL(free_extent_buffer);
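
/*
 * Buffer lifetime sketch (illustrative only): metadata blocks larger
 * than a page are handled as an extent_buffer spanning several page
 * cache pages:
 *
 *	eb = alloc_extent_buffer(tree, start, len, GFP_NOFS);
 *	read_extent_buffer_pages(tree, eb, 1);
 *	read_extent_buffer(eb, buf, 0, len);
 *	free_extent_buffer(eb);
 *
 * 'buf' is a caller-supplied destination.  The buffer holds one page
 * cache reference per page, all dropped when the last
 * free_extent_buffer() brings the refcount to zero.
 */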

int clear_extent_buffer_dirty(struct extent_map_tree *tree,
			      struct extent_buffer *eb)
{
	int set;
	unsigned long i;
	unsigned long num_pages;
	struct page *page;

	u64 start = eb->start;
	u64 end = start + eb->len - 1;

	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		lock_page(page);
		/*
		 * if we're on the last page or the first page and the
		 * block isn't aligned on a page boundary, do extra checks
		 * to make sure we don't clean page that is partially dirty
		 */
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
			start = page->index << PAGE_CACHE_SHIFT;
			end = start + PAGE_CACHE_SIZE - 1;
			if (test_range_bit(tree, start, end,
					   EXTENT_DIRTY, 0)) {
				unlock_page(page);
				continue;
			}
		}
		clear_page_dirty_for_io(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(clear_extent_buffer_dirty);

int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
				    struct extent_buffer *eb)
{
	return wait_on_extent_writeback(tree, eb->start,
					eb->start + eb->len - 1);
}
EXPORT_SYMBOL(wait_on_extent_buffer_writeback);

int set_extent_buffer_dirty(struct extent_map_tree *tree,
			    struct extent_buffer *eb)
{
	return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
}
EXPORT_SYMBOL(set_extent_buffer_dirty);

int set_extent_buffer_uptodate(struct extent_map_tree *tree,
			       struct extent_buffer *eb)
{
	unsigned long i;
	struct page *page;
	unsigned long num_pages;

	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;

	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
			    GFP_NOFS);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
		    ((i == num_pages - 1) &&
		     ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
			check_page_uptodate(tree, page);
			continue;
		}
		SetPageUptodate(page);
	}
	return 0;
}
EXPORT_SYMBOL(set_extent_buffer_uptodate);

int extent_buffer_uptodate(struct extent_map_tree *tree,
			   struct extent_buffer *eb)
{
	if (eb->flags & EXTENT_UPTODATE)
		return 1;
	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
			      EXTENT_UPTODATE, 1);
}
EXPORT_SYMBOL(extent_buffer_uptodate);

int read_extent_buffer_pages(struct extent_map_tree *tree,
			     struct extent_buffer *eb, int wait)
{
	unsigned long i;
	struct page *page;
	int err;
	int ret = 0;
	unsigned long num_pages;

	if (eb->flags & EXTENT_UPTODATE)
		return 0;

	if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
			   EXTENT_UPTODATE, 1)) {
		return 0;
	}

	num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
		    (eb->start >> PAGE_CACHE_SHIFT) + 1;
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (PageUptodate(page)) {
			continue;
		}
		if (!wait) {
			if (TestSetPageLocked(page)) {
				continue;
			}
		} else {
			lock_page(page);
		}
		if (!PageUptodate(page)) {
			err = page->mapping->a_ops->readpage(NULL, page);
			if (err) {
				ret = err;
			}
		} else {
			unlock_page(page);
		}
	}

	if (ret || !wait) {
		return ret;
	}

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			ret = -EIO;
		}
	}
	eb->flags |= EXTENT_UPTODATE;
	return ret;
}
EXPORT_SYMBOL(read_extent_buffer_pages);
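
/*
 * Readahead sketch for the wait flag above (illustrative only):
 *
 *	read_extent_buffer_pages(tree, eb_next, 0);
 *	ret = read_extent_buffer_pages(tree, eb, 1);
 *
 * With wait == 0, IO is only started on pages that can be trylocked
 * and the call returns without blocking, so a caller can kick off
 * several buffers and then come back with wait == 1 for the one it
 * actually needs.  eb_next is just a placeholder for such a
 * readahead buffer.
 */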
int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
		      unsigned long min_len,
		      char **token, char **map,
		      unsigned long *map_start,
		      unsigned long *map_len, int km)
{
	size_t offset;
	char *kaddr;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	unsigned long end_i = (start_offset + start + min_len - 1) >>
			      PAGE_CACHE_SHIFT;

	/* the mapped chunk can't cross a page boundary */
	if (i != end_i)
		return -EINVAL;

	WARN_ON(start > eb->len);

	if (i == 0) {
		/* the buffer starts partway into page zero */
		offset = start_offset;
		*map_start = 0;
	} else {
		offset = 0;
		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
	}

	// kaddr = kmap_atomic(eb->pages[i], km);
	kaddr = page_address(eb->pages[i]);
	*token = kaddr;
	*map = kaddr + offset;
	*map_len = PAGE_CACHE_SIZE - offset;
	return 0;
}
EXPORT_SYMBOL(map_extent_buffer);

void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
{
	/* kmap_atomic is currently disabled, so there is nothing to undo */
	// kunmap_atomic(token, km);
}
EXPORT_SYMBOL(unmap_extent_buffer);

int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
			 unsigned long start,
			 unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *ptr = (char *)ptrv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
	int ret = 0;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, (PAGE_CACHE_SIZE - offset));

		// kaddr = kmap_atomic(page, KM_USER0);
		kaddr = page_address(page);
		ret = memcmp(ptr, kaddr + offset, cur);
		// kunmap_atomic(kaddr, KM_USER0);
		if (ret)
			break;

		ptr += cur;
		len -= cur;
		offset = 0;
		i++;
	}
	return ret;
}
EXPORT_SYMBOL(memcmp_extent_buffer);
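/*
 * write_extent_buffer and memset_extent_buffer are the store-side
 * counterparts of read_extent_buffer: they walk the backing pages one
 * chunk at a time, clamping each chunk to the end of the current page
 * and folding the buffer's offset into the first page.
 */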
void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		// kaddr = kmap_atomic(page, KM_USER0);
		kaddr = page_address(page);
		memcpy(kaddr + offset, src, cur);
		// kunmap_atomic(kaddr, KM_USER0);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(write_extent_buffer);

void memset_extent_buffer(struct extent_buffer *eb, char c,
			  unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;

	WARN_ON(start > eb->len);
	WARN_ON(start + len > eb->len);

	offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_CACHE_SIZE - offset);
		// kaddr = kmap_atomic(page, KM_USER0);
		kaddr = page_address(page);
		memset(kaddr + offset, c, cur);
		// kunmap_atomic(kaddr, KM_USER0);

		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(memset_extent_buffer);

void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(src->len != dst_len);

	offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
	if (i == 0)
		offset += start_offset;

	while (len > 0) {
		page = dst->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));

		// kaddr = kmap_atomic(page, KM_USER1);
		kaddr = page_address(page);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);
		// kunmap_atomic(kaddr, KM_USER1);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}
EXPORT_SYMBOL(copy_extent_buffer);

static void move_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	// char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *dst_kaddr = page_address(dst_page);
	if (dst_page == src_page) {
		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
	} else {
		// char *src_kaddr = kmap_atomic(src_page, KM_USER1);
		char *src_kaddr = page_address(src_page);
		char *p = dst_kaddr + dst_off + len;
		char *s = src_kaddr + src_off + len;

		while (len--)
			*--p = *--s;

		// kunmap_atomic(src_kaddr, KM_USER1);
	}
	// kunmap_atomic(dst_kaddr, KM_USER0);
}

static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	// char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr;

	if (dst_page != src_page)
		src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1);
	else
		src_kaddr = dst_kaddr;

	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
	/*
	kunmap_atomic(dst_kaddr, KM_USER0);
	if (dst_page != src_page)
		kunmap_atomic(src_kaddr, KM_USER1);
	*/
}
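/*
 * memcpy_extent_buffer copies front to back and must not be used when
 * the source and destination ranges overlap with dst above src;
 * memmove_extent_buffer handles that case by walking the range from the
 * last byte backwards (move_pages does the reversed copy within each
 * page pair).
 */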
void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			  unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk("memcpy bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk("memcpy bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}

	while (len > 0) {
		dst_off_in_page = dst_offset &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = src_offset &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);

		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;

		if (src_i == 0)
			src_off_in_page += start_offset;
		if (dst_i == 0)
			dst_off_in_page += start_offset;

		/* clamp the copy to whatever is left in both pages */
		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
					       src_off_in_page));
		cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
					       dst_off_in_page));

		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}
EXPORT_SYMBOL(memcpy_extent_buffer);

void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
			   unsigned long src_offset, unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long dst_i;
	unsigned long src_i;

	if (src_offset + len > dst->len) {
		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
		       src_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset + len > dst->len) {
		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
		       dst_offset, len, dst->len);
		BUG_ON(1);
	}
	if (dst_offset < src_offset) {
		/* moving down, a forward copy can't stomp on the source */
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;

		dst_off_in_page = dst_end &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);
		src_off_in_page = src_end &
				  ((unsigned long)PAGE_CACHE_SIZE - 1);

		if (src_i == 0)
			src_off_in_page += start_offset;
		if (dst_i == 0)
			dst_off_in_page += start_offset;

		cur = min(len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		move_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur - 1;
		src_end -= cur - 1;
		len -= cur;
	}
}
EXPORT_SYMBOL(memmove_extent_buffer);
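/*
 * example: inserting 'size' bytes at 'slot_offset' by shifting the tail
 * of the buffer up and writing the new bytes into the gap (a sketch,
 * with made-up names, not a btrfs call site):
 *
 *	memmove_extent_buffer(eb, slot_offset + size, slot_offset,
 *			      eb->len - slot_offset - size);
 *	write_extent_buffer(eb, item, slot_offset, size);
 *	set_extent_buffer_dirty(tree, eb);
 */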