1 #include <linux/bitops.h> 2 #include <linux/slab.h> 3 #include <linux/bio.h> 4 #include <linux/mm.h> 5 #include <linux/gfp.h> 6 #include <linux/pagemap.h> 7 #include <linux/page-flags.h> 8 #include <linux/module.h> 9 #include <linux/spinlock.h> 10 #include <linux/blkdev.h> 11 #include "extent_map.h" 12 13 /* temporary define until extent_map moves out of btrfs */ 14 struct kmem_cache *btrfs_cache_create(const char *name, size_t size, 15 unsigned long extra_flags, 16 void (*ctor)(void *, struct kmem_cache *, 17 unsigned long)); 18 19 static struct kmem_cache *extent_map_cache; 20 static struct kmem_cache *extent_state_cache; 21 static struct kmem_cache *extent_buffer_cache; 22 static LIST_HEAD(extent_buffers); 23 static spinlock_t extent_buffers_lock; 24 static int nr_extent_buffers; 25 #define MAX_EXTENT_BUFFER_CACHE 128 26 27 struct tree_entry { 28 u64 start; 29 u64 end; 30 int in_tree; 31 struct rb_node rb_node; 32 }; 33 34 void __init extent_map_init(void) 35 { 36 extent_map_cache = btrfs_cache_create("extent_map", 37 sizeof(struct extent_map), 0, 38 NULL); 39 extent_state_cache = btrfs_cache_create("extent_state", 40 sizeof(struct extent_state), 0, 41 NULL); 42 extent_buffer_cache = btrfs_cache_create("extent_buffers", 43 sizeof(struct extent_buffer), 0, 44 NULL); 45 spin_lock_init(&extent_buffers_lock); 46 } 47 48 void __exit extent_map_exit(void) 49 { 50 struct extent_buffer *eb; 51 52 while (!list_empty(&extent_buffers)) { 53 eb = list_entry(extent_buffers.next, 54 struct extent_buffer, list); 55 list_del(&eb->list); 56 kmem_cache_free(extent_buffer_cache, eb); 57 } 58 if (extent_map_cache) 59 kmem_cache_destroy(extent_map_cache); 60 if (extent_state_cache) 61 kmem_cache_destroy(extent_state_cache); 62 if (extent_buffer_cache) 63 kmem_cache_destroy(extent_buffer_cache); 64 } 65 66 void extent_map_tree_init(struct extent_map_tree *tree, 67 struct address_space *mapping, gfp_t mask) 68 { 69 tree->map.rb_node = NULL; 70 tree->state.rb_node = NULL; 71 tree->ops = NULL; 72 rwlock_init(&tree->lock); 73 tree->mapping = mapping; 74 } 75 EXPORT_SYMBOL(extent_map_tree_init); 76 77 struct extent_map *alloc_extent_map(gfp_t mask) 78 { 79 struct extent_map *em; 80 em = kmem_cache_alloc(extent_map_cache, mask); 81 if (!em || IS_ERR(em)) 82 return em; 83 em->in_tree = 0; 84 atomic_set(&em->refs, 1); 85 return em; 86 } 87 EXPORT_SYMBOL(alloc_extent_map); 88 89 void free_extent_map(struct extent_map *em) 90 { 91 if (!em) 92 return; 93 if (atomic_dec_and_test(&em->refs)) { 94 WARN_ON(em->in_tree); 95 kmem_cache_free(extent_map_cache, em); 96 } 97 } 98 EXPORT_SYMBOL(free_extent_map); 99 100 101 struct extent_state *alloc_extent_state(gfp_t mask) 102 { 103 struct extent_state *state; 104 state = kmem_cache_alloc(extent_state_cache, mask); 105 if (!state || IS_ERR(state)) 106 return state; 107 state->state = 0; 108 state->in_tree = 0; 109 state->private = 0; 110 atomic_set(&state->refs, 1); 111 init_waitqueue_head(&state->wq); 112 return state; 113 } 114 EXPORT_SYMBOL(alloc_extent_state); 115 116 void free_extent_state(struct extent_state *state) 117 { 118 if (!state) 119 return; 120 if (atomic_dec_and_test(&state->refs)) { 121 WARN_ON(state->in_tree); 122 kmem_cache_free(extent_state_cache, state); 123 } 124 } 125 EXPORT_SYMBOL(free_extent_state); 126 127 static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 128 struct rb_node *node) 129 { 130 struct rb_node ** p = &root->rb_node; 131 struct rb_node * parent = NULL; 132 struct tree_entry *entry; 133 134 while(*p) { 135 parent = *p; 136 entry = rb_entry(parent, struct tree_entry, rb_node); 137 138 if (offset < entry->start) 139 p = &(*p)->rb_left; 140 else if (offset > entry->end) 141 p = &(*p)->rb_right; 142 else 143 return parent; 144 } 145 146 entry = rb_entry(node, struct tree_entry, rb_node); 147 entry->in_tree = 1; 148 rb_link_node(node, parent, p); 149 rb_insert_color(node, root); 150 return NULL; 151 } 152 153 static struct rb_node *__tree_search(struct rb_root *root, u64 offset, 154 struct rb_node **prev_ret) 155 { 156 struct rb_node * n = root->rb_node; 157 struct rb_node *prev = NULL; 158 struct tree_entry *entry; 159 struct tree_entry *prev_entry = NULL; 160 161 while(n) { 162 entry = rb_entry(n, struct tree_entry, rb_node); 163 prev = n; 164 prev_entry = entry; 165 166 if (offset < entry->start) 167 n = n->rb_left; 168 else if (offset > entry->end) 169 n = n->rb_right; 170 else 171 return n; 172 } 173 if (!prev_ret) 174 return NULL; 175 while(prev && offset > prev_entry->end) { 176 prev = rb_next(prev); 177 prev_entry = rb_entry(prev, struct tree_entry, rb_node); 178 } 179 *prev_ret = prev; 180 return NULL; 181 } 182 183 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) 184 { 185 struct rb_node *prev; 186 struct rb_node *ret; 187 ret = __tree_search(root, offset, &prev); 188 if (!ret) 189 return prev; 190 return ret; 191 } 192 193 static int tree_delete(struct rb_root *root, u64 offset) 194 { 195 struct rb_node *node; 196 struct tree_entry *entry; 197 198 node = __tree_search(root, offset, NULL); 199 if (!node) 200 return -ENOENT; 201 entry = rb_entry(node, struct tree_entry, rb_node); 202 entry->in_tree = 0; 203 rb_erase(node, root); 204 return 0; 205 } 206 207 /* 208 * add_extent_mapping tries a simple backward merge with existing 209 * mappings. The extent_map struct passed in will be inserted into 210 * the tree directly (no copies made, just a reference taken). 211 */ 212 int add_extent_mapping(struct extent_map_tree *tree, 213 struct extent_map *em) 214 { 215 int ret = 0; 216 struct extent_map *prev = NULL; 217 struct rb_node *rb; 218 219 write_lock_irq(&tree->lock); 220 rb = tree_insert(&tree->map, em->end, &em->rb_node); 221 if (rb) { 222 prev = rb_entry(rb, struct extent_map, rb_node); 223 printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end); 224 ret = -EEXIST; 225 goto out; 226 } 227 atomic_inc(&em->refs); 228 if (em->start != 0) { 229 rb = rb_prev(&em->rb_node); 230 if (rb) 231 prev = rb_entry(rb, struct extent_map, rb_node); 232 if (prev && prev->end + 1 == em->start && 233 ((em->block_start == EXTENT_MAP_HOLE && 234 prev->block_start == EXTENT_MAP_HOLE) || 235 (em->block_start == prev->block_end + 1))) { 236 em->start = prev->start; 237 em->block_start = prev->block_start; 238 rb_erase(&prev->rb_node, &tree->map); 239 prev->in_tree = 0; 240 free_extent_map(prev); 241 } 242 } 243 out: 244 write_unlock_irq(&tree->lock); 245 return ret; 246 } 247 EXPORT_SYMBOL(add_extent_mapping); 248 249 /* 250 * lookup_extent_mapping returns the first extent_map struct in the 251 * tree that intersects the [start, end] (inclusive) range. There may 252 * be additional objects in the tree that intersect, so check the object 253 * returned carefully to make sure you don't need additional lookups. 254 */ 255 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 256 u64 start, u64 end) 257 { 258 struct extent_map *em; 259 struct rb_node *rb_node; 260 261 read_lock_irq(&tree->lock); 262 rb_node = tree_search(&tree->map, start); 263 if (!rb_node) { 264 em = NULL; 265 goto out; 266 } 267 if (IS_ERR(rb_node)) { 268 em = ERR_PTR(PTR_ERR(rb_node)); 269 goto out; 270 } 271 em = rb_entry(rb_node, struct extent_map, rb_node); 272 if (em->end < start || em->start > end) { 273 em = NULL; 274 goto out; 275 } 276 atomic_inc(&em->refs); 277 out: 278 read_unlock_irq(&tree->lock); 279 return em; 280 } 281 EXPORT_SYMBOL(lookup_extent_mapping); 282 283 /* 284 * removes an extent_map struct from the tree. No reference counts are 285 * dropped, and no checks are done to see if the range is in use 286 */ 287 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) 288 { 289 int ret; 290 291 write_lock_irq(&tree->lock); 292 ret = tree_delete(&tree->map, em->end); 293 write_unlock_irq(&tree->lock); 294 return ret; 295 } 296 EXPORT_SYMBOL(remove_extent_mapping); 297 298 /* 299 * utility function to look for merge candidates inside a given range. 300 * Any extents with matching state are merged together into a single 301 * extent in the tree. Extents with EXTENT_IO in their state field 302 * are not merged because the end_io handlers need to be able to do 303 * operations on them without sleeping (or doing allocations/splits). 304 * 305 * This should be called with the tree lock held. 306 */ 307 static int merge_state(struct extent_map_tree *tree, 308 struct extent_state *state) 309 { 310 struct extent_state *other; 311 struct rb_node *other_node; 312 313 if (state->state & EXTENT_IOBITS) 314 return 0; 315 316 other_node = rb_prev(&state->rb_node); 317 if (other_node) { 318 other = rb_entry(other_node, struct extent_state, rb_node); 319 if (other->end == state->start - 1 && 320 other->state == state->state) { 321 state->start = other->start; 322 other->in_tree = 0; 323 rb_erase(&other->rb_node, &tree->state); 324 free_extent_state(other); 325 } 326 } 327 other_node = rb_next(&state->rb_node); 328 if (other_node) { 329 other = rb_entry(other_node, struct extent_state, rb_node); 330 if (other->start == state->end + 1 && 331 other->state == state->state) { 332 other->start = state->start; 333 state->in_tree = 0; 334 rb_erase(&state->rb_node, &tree->state); 335 free_extent_state(state); 336 } 337 } 338 return 0; 339 } 340 341 /* 342 * insert an extent_state struct into the tree. 'bits' are set on the 343 * struct before it is inserted. 344 * 345 * This may return -EEXIST if the extent is already there, in which case the 346 * state struct is freed. 347 * 348 * The tree lock is not taken internally. This is a utility function and 349 * probably isn't what you want to call (see set/clear_extent_bit). 350 */ 351 static int insert_state(struct extent_map_tree *tree, 352 struct extent_state *state, u64 start, u64 end, 353 int bits) 354 { 355 struct rb_node *node; 356 357 if (end < start) { 358 printk("end < start %Lu %Lu\n", end, start); 359 WARN_ON(1); 360 } 361 state->state |= bits; 362 state->start = start; 363 state->end = end; 364 if ((end & 4095) == 0) { 365 printk("insert state %Lu %Lu strange end\n", start, end); 366 WARN_ON(1); 367 } 368 node = tree_insert(&tree->state, end, &state->rb_node); 369 if (node) { 370 struct extent_state *found; 371 found = rb_entry(node, struct extent_state, rb_node); 372 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); 373 free_extent_state(state); 374 return -EEXIST; 375 } 376 merge_state(tree, state); 377 return 0; 378 } 379 380 /* 381 * split a given extent state struct in two, inserting the preallocated 382 * struct 'prealloc' as the newly created second half. 'split' indicates an 383 * offset inside 'orig' where it should be split. 384 * 385 * Before calling, 386 * the tree has 'orig' at [orig->start, orig->end]. After calling, there 387 * are two extent state structs in the tree: 388 * prealloc: [orig->start, split - 1] 389 * orig: [ split, orig->end ] 390 * 391 * The tree locks are not taken by this function. They need to be held 392 * by the caller. 393 */ 394 static int split_state(struct extent_map_tree *tree, struct extent_state *orig, 395 struct extent_state *prealloc, u64 split) 396 { 397 struct rb_node *node; 398 prealloc->start = orig->start; 399 prealloc->end = split - 1; 400 prealloc->state = orig->state; 401 orig->start = split; 402 if ((prealloc->end & 4095) == 0) { 403 printk("insert state %Lu %Lu strange end\n", prealloc->start, 404 prealloc->end); 405 WARN_ON(1); 406 } 407 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); 408 if (node) { 409 struct extent_state *found; 410 found = rb_entry(node, struct extent_state, rb_node); 411 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); 412 free_extent_state(prealloc); 413 return -EEXIST; 414 } 415 return 0; 416 } 417 418 /* 419 * utility function to clear some bits in an extent state struct. 420 * it will optionally wake up any one waiting on this state (wake == 1), or 421 * forcibly remove the state from the tree (delete == 1). 422 * 423 * If no bits are set on the state struct after clearing things, the 424 * struct is freed and removed from the tree 425 */ 426 static int clear_state_bit(struct extent_map_tree *tree, 427 struct extent_state *state, int bits, int wake, 428 int delete) 429 { 430 int ret = state->state & bits; 431 state->state &= ~bits; 432 if (wake) 433 wake_up(&state->wq); 434 if (delete || state->state == 0) { 435 if (state->in_tree) { 436 rb_erase(&state->rb_node, &tree->state); 437 state->in_tree = 0; 438 free_extent_state(state); 439 } else { 440 WARN_ON(1); 441 } 442 } else { 443 merge_state(tree, state); 444 } 445 return ret; 446 } 447 448 /* 449 * clear some bits on a range in the tree. This may require splitting 450 * or inserting elements in the tree, so the gfp mask is used to 451 * indicate which allocations or sleeping are allowed. 452 * 453 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove 454 * the given range from the tree regardless of state (ie for truncate). 455 * 456 * the range [start, end] is inclusive. 457 * 458 * This takes the tree lock, and returns < 0 on error, > 0 if any of the 459 * bits were already set, or zero if none of the bits were already set. 460 */ 461 int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, 462 int bits, int wake, int delete, gfp_t mask) 463 { 464 struct extent_state *state; 465 struct extent_state *prealloc = NULL; 466 struct rb_node *node; 467 unsigned long flags; 468 int err; 469 int set = 0; 470 471 again: 472 if (!prealloc && (mask & __GFP_WAIT)) { 473 prealloc = alloc_extent_state(mask); 474 if (!prealloc) 475 return -ENOMEM; 476 } 477 478 write_lock_irqsave(&tree->lock, flags); 479 /* 480 * this search will find the extents that end after 481 * our range starts 482 */ 483 node = tree_search(&tree->state, start); 484 if (!node) 485 goto out; 486 state = rb_entry(node, struct extent_state, rb_node); 487 if (state->start > end) 488 goto out; 489 WARN_ON(state->end < start); 490 491 /* 492 * | ---- desired range ---- | 493 * | state | or 494 * | ------------- state -------------- | 495 * 496 * We need to split the extent we found, and may flip 497 * bits on second half. 498 * 499 * If the extent we found extends past our range, we 500 * just split and search again. It'll get split again 501 * the next time though. 502 * 503 * If the extent we found is inside our range, we clear 504 * the desired bit on it. 505 */ 506 507 if (state->start < start) { 508 err = split_state(tree, state, prealloc, start); 509 BUG_ON(err == -EEXIST); 510 prealloc = NULL; 511 if (err) 512 goto out; 513 if (state->end <= end) { 514 start = state->end + 1; 515 set |= clear_state_bit(tree, state, bits, 516 wake, delete); 517 } else { 518 start = state->start; 519 } 520 goto search_again; 521 } 522 /* 523 * | ---- desired range ---- | 524 * | state | 525 * We need to split the extent, and clear the bit 526 * on the first half 527 */ 528 if (state->start <= end && state->end > end) { 529 err = split_state(tree, state, prealloc, end + 1); 530 BUG_ON(err == -EEXIST); 531 532 if (wake) 533 wake_up(&state->wq); 534 set |= clear_state_bit(tree, prealloc, bits, 535 wake, delete); 536 prealloc = NULL; 537 goto out; 538 } 539 540 start = state->end + 1; 541 set |= clear_state_bit(tree, state, bits, wake, delete); 542 goto search_again; 543 544 out: 545 write_unlock_irqrestore(&tree->lock, flags); 546 if (prealloc) 547 free_extent_state(prealloc); 548 549 return set; 550 551 search_again: 552 if (start >= end) 553 goto out; 554 write_unlock_irqrestore(&tree->lock, flags); 555 if (mask & __GFP_WAIT) 556 cond_resched(); 557 goto again; 558 } 559 EXPORT_SYMBOL(clear_extent_bit); 560 561 static int wait_on_state(struct extent_map_tree *tree, 562 struct extent_state *state) 563 { 564 DEFINE_WAIT(wait); 565 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 566 read_unlock_irq(&tree->lock); 567 schedule(); 568 read_lock_irq(&tree->lock); 569 finish_wait(&state->wq, &wait); 570 return 0; 571 } 572 573 /* 574 * waits for one or more bits to clear on a range in the state tree. 575 * The range [start, end] is inclusive. 576 * The tree lock is taken by this function 577 */ 578 int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) 579 { 580 struct extent_state *state; 581 struct rb_node *node; 582 583 read_lock_irq(&tree->lock); 584 again: 585 while (1) { 586 /* 587 * this search will find all the extents that end after 588 * our range starts 589 */ 590 node = tree_search(&tree->state, start); 591 if (!node) 592 break; 593 594 state = rb_entry(node, struct extent_state, rb_node); 595 596 if (state->start > end) 597 goto out; 598 599 if (state->state & bits) { 600 start = state->start; 601 atomic_inc(&state->refs); 602 wait_on_state(tree, state); 603 free_extent_state(state); 604 goto again; 605 } 606 start = state->end + 1; 607 608 if (start > end) 609 break; 610 611 if (need_resched()) { 612 read_unlock_irq(&tree->lock); 613 cond_resched(); 614 read_lock_irq(&tree->lock); 615 } 616 } 617 out: 618 read_unlock_irq(&tree->lock); 619 return 0; 620 } 621 EXPORT_SYMBOL(wait_extent_bit); 622 623 /* 624 * set some bits on a range in the tree. This may require allocations 625 * or sleeping, so the gfp mask is used to indicate what is allowed. 626 * 627 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 628 * range already has the desired bits set. The start of the existing 629 * range is returned in failed_start in this case. 630 * 631 * [start, end] is inclusive 632 * This takes the tree lock. 633 */ 634 int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, 635 int exclusive, u64 *failed_start, gfp_t mask) 636 { 637 struct extent_state *state; 638 struct extent_state *prealloc = NULL; 639 struct rb_node *node; 640 unsigned long flags; 641 int err = 0; 642 int set; 643 u64 last_start; 644 u64 last_end; 645 again: 646 if (!prealloc && (mask & __GFP_WAIT)) { 647 prealloc = alloc_extent_state(mask); 648 if (!prealloc) 649 return -ENOMEM; 650 } 651 652 write_lock_irqsave(&tree->lock, flags); 653 /* 654 * this search will find all the extents that end after 655 * our range starts. 656 */ 657 node = tree_search(&tree->state, start); 658 if (!node) { 659 err = insert_state(tree, prealloc, start, end, bits); 660 prealloc = NULL; 661 BUG_ON(err == -EEXIST); 662 goto out; 663 } 664 665 state = rb_entry(node, struct extent_state, rb_node); 666 last_start = state->start; 667 last_end = state->end; 668 669 /* 670 * | ---- desired range ---- | 671 * | state | 672 * 673 * Just lock what we found and keep going 674 */ 675 if (state->start == start && state->end <= end) { 676 set = state->state & bits; 677 if (set && exclusive) { 678 *failed_start = state->start; 679 err = -EEXIST; 680 goto out; 681 } 682 state->state |= bits; 683 start = state->end + 1; 684 merge_state(tree, state); 685 goto search_again; 686 } 687 688 /* 689 * | ---- desired range ---- | 690 * | state | 691 * or 692 * | ------------- state -------------- | 693 * 694 * We need to split the extent we found, and may flip bits on 695 * second half. 696 * 697 * If the extent we found extends past our 698 * range, we just split and search again. It'll get split 699 * again the next time though. 700 * 701 * If the extent we found is inside our range, we set the 702 * desired bit on it. 703 */ 704 if (state->start < start) { 705 set = state->state & bits; 706 if (exclusive && set) { 707 *failed_start = start; 708 err = -EEXIST; 709 goto out; 710 } 711 err = split_state(tree, state, prealloc, start); 712 BUG_ON(err == -EEXIST); 713 prealloc = NULL; 714 if (err) 715 goto out; 716 if (state->end <= end) { 717 state->state |= bits; 718 start = state->end + 1; 719 merge_state(tree, state); 720 } else { 721 start = state->start; 722 } 723 goto search_again; 724 } 725 /* 726 * | ---- desired range ---- | 727 * | state | or | state | 728 * 729 * There's a hole, we need to insert something in it and 730 * ignore the extent we found. 731 */ 732 if (state->start > start) { 733 u64 this_end; 734 if (end < last_start) 735 this_end = end; 736 else 737 this_end = last_start -1; 738 err = insert_state(tree, prealloc, start, this_end, 739 bits); 740 prealloc = NULL; 741 BUG_ON(err == -EEXIST); 742 if (err) 743 goto out; 744 start = this_end + 1; 745 goto search_again; 746 } 747 /* 748 * | ---- desired range ---- | 749 * | state | 750 * We need to split the extent, and set the bit 751 * on the first half 752 */ 753 if (state->start <= end && state->end > end) { 754 set = state->state & bits; 755 if (exclusive && set) { 756 *failed_start = start; 757 err = -EEXIST; 758 goto out; 759 } 760 err = split_state(tree, state, prealloc, end + 1); 761 BUG_ON(err == -EEXIST); 762 763 prealloc->state |= bits; 764 merge_state(tree, prealloc); 765 prealloc = NULL; 766 goto out; 767 } 768 769 goto search_again; 770 771 out: 772 write_unlock_irqrestore(&tree->lock, flags); 773 if (prealloc) 774 free_extent_state(prealloc); 775 776 return err; 777 778 search_again: 779 if (start > end) 780 goto out; 781 write_unlock_irqrestore(&tree->lock, flags); 782 if (mask & __GFP_WAIT) 783 cond_resched(); 784 goto again; 785 } 786 EXPORT_SYMBOL(set_extent_bit); 787 788 /* wrappers around set/clear extent bit */ 789 int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, 790 gfp_t mask) 791 { 792 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 793 mask); 794 } 795 EXPORT_SYMBOL(set_extent_dirty); 796 797 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, 798 gfp_t mask) 799 { 800 return set_extent_bit(tree, start, end, 801 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, 802 mask); 803 } 804 EXPORT_SYMBOL(set_extent_delalloc); 805 806 int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, 807 gfp_t mask) 808 { 809 return clear_extent_bit(tree, start, end, 810 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 811 } 812 EXPORT_SYMBOL(clear_extent_dirty); 813 814 int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, 815 gfp_t mask) 816 { 817 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 818 mask); 819 } 820 EXPORT_SYMBOL(set_extent_new); 821 822 int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, 823 gfp_t mask) 824 { 825 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); 826 } 827 EXPORT_SYMBOL(clear_extent_new); 828 829 int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, 830 gfp_t mask) 831 { 832 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 833 mask); 834 } 835 EXPORT_SYMBOL(set_extent_uptodate); 836 837 int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, 838 gfp_t mask) 839 { 840 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 841 } 842 EXPORT_SYMBOL(clear_extent_uptodate); 843 844 int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, 845 gfp_t mask) 846 { 847 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, 848 0, NULL, mask); 849 } 850 EXPORT_SYMBOL(set_extent_writeback); 851 852 int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, 853 gfp_t mask) 854 { 855 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); 856 } 857 EXPORT_SYMBOL(clear_extent_writeback); 858 859 int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) 860 { 861 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); 862 } 863 EXPORT_SYMBOL(wait_on_extent_writeback); 864 865 /* 866 * locks a range in ascending order, waiting for any locked regions 867 * it hits on the way. [start,end] are inclusive, and this will sleep. 868 */ 869 int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) 870 { 871 int err; 872 u64 failed_start; 873 while (1) { 874 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 875 &failed_start, mask); 876 if (err == -EEXIST && (mask & __GFP_WAIT)) { 877 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 878 start = failed_start; 879 } else { 880 break; 881 } 882 WARN_ON(start > end); 883 } 884 return err; 885 } 886 EXPORT_SYMBOL(lock_extent); 887 888 int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, 889 gfp_t mask) 890 { 891 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 892 } 893 EXPORT_SYMBOL(unlock_extent); 894 895 /* 896 * helper function to set pages and extents in the tree dirty 897 */ 898 int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) 899 { 900 unsigned long index = start >> PAGE_CACHE_SHIFT; 901 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 902 struct page *page; 903 904 while (index <= end_index) { 905 page = find_get_page(tree->mapping, index); 906 BUG_ON(!page); 907 __set_page_dirty_nobuffers(page); 908 page_cache_release(page); 909 index++; 910 } 911 set_extent_dirty(tree, start, end, GFP_NOFS); 912 return 0; 913 } 914 EXPORT_SYMBOL(set_range_dirty); 915 916 /* 917 * helper function to set both pages and extents in the tree writeback 918 */ 919 int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) 920 { 921 unsigned long index = start >> PAGE_CACHE_SHIFT; 922 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 923 struct page *page; 924 925 while (index <= end_index) { 926 page = find_get_page(tree->mapping, index); 927 BUG_ON(!page); 928 set_page_writeback(page); 929 page_cache_release(page); 930 index++; 931 } 932 set_extent_writeback(tree, start, end, GFP_NOFS); 933 return 0; 934 } 935 EXPORT_SYMBOL(set_range_writeback); 936 937 int find_first_extent_bit(struct extent_map_tree *tree, u64 start, 938 u64 *start_ret, u64 *end_ret, int bits) 939 { 940 struct rb_node *node; 941 struct extent_state *state; 942 int ret = 1; 943 944 write_lock_irq(&tree->lock); 945 /* 946 * this search will find all the extents that end after 947 * our range starts. 948 */ 949 node = tree_search(&tree->state, start); 950 if (!node || IS_ERR(node)) { 951 goto out; 952 } 953 954 while(1) { 955 state = rb_entry(node, struct extent_state, rb_node); 956 if (state->state & bits) { 957 *start_ret = state->start; 958 *end_ret = state->end; 959 ret = 0; 960 } 961 node = rb_next(node); 962 if (!node) 963 break; 964 } 965 out: 966 write_unlock_irq(&tree->lock); 967 return ret; 968 } 969 EXPORT_SYMBOL(find_first_extent_bit); 970 971 u64 find_lock_delalloc_range(struct extent_map_tree *tree, 972 u64 start, u64 lock_start, u64 *end, u64 max_bytes) 973 { 974 struct rb_node *node; 975 struct extent_state *state; 976 u64 cur_start = start; 977 u64 found = 0; 978 u64 total_bytes = 0; 979 980 write_lock_irq(&tree->lock); 981 /* 982 * this search will find all the extents that end after 983 * our range starts. 984 */ 985 search_again: 986 node = tree_search(&tree->state, cur_start); 987 if (!node || IS_ERR(node)) { 988 goto out; 989 } 990 991 while(1) { 992 state = rb_entry(node, struct extent_state, rb_node); 993 if (state->start != cur_start) { 994 goto out; 995 } 996 if (!(state->state & EXTENT_DELALLOC)) { 997 goto out; 998 } 999 if (state->start >= lock_start) { 1000 if (state->state & EXTENT_LOCKED) { 1001 DEFINE_WAIT(wait); 1002 atomic_inc(&state->refs); 1003 write_unlock_irq(&tree->lock); 1004 schedule(); 1005 write_lock_irq(&tree->lock); 1006 finish_wait(&state->wq, &wait); 1007 free_extent_state(state); 1008 goto search_again; 1009 } 1010 state->state |= EXTENT_LOCKED; 1011 } 1012 found++; 1013 *end = state->end; 1014 cur_start = state->end + 1; 1015 node = rb_next(node); 1016 if (!node) 1017 break; 1018 total_bytes = state->end - state->start + 1; 1019 if (total_bytes >= max_bytes) 1020 break; 1021 } 1022 out: 1023 write_unlock_irq(&tree->lock); 1024 return found; 1025 } 1026 1027 /* 1028 * helper function to lock both pages and extents in the tree. 1029 * pages must be locked first. 1030 */ 1031 int lock_range(struct extent_map_tree *tree, u64 start, u64 end) 1032 { 1033 unsigned long index = start >> PAGE_CACHE_SHIFT; 1034 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1035 struct page *page; 1036 int err; 1037 1038 while (index <= end_index) { 1039 page = grab_cache_page(tree->mapping, index); 1040 if (!page) { 1041 err = -ENOMEM; 1042 goto failed; 1043 } 1044 if (IS_ERR(page)) { 1045 err = PTR_ERR(page); 1046 goto failed; 1047 } 1048 index++; 1049 } 1050 lock_extent(tree, start, end, GFP_NOFS); 1051 return 0; 1052 1053 failed: 1054 /* 1055 * we failed above in getting the page at 'index', so we undo here 1056 * up to but not including the page at 'index' 1057 */ 1058 end_index = index; 1059 index = start >> PAGE_CACHE_SHIFT; 1060 while (index < end_index) { 1061 page = find_get_page(tree->mapping, index); 1062 unlock_page(page); 1063 page_cache_release(page); 1064 index++; 1065 } 1066 return err; 1067 } 1068 EXPORT_SYMBOL(lock_range); 1069 1070 /* 1071 * helper function to unlock both pages and extents in the tree. 1072 */ 1073 int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) 1074 { 1075 unsigned long index = start >> PAGE_CACHE_SHIFT; 1076 unsigned long end_index = end >> PAGE_CACHE_SHIFT; 1077 struct page *page; 1078 1079 while (index <= end_index) { 1080 page = find_get_page(tree->mapping, index); 1081 unlock_page(page); 1082 page_cache_release(page); 1083 index++; 1084 } 1085 unlock_extent(tree, start, end, GFP_NOFS); 1086 return 0; 1087 } 1088 EXPORT_SYMBOL(unlock_range); 1089 1090 int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) 1091 { 1092 struct rb_node *node; 1093 struct extent_state *state; 1094 int ret = 0; 1095 1096 write_lock_irq(&tree->lock); 1097 /* 1098 * this search will find all the extents that end after 1099 * our range starts. 1100 */ 1101 node = tree_search(&tree->state, start); 1102 if (!node || IS_ERR(node)) { 1103 ret = -ENOENT; 1104 goto out; 1105 } 1106 state = rb_entry(node, struct extent_state, rb_node); 1107 if (state->start != start) { 1108 ret = -ENOENT; 1109 goto out; 1110 } 1111 state->private = private; 1112 out: 1113 write_unlock_irq(&tree->lock); 1114 return ret; 1115 1116 } 1117 1118 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) 1119 { 1120 struct rb_node *node; 1121 struct extent_state *state; 1122 int ret = 0; 1123 1124 read_lock_irq(&tree->lock); 1125 /* 1126 * this search will find all the extents that end after 1127 * our range starts. 1128 */ 1129 node = tree_search(&tree->state, start); 1130 if (!node || IS_ERR(node)) { 1131 ret = -ENOENT; 1132 goto out; 1133 } 1134 state = rb_entry(node, struct extent_state, rb_node); 1135 if (state->start != start) { 1136 ret = -ENOENT; 1137 goto out; 1138 } 1139 *private = state->private; 1140 out: 1141 read_unlock_irq(&tree->lock); 1142 return ret; 1143 } 1144 1145 /* 1146 * searches a range in the state tree for a given mask. 1147 * If 'filled' == 1, this returns 1 only if ever extent in the tree 1148 * has the bits set. Otherwise, 1 is returned if any bit in the 1149 * range is found set. 1150 */ 1151 static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, 1152 int bits, int filled) 1153 { 1154 struct extent_state *state = NULL; 1155 struct rb_node *node; 1156 int bitset = 0; 1157 1158 read_lock_irq(&tree->lock); 1159 node = tree_search(&tree->state, start); 1160 while (node && start <= end) { 1161 state = rb_entry(node, struct extent_state, rb_node); 1162 if (state->start > end) 1163 break; 1164 1165 if (filled && state->start > start) { 1166 bitset = 0; 1167 break; 1168 } 1169 if (state->state & bits) { 1170 bitset = 1; 1171 if (!filled) 1172 break; 1173 } else if (filled) { 1174 bitset = 0; 1175 break; 1176 } 1177 start = state->end + 1; 1178 if (start > end) 1179 break; 1180 node = rb_next(node); 1181 } 1182 read_unlock_irq(&tree->lock); 1183 return bitset; 1184 } 1185 1186 /* 1187 * helper function to set a given page up to date if all the 1188 * extents in the tree for that page are up to date 1189 */ 1190 static int check_page_uptodate(struct extent_map_tree *tree, 1191 struct page *page) 1192 { 1193 u64 start = page->index << PAGE_CACHE_SHIFT; 1194 u64 end = start + PAGE_CACHE_SIZE - 1; 1195 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1196 SetPageUptodate(page); 1197 return 0; 1198 } 1199 1200 /* 1201 * helper function to unlock a page if all the extents in the tree 1202 * for that page are unlocked 1203 */ 1204 static int check_page_locked(struct extent_map_tree *tree, 1205 struct page *page) 1206 { 1207 u64 start = page->index << PAGE_CACHE_SHIFT; 1208 u64 end = start + PAGE_CACHE_SIZE - 1; 1209 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1210 unlock_page(page); 1211 return 0; 1212 } 1213 1214 /* 1215 * helper function to end page writeback if all the extents 1216 * in the tree for that page are done with writeback 1217 */ 1218 static int check_page_writeback(struct extent_map_tree *tree, 1219 struct page *page) 1220 { 1221 u64 start = page->index << PAGE_CACHE_SHIFT; 1222 u64 end = start + PAGE_CACHE_SIZE - 1; 1223 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) 1224 end_page_writeback(page); 1225 return 0; 1226 } 1227 1228 /* lots and lots of room for performance fixes in the end_bio funcs */ 1229 1230 /* 1231 * after a writepage IO is done, we need to: 1232 * clear the uptodate bits on error 1233 * clear the writeback bits in the extent tree for this IO 1234 * end_page_writeback if the page has no more pending IO 1235 * 1236 * Scheduling is not allowed, so the extent state tree is expected 1237 * to have one and only one object corresponding to this IO. 1238 */ 1239 static int end_bio_extent_writepage(struct bio *bio, 1240 unsigned int bytes_done, int err) 1241 { 1242 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1243 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1244 struct extent_map_tree *tree = bio->bi_private; 1245 u64 start; 1246 u64 end; 1247 int whole_page; 1248 1249 if (bio->bi_size) 1250 return 1; 1251 1252 do { 1253 struct page *page = bvec->bv_page; 1254 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1255 end = start + bvec->bv_len - 1; 1256 1257 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1258 whole_page = 1; 1259 else 1260 whole_page = 0; 1261 1262 if (--bvec >= bio->bi_io_vec) 1263 prefetchw(&bvec->bv_page->flags); 1264 1265 if (!uptodate) { 1266 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1267 ClearPageUptodate(page); 1268 SetPageError(page); 1269 } 1270 clear_extent_writeback(tree, start, end, GFP_ATOMIC); 1271 1272 if (whole_page) 1273 end_page_writeback(page); 1274 else 1275 check_page_writeback(tree, page); 1276 if (tree->ops && tree->ops->writepage_end_io_hook) 1277 tree->ops->writepage_end_io_hook(page, start, end); 1278 } while (bvec >= bio->bi_io_vec); 1279 1280 bio_put(bio); 1281 return 0; 1282 } 1283 1284 /* 1285 * after a readpage IO is done, we need to: 1286 * clear the uptodate bits on error 1287 * set the uptodate bits if things worked 1288 * set the page up to date if all extents in the tree are uptodate 1289 * clear the lock bit in the extent tree 1290 * unlock the page if there are no other extents locked for it 1291 * 1292 * Scheduling is not allowed, so the extent state tree is expected 1293 * to have one and only one object corresponding to this IO. 1294 */ 1295 static int end_bio_extent_readpage(struct bio *bio, 1296 unsigned int bytes_done, int err) 1297 { 1298 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1299 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1300 struct extent_map_tree *tree = bio->bi_private; 1301 u64 start; 1302 u64 end; 1303 int whole_page; 1304 int ret; 1305 1306 if (bio->bi_size) 1307 return 1; 1308 1309 do { 1310 struct page *page = bvec->bv_page; 1311 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1312 end = start + bvec->bv_len - 1; 1313 1314 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 1315 whole_page = 1; 1316 else 1317 whole_page = 0; 1318 1319 if (--bvec >= bio->bi_io_vec) 1320 prefetchw(&bvec->bv_page->flags); 1321 1322 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1323 ret = tree->ops->readpage_end_io_hook(page, start, end); 1324 if (ret) 1325 uptodate = 0; 1326 } 1327 if (uptodate) { 1328 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1329 if (whole_page) 1330 SetPageUptodate(page); 1331 else 1332 check_page_uptodate(tree, page); 1333 } else { 1334 ClearPageUptodate(page); 1335 SetPageError(page); 1336 } 1337 1338 unlock_extent(tree, start, end, GFP_ATOMIC); 1339 1340 if (whole_page) 1341 unlock_page(page); 1342 else 1343 check_page_locked(tree, page); 1344 } while (bvec >= bio->bi_io_vec); 1345 1346 bio_put(bio); 1347 return 0; 1348 } 1349 1350 /* 1351 * IO done from prepare_write is pretty simple, we just unlock 1352 * the structs in the extent tree when done, and set the uptodate bits 1353 * as appropriate. 1354 */ 1355 static int end_bio_extent_preparewrite(struct bio *bio, 1356 unsigned int bytes_done, int err) 1357 { 1358 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1359 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1360 struct extent_map_tree *tree = bio->bi_private; 1361 u64 start; 1362 u64 end; 1363 1364 if (bio->bi_size) 1365 return 1; 1366 1367 do { 1368 struct page *page = bvec->bv_page; 1369 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1370 end = start + bvec->bv_len - 1; 1371 1372 if (--bvec >= bio->bi_io_vec) 1373 prefetchw(&bvec->bv_page->flags); 1374 1375 if (uptodate) { 1376 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1377 } else { 1378 ClearPageUptodate(page); 1379 SetPageError(page); 1380 } 1381 1382 unlock_extent(tree, start, end, GFP_ATOMIC); 1383 1384 } while (bvec >= bio->bi_io_vec); 1385 1386 bio_put(bio); 1387 return 0; 1388 } 1389 1390 static int submit_extent_page(int rw, struct extent_map_tree *tree, 1391 struct page *page, sector_t sector, 1392 size_t size, unsigned long offset, 1393 struct block_device *bdev, 1394 bio_end_io_t end_io_func) 1395 { 1396 struct bio *bio; 1397 int ret = 0; 1398 1399 bio = bio_alloc(GFP_NOIO, 1); 1400 1401 bio->bi_sector = sector; 1402 bio->bi_bdev = bdev; 1403 bio->bi_io_vec[0].bv_page = page; 1404 bio->bi_io_vec[0].bv_len = size; 1405 bio->bi_io_vec[0].bv_offset = offset; 1406 1407 bio->bi_vcnt = 1; 1408 bio->bi_idx = 0; 1409 bio->bi_size = size; 1410 1411 bio->bi_end_io = end_io_func; 1412 bio->bi_private = tree; 1413 1414 bio_get(bio); 1415 submit_bio(rw, bio); 1416 1417 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 1418 ret = -EOPNOTSUPP; 1419 1420 bio_put(bio); 1421 return ret; 1422 } 1423 1424 void set_page_extent_mapped(struct page *page) 1425 { 1426 if (!PagePrivate(page)) { 1427 SetPagePrivate(page); 1428 WARN_ON(!page->mapping->a_ops->invalidatepage); 1429 set_page_private(page, 1); 1430 page_cache_get(page); 1431 } 1432 } 1433 1434 /* 1435 * basic readpage implementation. Locked extent state structs are inserted 1436 * into the tree that are removed when the IO is done (by the end_io 1437 * handlers) 1438 */ 1439 int extent_read_full_page(struct extent_map_tree *tree, struct page *page, 1440 get_extent_t *get_extent) 1441 { 1442 struct inode *inode = page->mapping->host; 1443 u64 start = page->index << PAGE_CACHE_SHIFT; 1444 u64 page_end = start + PAGE_CACHE_SIZE - 1; 1445 u64 end; 1446 u64 cur = start; 1447 u64 extent_offset; 1448 u64 last_byte = i_size_read(inode); 1449 u64 block_start; 1450 u64 cur_end; 1451 sector_t sector; 1452 struct extent_map *em; 1453 struct block_device *bdev; 1454 int ret; 1455 int nr = 0; 1456 size_t page_offset = 0; 1457 size_t iosize; 1458 size_t blocksize = inode->i_sb->s_blocksize; 1459 1460 set_page_extent_mapped(page); 1461 1462 end = page_end; 1463 lock_extent(tree, start, end, GFP_NOFS); 1464 1465 while (cur <= end) { 1466 if (cur >= last_byte) { 1467 iosize = PAGE_CACHE_SIZE - page_offset; 1468 zero_user_page(page, page_offset, iosize, KM_USER0); 1469 set_extent_uptodate(tree, cur, cur + iosize - 1, 1470 GFP_NOFS); 1471 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1472 break; 1473 } 1474 em = get_extent(inode, page, page_offset, cur, end, 0); 1475 if (IS_ERR(em) || !em) { 1476 SetPageError(page); 1477 unlock_extent(tree, cur, end, GFP_NOFS); 1478 break; 1479 } 1480 1481 extent_offset = cur - em->start; 1482 BUG_ON(em->end < cur); 1483 BUG_ON(end < cur); 1484 1485 iosize = min(em->end - cur, end - cur) + 1; 1486 cur_end = min(em->end, end); 1487 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 1488 sector = (em->block_start + extent_offset) >> 9; 1489 bdev = em->bdev; 1490 block_start = em->block_start; 1491 free_extent_map(em); 1492 em = NULL; 1493 1494 /* we've found a hole, just zero and go on */ 1495 if (block_start == EXTENT_MAP_HOLE) { 1496 zero_user_page(page, page_offset, iosize, KM_USER0); 1497 set_extent_uptodate(tree, cur, cur + iosize - 1, 1498 GFP_NOFS); 1499 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1500 cur = cur + iosize; 1501 page_offset += iosize; 1502 continue; 1503 } 1504 /* the get_extent function already copied into the page */ 1505 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 1506 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1507 cur = cur + iosize; 1508 page_offset += iosize; 1509 continue; 1510 } 1511 1512 ret = 0; 1513 if (tree->ops && tree->ops->readpage_io_hook) { 1514 ret = tree->ops->readpage_io_hook(page, cur, 1515 cur + iosize - 1); 1516 } 1517 if (!ret) { 1518 ret = submit_extent_page(READ, tree, page, 1519 sector, iosize, page_offset, 1520 bdev, end_bio_extent_readpage); 1521 } 1522 if (ret) 1523 SetPageError(page); 1524 cur = cur + iosize; 1525 page_offset += iosize; 1526 nr++; 1527 } 1528 if (!nr) { 1529 if (!PageError(page)) 1530 SetPageUptodate(page); 1531 unlock_page(page); 1532 } 1533 return 0; 1534 } 1535 EXPORT_SYMBOL(extent_read_full_page); 1536 1537 /* 1538 * the writepage semantics are similar to regular writepage. extent 1539 * records are inserted to lock ranges in the tree, and as dirty areas 1540 * are found, they are marked writeback. Then the lock bits are removed 1541 * and the end_io handler clears the writeback ranges 1542 */ 1543 int extent_write_full_page(struct extent_map_tree *tree, struct page *page, 1544 get_extent_t *get_extent, 1545 struct writeback_control *wbc) 1546 { 1547 struct inode *inode = page->mapping->host; 1548 u64 start = page->index << PAGE_CACHE_SHIFT; 1549 u64 page_end = start + PAGE_CACHE_SIZE - 1; 1550 u64 end; 1551 u64 cur = start; 1552 u64 extent_offset; 1553 u64 last_byte = i_size_read(inode); 1554 u64 block_start; 1555 sector_t sector; 1556 struct extent_map *em; 1557 struct block_device *bdev; 1558 int ret; 1559 int nr = 0; 1560 size_t page_offset = 0; 1561 size_t iosize; 1562 size_t blocksize; 1563 loff_t i_size = i_size_read(inode); 1564 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; 1565 u64 nr_delalloc; 1566 u64 delalloc_end; 1567 1568 WARN_ON(!PageLocked(page)); 1569 if (page->index > end_index) { 1570 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 1571 unlock_page(page); 1572 return 0; 1573 } 1574 1575 if (page->index == end_index) { 1576 size_t offset = i_size & (PAGE_CACHE_SIZE - 1); 1577 zero_user_page(page, offset, 1578 PAGE_CACHE_SIZE - offset, KM_USER0); 1579 } 1580 1581 set_page_extent_mapped(page); 1582 1583 lock_extent(tree, start, page_end, GFP_NOFS); 1584 nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, 1585 &delalloc_end, 1586 128 * 1024 * 1024); 1587 if (nr_delalloc) { 1588 tree->ops->fill_delalloc(inode, start, delalloc_end); 1589 if (delalloc_end >= page_end + 1) { 1590 clear_extent_bit(tree, page_end + 1, delalloc_end, 1591 EXTENT_LOCKED | EXTENT_DELALLOC, 1592 1, 0, GFP_NOFS); 1593 } 1594 clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, 1595 0, 0, GFP_NOFS); 1596 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { 1597 printk("found delalloc bits after clear extent_bit\n"); 1598 } 1599 } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { 1600 printk("found delalloc bits after find_delalloc_range returns 0\n"); 1601 } 1602 1603 end = page_end; 1604 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { 1605 printk("found delalloc bits after lock_extent\n"); 1606 } 1607 1608 if (last_byte <= start) { 1609 clear_extent_dirty(tree, start, page_end, GFP_NOFS); 1610 goto done; 1611 } 1612 1613 set_extent_uptodate(tree, start, page_end, GFP_NOFS); 1614 blocksize = inode->i_sb->s_blocksize; 1615 1616 while (cur <= end) { 1617 if (cur >= last_byte) { 1618 clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 1619 break; 1620 } 1621 em = get_extent(inode, page, page_offset, cur, end, 0); 1622 if (IS_ERR(em) || !em) { 1623 SetPageError(page); 1624 break; 1625 } 1626 1627 extent_offset = cur - em->start; 1628 BUG_ON(em->end < cur); 1629 BUG_ON(end < cur); 1630 iosize = min(em->end - cur, end - cur) + 1; 1631 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 1632 sector = (em->block_start + extent_offset) >> 9; 1633 bdev = em->bdev; 1634 block_start = em->block_start; 1635 free_extent_map(em); 1636 em = NULL; 1637 1638 if (block_start == EXTENT_MAP_HOLE || 1639 block_start == EXTENT_MAP_INLINE) { 1640 clear_extent_dirty(tree, cur, 1641 cur + iosize - 1, GFP_NOFS); 1642 cur = cur + iosize; 1643 page_offset += iosize; 1644 continue; 1645 } 1646 1647 /* leave this out until we have a page_mkwrite call */ 1648 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 1649 EXTENT_DIRTY, 0)) { 1650 cur = cur + iosize; 1651 page_offset += iosize; 1652 continue; 1653 } 1654 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); 1655 if (tree->ops && tree->ops->writepage_io_hook) { 1656 ret = tree->ops->writepage_io_hook(page, cur, 1657 cur + iosize - 1); 1658 } else { 1659 ret = 0; 1660 } 1661 if (ret) 1662 SetPageError(page); 1663 else { 1664 set_range_writeback(tree, cur, cur + iosize - 1); 1665 ret = submit_extent_page(WRITE, tree, page, sector, 1666 iosize, page_offset, bdev, 1667 end_bio_extent_writepage); 1668 if (ret) 1669 SetPageError(page); 1670 } 1671 cur = cur + iosize; 1672 page_offset += iosize; 1673 nr++; 1674 } 1675 done: 1676 unlock_extent(tree, start, page_end, GFP_NOFS); 1677 unlock_page(page); 1678 return 0; 1679 } 1680 EXPORT_SYMBOL(extent_write_full_page); 1681 1682 /* 1683 * basic invalidatepage code, this waits on any locked or writeback 1684 * ranges corresponding to the page, and then deletes any extent state 1685 * records from the tree 1686 */ 1687 int extent_invalidatepage(struct extent_map_tree *tree, 1688 struct page *page, unsigned long offset) 1689 { 1690 u64 start = (page->index << PAGE_CACHE_SHIFT); 1691 u64 end = start + PAGE_CACHE_SIZE - 1; 1692 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 1693 1694 start += (offset + blocksize -1) & ~(blocksize - 1); 1695 if (start > end) 1696 return 0; 1697 1698 lock_extent(tree, start, end, GFP_NOFS); 1699 wait_on_extent_writeback(tree, start, end); 1700 clear_extent_bit(tree, start, end, 1701 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 1702 1, 1, GFP_NOFS); 1703 return 0; 1704 } 1705 EXPORT_SYMBOL(extent_invalidatepage); 1706 1707 /* 1708 * simple commit_write call, set_range_dirty is used to mark both 1709 * the pages and the extent records as dirty 1710 */ 1711 int extent_commit_write(struct extent_map_tree *tree, 1712 struct inode *inode, struct page *page, 1713 unsigned from, unsigned to) 1714 { 1715 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1716 1717 set_page_extent_mapped(page); 1718 set_page_dirty(page); 1719 1720 if (pos > inode->i_size) { 1721 i_size_write(inode, pos); 1722 mark_inode_dirty(inode); 1723 } 1724 return 0; 1725 } 1726 EXPORT_SYMBOL(extent_commit_write); 1727 1728 int extent_prepare_write(struct extent_map_tree *tree, 1729 struct inode *inode, struct page *page, 1730 unsigned from, unsigned to, get_extent_t *get_extent) 1731 { 1732 u64 page_start = page->index << PAGE_CACHE_SHIFT; 1733 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 1734 u64 block_start; 1735 u64 orig_block_start; 1736 u64 block_end; 1737 u64 cur_end; 1738 struct extent_map *em; 1739 unsigned blocksize = 1 << inode->i_blkbits; 1740 size_t page_offset = 0; 1741 size_t block_off_start; 1742 size_t block_off_end; 1743 int err = 0; 1744 int iocount = 0; 1745 int ret = 0; 1746 int isnew; 1747 1748 set_page_extent_mapped(page); 1749 1750 block_start = (page_start + from) & ~((u64)blocksize - 1); 1751 block_end = (page_start + to - 1) | (blocksize - 1); 1752 orig_block_start = block_start; 1753 1754 lock_extent(tree, page_start, page_end, GFP_NOFS); 1755 while(block_start <= block_end) { 1756 em = get_extent(inode, page, page_offset, block_start, 1757 block_end, 1); 1758 if (IS_ERR(em) || !em) { 1759 goto err; 1760 } 1761 cur_end = min(block_end, em->end); 1762 block_off_start = block_start & (PAGE_CACHE_SIZE - 1); 1763 block_off_end = block_off_start + blocksize; 1764 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); 1765 1766 if (!PageUptodate(page) && isnew && 1767 (block_off_end > to || block_off_start < from)) { 1768 void *kaddr; 1769 1770 kaddr = kmap_atomic(page, KM_USER0); 1771 if (block_off_end > to) 1772 memset(kaddr + to, 0, block_off_end - to); 1773 if (block_off_start < from) 1774 memset(kaddr + block_off_start, 0, 1775 from - block_off_start); 1776 flush_dcache_page(page); 1777 kunmap_atomic(kaddr, KM_USER0); 1778 } 1779 if (!isnew && !PageUptodate(page) && 1780 (block_off_end > to || block_off_start < from) && 1781 !test_range_bit(tree, block_start, cur_end, 1782 EXTENT_UPTODATE, 1)) { 1783 u64 sector; 1784 u64 extent_offset = block_start - em->start; 1785 size_t iosize; 1786 sector = (em->block_start + extent_offset) >> 9; 1787 iosize = (cur_end - block_start + blocksize - 1) & 1788 ~((u64)blocksize - 1); 1789 /* 1790 * we've already got the extent locked, but we 1791 * need to split the state such that our end_bio 1792 * handler can clear the lock. 1793 */ 1794 set_extent_bit(tree, block_start, 1795 block_start + iosize - 1, 1796 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 1797 ret = submit_extent_page(READ, tree, page, 1798 sector, iosize, page_offset, em->bdev, 1799 end_bio_extent_preparewrite); 1800 iocount++; 1801 block_start = block_start + iosize; 1802 } else { 1803 set_extent_uptodate(tree, block_start, cur_end, 1804 GFP_NOFS); 1805 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 1806 block_start = cur_end + 1; 1807 } 1808 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 1809 free_extent_map(em); 1810 } 1811 if (iocount) { 1812 wait_extent_bit(tree, orig_block_start, 1813 block_end, EXTENT_LOCKED); 1814 } 1815 check_page_uptodate(tree, page); 1816 err: 1817 /* FIXME, zero out newly allocated blocks on error */ 1818 return err; 1819 } 1820 EXPORT_SYMBOL(extent_prepare_write); 1821 1822 /* 1823 * a helper for releasepage. As long as there are no locked extents 1824 * in the range corresponding to the page, both state records and extent 1825 * map records are removed 1826 */ 1827 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) 1828 { 1829 struct extent_map *em; 1830 u64 start = page->index << PAGE_CACHE_SHIFT; 1831 u64 end = start + PAGE_CACHE_SIZE - 1; 1832 u64 orig_start = start; 1833 int ret = 1; 1834 1835 while (start <= end) { 1836 em = lookup_extent_mapping(tree, start, end); 1837 if (!em || IS_ERR(em)) 1838 break; 1839 if (!test_range_bit(tree, em->start, em->end, 1840 EXTENT_LOCKED, 0)) { 1841 remove_extent_mapping(tree, em); 1842 /* once for the rb tree */ 1843 free_extent_map(em); 1844 } 1845 start = em->end + 1; 1846 /* once for us */ 1847 free_extent_map(em); 1848 } 1849 if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) 1850 ret = 0; 1851 else 1852 clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, 1853 1, 1, GFP_NOFS); 1854 return ret; 1855 } 1856 EXPORT_SYMBOL(try_release_extent_mapping); 1857 1858 sector_t extent_bmap(struct address_space *mapping, sector_t iblock, 1859 get_extent_t *get_extent) 1860 { 1861 struct inode *inode = mapping->host; 1862 u64 start = iblock << inode->i_blkbits; 1863 u64 end = start + (1 << inode->i_blkbits) - 1; 1864 struct extent_map *em; 1865 1866 em = get_extent(inode, NULL, 0, start, end, 0); 1867 if (!em || IS_ERR(em)) 1868 return 0; 1869 1870 // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device 1871 if (em->block_start == EXTENT_MAP_INLINE || 1872 em->block_start == EXTENT_MAP_HOLE) 1873 return 0; 1874 1875 return (em->block_start + start - em->start) >> inode->i_blkbits; 1876 } 1877 1878 static struct extent_buffer *__alloc_extent_buffer(gfp_t mask) 1879 { 1880 struct extent_buffer *eb = NULL; 1881 spin_lock(&extent_buffers_lock); 1882 if (!list_empty(&extent_buffers)) { 1883 eb = list_entry(extent_buffers.next, struct extent_buffer, 1884 list); 1885 list_del(&eb->list); 1886 WARN_ON(nr_extent_buffers == 0); 1887 nr_extent_buffers--; 1888 } 1889 spin_unlock(&extent_buffers_lock); 1890 if (eb) { 1891 memset(eb, 0, sizeof(*eb)); 1892 return eb; 1893 } 1894 return kmem_cache_zalloc(extent_buffer_cache, mask); 1895 } 1896 1897 static void __free_extent_buffer(struct extent_buffer *eb) 1898 { 1899 if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) { 1900 kmem_cache_free(extent_buffer_cache, eb); 1901 } else { 1902 spin_lock(&extent_buffers_lock); 1903 list_add(&eb->list, &extent_buffers); 1904 nr_extent_buffers++; 1905 spin_unlock(&extent_buffers_lock); 1906 } 1907 } 1908 1909 static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) 1910 { 1911 struct page *p; 1912 if (i == 0) 1913 return eb->first_page; 1914 i += eb->start >> PAGE_CACHE_SHIFT; 1915 p = find_get_page(eb->first_page->mapping, i); 1916 page_cache_release(p); 1917 return p; 1918 } 1919 1920 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, 1921 u64 start, unsigned long len, 1922 gfp_t mask) 1923 { 1924 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - 1925 (start >> PAGE_CACHE_SHIFT) + 1; 1926 unsigned long i; 1927 unsigned long index = start >> PAGE_CACHE_SHIFT; 1928 struct extent_buffer *eb; 1929 struct page *p; 1930 struct address_space *mapping = tree->mapping; 1931 int uptodate = 0; 1932 1933 eb = __alloc_extent_buffer(mask); 1934 if (!eb || IS_ERR(eb)) 1935 return NULL; 1936 1937 eb->start = start; 1938 eb->len = len; 1939 atomic_set(&eb->refs, 1); 1940 1941 for (i = 0; i < num_pages; i++, index++) { 1942 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); 1943 if (!p) { 1944 /* make sure the free only frees the pages we've 1945 * grabbed a reference on 1946 */ 1947 eb->len = i << PAGE_CACHE_SHIFT; 1948 eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); 1949 goto fail; 1950 } 1951 if (i == 0) 1952 eb->first_page = p; 1953 if (!PageUptodate(p)) 1954 uptodate = 0; 1955 unlock_page(p); 1956 } 1957 if (uptodate) 1958 eb->flags |= EXTENT_UPTODATE; 1959 return eb; 1960 fail: 1961 free_extent_buffer(eb); 1962 return NULL; 1963 } 1964 EXPORT_SYMBOL(alloc_extent_buffer); 1965 1966 struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, 1967 u64 start, unsigned long len, 1968 gfp_t mask) 1969 { 1970 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - 1971 (start >> PAGE_CACHE_SHIFT) + 1; 1972 unsigned long i; 1973 unsigned long index = start >> PAGE_CACHE_SHIFT; 1974 struct extent_buffer *eb; 1975 struct page *p; 1976 struct address_space *mapping = tree->mapping; 1977 1978 eb = __alloc_extent_buffer(mask); 1979 if (!eb || IS_ERR(eb)) 1980 return NULL; 1981 1982 eb->start = start; 1983 eb->len = len; 1984 atomic_set(&eb->refs, 1); 1985 1986 for (i = 0; i < num_pages; i++, index++) { 1987 p = find_get_page(mapping, index); 1988 if (!p) { 1989 /* make sure the free only frees the pages we've 1990 * grabbed a reference on 1991 */ 1992 eb->len = i << PAGE_CACHE_SHIFT; 1993 eb->start &= ~((u64)PAGE_CACHE_SIZE - 1); 1994 goto fail; 1995 } 1996 if (i == 0) 1997 eb->first_page = p; 1998 } 1999 return eb; 2000 fail: 2001 free_extent_buffer(eb); 2002 return NULL; 2003 } 2004 EXPORT_SYMBOL(find_extent_buffer); 2005 2006 void free_extent_buffer(struct extent_buffer *eb) 2007 { 2008 unsigned long i; 2009 unsigned long num_pages; 2010 2011 if (!eb) 2012 return; 2013 2014 if (!atomic_dec_and_test(&eb->refs)) 2015 return; 2016 2017 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - 2018 (eb->start >> PAGE_CACHE_SHIFT) + 1; 2019 2020 if (eb->first_page) 2021 page_cache_release(eb->first_page); 2022 for (i = 1; i < num_pages; i++) { 2023 page_cache_release(extent_buffer_page(eb, i)); 2024 } 2025 __free_extent_buffer(eb); 2026 } 2027 EXPORT_SYMBOL(free_extent_buffer); 2028 2029 int clear_extent_buffer_dirty(struct extent_map_tree *tree, 2030 struct extent_buffer *eb) 2031 { 2032 int set; 2033 unsigned long i; 2034 unsigned long num_pages; 2035 struct page *page; 2036 2037 u64 start = eb->start; 2038 u64 end = start + eb->len - 1; 2039 2040 set = clear_extent_dirty(tree, start, end, GFP_NOFS); 2041 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - 2042 (eb->start >> PAGE_CACHE_SHIFT) + 1; 2043 2044 for (i = 0; i < num_pages; i++) { 2045 page = extent_buffer_page(eb, i); 2046 lock_page(page); 2047 /* 2048 * if we're on the last page or the first page and the 2049 * block isn't aligned on a page boundary, do extra checks 2050 * to make sure we don't clean page that is partially dirty 2051 */ 2052 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 2053 ((i == num_pages - 1) && 2054 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { 2055 start = page->index << PAGE_CACHE_SHIFT; 2056 end = start + PAGE_CACHE_SIZE - 1; 2057 if (test_range_bit(tree, start, end, 2058 EXTENT_DIRTY, 0)) { 2059 unlock_page(page); 2060 continue; 2061 } 2062 } 2063 clear_page_dirty_for_io(page); 2064 unlock_page(page); 2065 } 2066 return 0; 2067 } 2068 EXPORT_SYMBOL(clear_extent_buffer_dirty); 2069 2070 int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, 2071 struct extent_buffer *eb) 2072 { 2073 return wait_on_extent_writeback(tree, eb->start, 2074 eb->start + eb->len - 1); 2075 } 2076 EXPORT_SYMBOL(wait_on_extent_buffer_writeback); 2077 2078 int set_extent_buffer_dirty(struct extent_map_tree *tree, 2079 struct extent_buffer *eb) 2080 { 2081 return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); 2082 } 2083 EXPORT_SYMBOL(set_extent_buffer_dirty); 2084 2085 int set_extent_buffer_uptodate(struct extent_map_tree *tree, 2086 struct extent_buffer *eb) 2087 { 2088 unsigned long i; 2089 struct page *page; 2090 unsigned long num_pages; 2091 2092 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - 2093 (eb->start >> PAGE_CACHE_SHIFT) + 1; 2094 2095 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 2096 GFP_NOFS); 2097 for (i = 0; i < num_pages; i++) { 2098 page = extent_buffer_page(eb, i); 2099 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 2100 ((i == num_pages - 1) && 2101 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { 2102 check_page_uptodate(tree, page); 2103 continue; 2104 } 2105 SetPageUptodate(page); 2106 } 2107 return 0; 2108 } 2109 EXPORT_SYMBOL(set_extent_buffer_uptodate); 2110 2111 int extent_buffer_uptodate(struct extent_map_tree *tree, 2112 struct extent_buffer *eb) 2113 { 2114 if (eb->flags & EXTENT_UPTODATE) 2115 return 1; 2116 return test_range_bit(tree, eb->start, eb->start + eb->len - 1, 2117 EXTENT_UPTODATE, 1); 2118 } 2119 EXPORT_SYMBOL(extent_buffer_uptodate); 2120 2121 int read_extent_buffer_pages(struct extent_map_tree *tree, 2122 struct extent_buffer *eb, int wait) 2123 { 2124 unsigned long i; 2125 struct page *page; 2126 int err; 2127 int ret = 0; 2128 unsigned long num_pages; 2129 2130 if (eb->flags & EXTENT_UPTODATE) 2131 return 0; 2132 2133 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 2134 EXTENT_UPTODATE, 1)) { 2135 return 0; 2136 } 2137 2138 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - 2139 (eb->start >> PAGE_CACHE_SHIFT) + 1; 2140 for (i = 0; i < num_pages; i++) { 2141 page = extent_buffer_page(eb, i); 2142 if (PageUptodate(page)) { 2143 continue; 2144 } 2145 if (!wait) { 2146 if (TestSetPageLocked(page)) { 2147 continue; 2148 } 2149 } else { 2150 lock_page(page); 2151 } 2152 if (!PageUptodate(page)) { 2153 err = page->mapping->a_ops->readpage(NULL, page); 2154 if (err) { 2155 ret = err; 2156 } 2157 } else { 2158 unlock_page(page); 2159 } 2160 } 2161 2162 if (ret || !wait) { 2163 return ret; 2164 } 2165 2166 for (i = 0; i < num_pages; i++) { 2167 page = extent_buffer_page(eb, i); 2168 wait_on_page_locked(page); 2169 if (!PageUptodate(page)) { 2170 ret = -EIO; 2171 } 2172 } 2173 eb->flags |= EXTENT_UPTODATE; 2174 return ret; 2175 } 2176 EXPORT_SYMBOL(read_extent_buffer_pages); 2177 2178 void read_extent_buffer(struct extent_buffer *eb, void *dstv, 2179 unsigned long start, 2180 unsigned long len) 2181 { 2182 size_t cur; 2183 size_t offset; 2184 struct page *page; 2185 char *kaddr; 2186 char *dst = (char *)dstv; 2187 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 2188 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 2189 2190 WARN_ON(start > eb->len); 2191 WARN_ON(start + len > eb->start + eb->len); 2192 2193 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); 2194 if (i == 0) 2195 offset += start_offset; 2196 2197 while(len > 0) { 2198 page = extent_buffer_page(eb, i); 2199 WARN_ON(!PageUptodate(page)); 2200 2201 cur = min(len, (PAGE_CACHE_SIZE - offset)); 2202 // kaddr = kmap_atomic(page, KM_USER0); 2203 kaddr = page_address(page); 2204 memcpy(dst, kaddr + offset, cur); 2205 // kunmap_atomic(kaddr, KM_USER0); 2206 2207 dst += cur; 2208 len -= cur; 2209 offset = 0; 2210 i++; 2211 } 2212 } 2213 EXPORT_SYMBOL(read_extent_buffer); 2214 2215 int map_extent_buffer(struct extent_buffer *eb, unsigned long start, 2216 unsigned long min_len, 2217 char **token, char **map, 2218 unsigned long *map_start, 2219 unsigned long *map_len, int km) 2220 { 2221 size_t offset = start & (PAGE_CACHE_SIZE - 1); 2222 char *kaddr; 2223 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 2224 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 2225 unsigned long end_i = (start_offset + start + min_len) >> 2226 PAGE_CACHE_SHIFT; 2227 2228 if (i != end_i) 2229 return -EINVAL; 2230 2231 WARN_ON(start > eb->len); 2232 2233 if (i == 0) { 2234 offset = start_offset; 2235 *map_start = 0; 2236 } else { 2237 *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; 2238 } 2239 2240 // kaddr = kmap_atomic(eb->pages[i], km); 2241 kaddr = page_address(extent_buffer_page(eb, i)); 2242 *token = kaddr; 2243 *map = kaddr + offset; 2244 *map_len = PAGE_CACHE_SIZE - offset; 2245 return 0; 2246 } 2247 EXPORT_SYMBOL(map_extent_buffer); 2248 2249 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) 2250 { 2251 // kunmap_atomic(token, km); 2252 } 2253 EXPORT_SYMBOL(unmap_extent_buffer); 2254 2255 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, 2256 unsigned long start, 2257 unsigned long len) 2258 { 2259 size_t cur; 2260 size_t offset; 2261 struct page *page; 2262 char *kaddr; 2263 char *ptr = (char *)ptrv; 2264 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 2265 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 2266 int ret = 0; 2267 2268 WARN_ON(start > eb->len); 2269 WARN_ON(start + len > eb->start + eb->len); 2270 2271 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); 2272 if (i == 0) 2273 offset += start_offset; 2274 2275 while(len > 0) { 2276 page = extent_buffer_page(eb, i); 2277 WARN_ON(!PageUptodate(page)); 2278 2279 cur = min(len, (PAGE_CACHE_SIZE - offset)); 2280 2281 // kaddr = kmap_atomic(page, KM_USER0); 2282 kaddr = page_address(page); 2283 ret = memcmp(ptr, kaddr + offset, cur); 2284 // kunmap_atomic(kaddr, KM_USER0); 2285 if (ret) 2286 break; 2287 2288 ptr += cur; 2289 len -= cur; 2290 offset = 0; 2291 i++; 2292 } 2293 return ret; 2294 } 2295 EXPORT_SYMBOL(memcmp_extent_buffer); 2296 2297 void write_extent_buffer(struct extent_buffer *eb, const void *srcv, 2298 unsigned long start, unsigned long len) 2299 { 2300 size_t cur; 2301 size_t offset; 2302 struct page *page; 2303 char *kaddr; 2304 char *src = (char *)srcv; 2305 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 2306 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 2307 2308 WARN_ON(start > eb->len); 2309 WARN_ON(start + len > eb->start + eb->len); 2310 2311 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); 2312 if (i == 0) 2313 offset += start_offset; 2314 2315 while(len > 0) { 2316 page = extent_buffer_page(eb, i); 2317 WARN_ON(!PageUptodate(page)); 2318 2319 cur = min(len, PAGE_CACHE_SIZE - offset); 2320 // kaddr = kmap_atomic(page, KM_USER0); 2321 kaddr = page_address(page); 2322 memcpy(kaddr + offset, src, cur); 2323 // kunmap_atomic(kaddr, KM_USER0); 2324 2325 src += cur; 2326 len -= cur; 2327 offset = 0; 2328 i++; 2329 } 2330 } 2331 EXPORT_SYMBOL(write_extent_buffer); 2332 2333 void memset_extent_buffer(struct extent_buffer *eb, char c, 2334 unsigned long start, unsigned long len) 2335 { 2336 size_t cur; 2337 size_t offset; 2338 struct page *page; 2339 char *kaddr; 2340 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); 2341 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; 2342 2343 WARN_ON(start > eb->len); 2344 WARN_ON(start + len > eb->start + eb->len); 2345 2346 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); 2347 if (i == 0) 2348 offset += start_offset; 2349 2350 while(len > 0) { 2351 page = extent_buffer_page(eb, i); 2352 WARN_ON(!PageUptodate(page)); 2353 2354 cur = min(len, PAGE_CACHE_SIZE - offset); 2355 // kaddr = kmap_atomic(page, KM_USER0); 2356 kaddr = page_address(page); 2357 memset(kaddr + offset, c, cur); 2358 // kunmap_atomic(kaddr, KM_USER0); 2359 2360 len -= cur; 2361 offset = 0; 2362 i++; 2363 } 2364 } 2365 EXPORT_SYMBOL(memset_extent_buffer); 2366 2367 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, 2368 unsigned long dst_offset, unsigned long src_offset, 2369 unsigned long len) 2370 { 2371 u64 dst_len = dst->len; 2372 size_t cur; 2373 size_t offset; 2374 struct page *page; 2375 char *kaddr; 2376 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 2377 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 2378 2379 WARN_ON(src->len != dst_len); 2380 2381 offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); 2382 if (i == 0) 2383 offset += start_offset; 2384 2385 while(len > 0) { 2386 page = extent_buffer_page(dst, i); 2387 WARN_ON(!PageUptodate(page)); 2388 2389 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); 2390 2391 // kaddr = kmap_atomic(page, KM_USER1); 2392 kaddr = page_address(page); 2393 read_extent_buffer(src, kaddr + offset, src_offset, cur); 2394 // kunmap_atomic(kaddr, KM_USER1); 2395 2396 src_offset += cur; 2397 len -= cur; 2398 offset = 0; 2399 i++; 2400 } 2401 } 2402 EXPORT_SYMBOL(copy_extent_buffer); 2403 2404 static void move_pages(struct page *dst_page, struct page *src_page, 2405 unsigned long dst_off, unsigned long src_off, 2406 unsigned long len) 2407 { 2408 // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 2409 char *dst_kaddr = page_address(dst_page); 2410 if (dst_page == src_page) { 2411 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); 2412 } else { 2413 // char *src_kaddr = kmap_atomic(src_page, KM_USER1); 2414 char *src_kaddr = page_address(src_page); 2415 char *p = dst_kaddr + dst_off + len; 2416 char *s = src_kaddr + src_off + len; 2417 2418 while (len--) 2419 *--p = *--s; 2420 2421 // kunmap_atomic(src_kaddr, KM_USER1); 2422 } 2423 // kunmap_atomic(dst_kaddr, KM_USER0); 2424 } 2425 2426 static void copy_pages(struct page *dst_page, struct page *src_page, 2427 unsigned long dst_off, unsigned long src_off, 2428 unsigned long len) 2429 { 2430 //kmap_atomic(dst_page, KM_USER0); 2431 char *dst_kaddr = page_address(dst_page); 2432 char *src_kaddr; 2433 2434 if (dst_page != src_page) 2435 src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); 2436 else 2437 src_kaddr = dst_kaddr; 2438 2439 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 2440 /* 2441 kunmap_atomic(dst_kaddr, KM_USER0); 2442 if (dst_page != src_page) 2443 kunmap_atomic(src_kaddr, KM_USER1); 2444 */ 2445 } 2446 2447 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 2448 unsigned long src_offset, unsigned long len) 2449 { 2450 size_t cur; 2451 size_t dst_off_in_page; 2452 size_t src_off_in_page; 2453 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 2454 unsigned long dst_i; 2455 unsigned long src_i; 2456 2457 if (src_offset + len > dst->len) { 2458 printk("memmove bogus src_offset %lu move len %lu len %lu\n", 2459 src_offset, len, dst->len); 2460 BUG_ON(1); 2461 } 2462 if (dst_offset + len > dst->len) { 2463 printk("memmove bogus dst_offset %lu move len %lu len %lu\n", 2464 dst_offset, len, dst->len); 2465 BUG_ON(1); 2466 } 2467 2468 while(len > 0) { 2469 dst_off_in_page = dst_offset & 2470 ((unsigned long)PAGE_CACHE_SIZE - 1); 2471 src_off_in_page = src_offset & 2472 ((unsigned long)PAGE_CACHE_SIZE - 1); 2473 2474 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; 2475 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; 2476 2477 if (src_i == 0) 2478 src_off_in_page += start_offset; 2479 if (dst_i == 0) 2480 dst_off_in_page += start_offset; 2481 2482 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - 2483 src_off_in_page)); 2484 cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - 2485 dst_off_in_page)); 2486 2487 copy_pages(extent_buffer_page(dst, dst_i), 2488 extent_buffer_page(dst, src_i), 2489 dst_off_in_page, src_off_in_page, cur); 2490 2491 src_offset += cur; 2492 dst_offset += cur; 2493 len -= cur; 2494 } 2495 } 2496 EXPORT_SYMBOL(memcpy_extent_buffer); 2497 2498 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, 2499 unsigned long src_offset, unsigned long len) 2500 { 2501 size_t cur; 2502 size_t dst_off_in_page; 2503 size_t src_off_in_page; 2504 unsigned long dst_end = dst_offset + len - 1; 2505 unsigned long src_end = src_offset + len - 1; 2506 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); 2507 unsigned long dst_i; 2508 unsigned long src_i; 2509 2510 if (src_offset + len > dst->len) { 2511 printk("memmove bogus src_offset %lu move len %lu len %lu\n", 2512 src_offset, len, dst->len); 2513 BUG_ON(1); 2514 } 2515 if (dst_offset + len > dst->len) { 2516 printk("memmove bogus dst_offset %lu move len %lu len %lu\n", 2517 dst_offset, len, dst->len); 2518 BUG_ON(1); 2519 } 2520 if (dst_offset < src_offset) { 2521 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 2522 return; 2523 } 2524 while(len > 0) { 2525 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; 2526 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; 2527 2528 dst_off_in_page = dst_end & 2529 ((unsigned long)PAGE_CACHE_SIZE - 1); 2530 src_off_in_page = src_end & 2531 ((unsigned long)PAGE_CACHE_SIZE - 1); 2532 2533 if (src_i == 0) 2534 src_off_in_page += start_offset; 2535 if (dst_i == 0) 2536 dst_off_in_page += start_offset; 2537 2538 cur = min(len, src_off_in_page + 1); 2539 cur = min(cur, dst_off_in_page + 1); 2540 // printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); 2541 move_pages(extent_buffer_page(dst, dst_i), 2542 extent_buffer_page(dst, src_i), 2543 dst_off_in_page - cur + 1, 2544 src_off_in_page - cur + 1, cur); 2545 2546 dst_end -= cur - 1; 2547 src_end -= cur - 1; 2548 len -= cur; 2549 } 2550 } 2551 EXPORT_SYMBOL(memmove_extent_buffer); 2552