#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				      unsigned long extra_flags,
				      void (*ctor)(void *, struct kmem_cache *,
						   unsigned long));

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)

#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

void __init extent_map_init(void)
{
	extent_map_cache = btrfs_cache_create("extent_map",
					      sizeof(struct extent_map),
					      SLAB_DESTROY_BY_RCU,
					      NULL);
	extent_state_cache = btrfs_cache_create("extent_state",
						sizeof(struct extent_state),
						SLAB_DESTROY_BY_RCU,
						NULL);
}

void __exit extent_map_exit(void)
{
	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	rwlock_init(&tree->lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;
	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (!em)
		return;
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);


struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
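	/* link the new entry into place and rebalance the rb tree */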
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while (prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;
	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings. The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == 0 && prev->block_start == 0) ||
		     (em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range. There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
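 *
 * For example, with mappings [0, 4095] and [8192, 12287] in the tree, a
 * lookup of [4096, 12287] returns only the [8192, 12287] mapping; a caller
 * that needs to cover the whole range must loop, advancing start past the
 * end of each extent_map it gets back.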
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);

/*
 * removes an extent_map struct from the tree. No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree. Extents with EXTENT_IOBITS in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree. 'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally. This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
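 *
 * e.g. insert_state(tree, state, 0, 4095, EXTENT_DIRTY) records a dirty
 * first block, assuming a 4096 byte blocksize; the caller must already hold
 * the tree lock and have allocated 'state' with alloc_extent_state().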
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	if ((end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", start, end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half. 'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end]. After
 * calling, there are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [split, orig->end]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;
	if ((prealloc->end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", prealloc->start,
		       prealloc->end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree. This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
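 *
 * For example, clear_extent_bit(tree, 0, 4095, EXTENT_DIRTY, 0, 0, GFP_NOFS)
 * clears the dirty bit on the first 4096 bytes only, splitting any state
 * record that happens to straddle the 4095/4096 boundary.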
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again. It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start >= end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
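 *
 * A typical caller is wait_on_extent_writeback() below, which uses this to
 * wait for EXTENT_WRITEBACK to clear over a byte range.
 *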
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

/*
 * set some bits on a range in the tree. This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set. The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
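	 *
	 * e.g. desired [4096, 12287] over an existing state [0, 8191]:
	 * split at 4096, set the bits on [4096, 8191], then search again
	 * starting at 8192.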
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |  or  | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree,
			 u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way. [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

u64 find_lock_delalloc_range(struct extent_map_tree *tree,
			     u64 start, u64 lock_start, u64 *end,
			     u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = start;
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
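	 * (i.e. the first state record whose end is >= cur_start)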
	 */
search_again:
	node = tree_search(&tree->state, cur_start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start != cur_start) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			goto out;
		}
		if (state->start >= lock_start) {
			if (state->state & EXTENT_LOCKED) {
				DEFINE_WAIT(wait);
				atomic_inc(&state->refs);
				/* queue ourselves on the state's waitqueue so
				 * schedule() actually sleeps until the lock
				 * holder wakes us
				 */
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
				write_unlock_irq(&tree->lock);
				schedule();
				write_lock_irq(&tree->lock);
				finish_wait(&state->wq, &wait);
				free_extent_state(state);
				goto search_again;
			}
			state->state |= EXTENT_LOCKED;
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes = state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return found;
}

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
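	 *
	 * set_state_private needs an exact match on state->start, so the
	 * caller must pass the same 'start' the state record was created with.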
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	return ret;

}

int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the range
 * has the bits set. Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
			  int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;
	int ret;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			if (ret)
				uptodate = 0;
		}
		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
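 *
 * No page flags are updated here on success; extent_prepare_write() waits
 * for EXTENT_LOCKED to clear and then checks page uptodate itself.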
 */
static int end_bio_extent_preparewrite(struct bio *bio,
				       unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

static int submit_extent_page(int rw, struct extent_map_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      bio_end_io_t end_io_func)
{
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_io_vec[0].bv_page = page;
	bio->bi_io_vec[0].bv_len = size;
	bio->bi_io_vec[0].bv_offset = offset;

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = size;

	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}

/*
 * basic readpage implementation. Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize = inode->i_sb->s_blocksize;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		set_page_private(page, 1);
		page_cache_get(page);
	}

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	while (cur <= end) {
		if (cur >= last_byte) {
			iosize = PAGE_CACHE_SIZE - page_offset;
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);

		iosize = min(em->end - cur, end - cur) + 1;
		cur_end = min(em->end, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == 0) {
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 bdev,
						 end_bio_extent_readpage);
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(extent_read_full_page);

/*
 * the writepage semantics are similar to regular writepage. extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback. Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	if (page->index > end_index) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		unlock_page(page);
		return 0;
	}

	if (page->index == end_index) {
		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
		zero_user_page(page, offset,
			       PAGE_CACHE_SIZE - offset, KM_USER0);
	}

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	lock_extent(tree, start, page_end, GFP_NOFS);
	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
					       &delalloc_end,
					       128 * 1024 * 1024);
	if (nr_delalloc) {
		tree->ops->fill_delalloc(inode, start, delalloc_end);
		if (delalloc_end >= page_end + 1) {
			clear_extent_bit(tree, page_end + 1, delalloc_end,
					 EXTENT_LOCKED | EXTENT_DELALLOC,
					 1, 0, GFP_NOFS);
		}
		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
				 0, 0, GFP_NOFS);
		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
			printk("found delalloc bits after clear extent_bit\n");
		}
	} else if (test_range_bit(tree, start, page_end,
				  EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after find_delalloc_range returns 0\n");
	}

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after lock_extent\n");
	}

	if (last_byte <= start) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		goto done;
	}

	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1);
		if (ret)
			SetPageError(page);
		else {
			set_range_writeback(tree, cur, cur + iosize - 1);
			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 end_bio_extent_writepage);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
done:
	WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
	unlock_extent(tree, start, page_end, GFP_NOFS);
	unlock_page(page);
	return 0;
}
EXPORT_SYMBOL(extent_write_full_page);

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
			  struct page *page, unsigned long offset)
{
	u64 start = (page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);
	if (start > end)
		return 0;

	lock_extent(tree, start, end, GFP_NOFS);
	wait_on_extent_writeback(tree, start, end);
	clear_extent_bit(tree, start, end,
			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
			 1, 1, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, set_range_dirty is used to mark both
 * the pages and the extent records as dirty
 */
int extent_commit_write(struct extent_map_tree *tree,
			struct inode *inode, struct page *page,
			unsigned from, unsigned to)
{
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	if (!PagePrivate(page)) {
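		/* attach the private flag so the VM calls our invalidatepage
		 * and releasepage hooks for this page
		 */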
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	set_page_dirty(page);

	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
	return 0;
}
EXPORT_SYMBOL(extent_commit_write);

int extent_prepare_write(struct extent_map_tree *tree,
			 struct inode *inode, struct page *page,
			 unsigned from, unsigned to, get_extent_t *get_extent)
{
	u64 page_start = page->index << PAGE_CACHE_SHIFT;
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 block_start;
	u64 orig_block_start;
	u64 block_end;
	u64 cur_end;
	struct extent_map *em;
	unsigned blocksize = 1 << inode->i_blkbits;
	size_t page_offset = 0;
	size_t block_off_start;
	size_t block_off_end;
	int err = 0;
	int iocount = 0;
	int ret = 0;
	int isnew;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}
	block_start = (page_start + from) & ~((u64)blocksize - 1);
	block_end = (page_start + to - 1) | (blocksize - 1);
	orig_block_start = block_start;

	lock_extent(tree, page_start, page_end, GFP_NOFS);
	while (block_start <= block_end) {
		em = get_extent(inode, page, page_offset, block_start,
				block_end, 1);
		if (IS_ERR(em) || !em) {
			goto err;
		}
		cur_end = min(block_end, em->end);
		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
		block_off_end = block_off_start + blocksize;
		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

		if (!PageUptodate(page) && isnew &&
		    (block_off_end > to || block_off_start < from)) {
			void *kaddr;

			kaddr = kmap_atomic(page, KM_USER0);
			if (block_off_end > to)
				memset(kaddr + to, 0, block_off_end - to);
			if (block_off_start < from)
				memset(kaddr + block_off_start, 0,
				       from - block_off_start);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (!isnew && !PageUptodate(page) &&
		    (block_off_end > to || block_off_start < from) &&
		    !test_range_bit(tree, block_start, cur_end,
				    EXTENT_UPTODATE, 1)) {
			u64 sector;
			u64 extent_offset = block_start - em->start;
			size_t iosize;
			sector = (em->block_start + extent_offset) >> 9;
			iosize = (cur_end - block_start + blocksize - 1) &
				 ~((u64)blocksize - 1);
			/*
			 * we've already got the extent locked, but we
			 * need to split the state such that our end_bio
			 * handler can clear the lock.
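			 * (setting EXTENT_LOCKED again on just this sub-range
			 * splits the larger locked record at these boundaries)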
			 */
			set_extent_bit(tree, block_start,
				       block_start + iosize - 1,
				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 em->bdev,
						 end_bio_extent_preparewrite);
			iocount++;
			block_start = block_start + iosize;
		} else {
			set_extent_uptodate(tree, block_start, cur_end,
					    GFP_NOFS);
			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
		free_extent_map(em);
	}
	if (iocount) {
		wait_extent_bit(tree, orig_block_start,
				block_end, EXTENT_LOCKED);
	}
	check_page_uptodate(tree, page);
err:
	/* FIXME, zero out newly allocated blocks on error */
	return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage. As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
	struct extent_map *em;
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	u64 orig_start = start;
	int ret = 1;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		if (!test_range_bit(tree, em->start, em->end,
				    EXTENT_LOCKED, 0)) {
			remove_extent_mapping(tree, em);
			/* once for the rb tree */
			free_extent_map(em);
		}
		start = em->end + 1;
		/* once for us */
		free_extent_map(em);
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
		ret = 0;
	else
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);