#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;

struct tree_entry {
        u64 start;
        u64 end;
        int in_tree;
        struct rb_node rb_node;
};

/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)

#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

void __init extent_map_init(void)
{
        extent_map_cache = kmem_cache_create("extent_map",
                                             sizeof(struct extent_map), 0,
                                             SLAB_RECLAIM_ACCOUNT |
                                             SLAB_DESTROY_BY_RCU,
                                             NULL);
        extent_state_cache = kmem_cache_create("extent_state",
                                               sizeof(struct extent_state), 0,
                                               SLAB_RECLAIM_ACCOUNT |
                                               SLAB_DESTROY_BY_RCU,
                                               NULL);
}

void __exit extent_map_exit(void)
{
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
        if (extent_state_cache)
                kmem_cache_destroy(extent_state_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
                          struct address_space *mapping, gfp_t mask)
{
        tree->map.rb_node = NULL;
        tree->state.rb_node = NULL;
        tree->fill_delalloc = NULL;
        rwlock_init(&tree->lock);
        tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
        struct extent_map *em;
        em = kmem_cache_alloc(extent_map_cache, mask);
        if (!em || IS_ERR(em))
                return em;
        em->in_tree = 0;
        atomic_set(&em->refs, 1);
        return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
        if (atomic_dec_and_test(&em->refs)) {
                WARN_ON(em->in_tree);
                kmem_cache_free(extent_map_cache, em);
        }
}
EXPORT_SYMBOL(free_extent_map);

struct extent_state *alloc_extent_state(gfp_t mask)
{
        struct extent_state *state;
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state || IS_ERR(state))
                return state;
        state->state = 0;
        state->in_tree = 0;
        atomic_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
        if (atomic_dec_and_test(&state->refs)) {
                WARN_ON(state->in_tree);
                kmem_cache_free(extent_state_cache, state);
        }
}
EXPORT_SYMBOL(free_extent_state);
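
/*
 * Illustrative sketch (not part of the original file): one possible way a
 * caller could set up a tree and exercise the extent_map reference counting
 * above.  The mapping passed in and the range values are hypothetical; only
 * the calls into this file are real.  Guarded by #if 0 so it is never built.
 */
#if 0
static void extent_map_refcount_sketch(struct address_space *mapping)
{
        struct extent_map_tree tree;
        struct extent_map *em;

        extent_map_tree_init(&tree, mapping, GFP_NOFS);

        em = alloc_extent_map(GFP_NOFS);        /* refs == 1 */
        if (!em || IS_ERR(em))
                return;
        em->start = 0;
        em->end = 4095;
        em->block_start = 0;
        em->block_end = 4095;

        /* dropping the last reference returns it to extent_map_cache */
        free_extent_map(em);
}
#endif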

/*
 * insert 'node' into the tree keyed by the end offset of each entry.  If an
 * existing entry already covers 'offset', that entry is returned and nothing
 * is inserted.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct tree_entry *entry;

        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct tree_entry, rb_node);

                if (offset < entry->start)
                        p = &(*p)->rb_left;
                else if (offset > entry->end)
                        p = &(*p)->rb_right;
                else
                        return parent;
        }

        entry = rb_entry(node, struct tree_entry, rb_node);
        entry->in_tree = 1;
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * search for an entry that contains 'offset'.  If none is found, return NULL
 * and, when prev_ret is given, point it at the first entry that ends at or
 * after 'offset'.
 */
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
                                     struct rb_node **prev_ret)
{
        struct rb_node *n = root->rb_node;
        struct rb_node *prev = NULL;
        struct tree_entry *entry;
        struct tree_entry *prev_entry = NULL;

        while (n) {
                entry = rb_entry(n, struct tree_entry, rb_node);
                prev = n;
                prev_entry = entry;

                if (offset < entry->start)
                        n = n->rb_left;
                else if (offset > entry->end)
                        n = n->rb_right;
                else
                        return n;
        }
        if (!prev_ret)
                return NULL;
        while (prev && offset > prev_entry->end) {
                prev = rb_next(prev);
                prev_entry = rb_entry(prev, struct tree_entry, rb_node);
        }
        *prev_ret = prev;
        return NULL;
}

/* return the entry containing 'offset', or the next entry after it */
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
        struct rb_node *prev;
        struct rb_node *ret;
        ret = __tree_search(root, offset, &prev);
        if (!ret)
                return prev;
        return ret;
}

/* remove the entry that contains 'offset' from the tree */
static int tree_delete(struct rb_root *root, u64 offset)
{
        struct rb_node *node;
        struct tree_entry *entry;

        node = __tree_search(root, offset, NULL);
        if (!node)
                return -ENOENT;
        entry = rb_entry(node, struct tree_entry, rb_node);
        entry->in_tree = 0;
        rb_erase(node, root);
        return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
                       struct extent_map *em)
{
        int ret = 0;
        struct extent_map *prev = NULL;
        struct rb_node *rb;

        write_lock_irq(&tree->lock);
        rb = tree_insert(&tree->map, em->end, &em->rb_node);
        if (rb) {
                prev = rb_entry(rb, struct extent_map, rb_node);
                printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
                       prev->start, prev->end, em->start, em->end);
                ret = -EEXIST;
                goto out;
        }
        atomic_inc(&em->refs);
        if (em->start != 0) {
                rb = rb_prev(&em->rb_node);
                if (rb)
                        prev = rb_entry(rb, struct extent_map, rb_node);
                if (prev && prev->end + 1 == em->start &&
                    ((em->block_start == 0 && prev->block_start == 0) ||
                     (em->block_start == prev->block_end + 1))) {
                        em->start = prev->start;
                        em->block_start = prev->block_start;
                        rb_erase(&prev->rb_node, &tree->map);
                        prev->in_tree = 0;
                        free_extent_map(prev);
                }
        }
out:
        write_unlock_irq(&tree->lock);
        return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                                         u64 start, u64 end)
{
        struct extent_map *em;
        struct rb_node *rb_node;

        read_lock_irq(&tree->lock);
        rb_node = tree_search(&tree->map, start);
        if (!rb_node) {
                em = NULL;
                goto out;
        }
        if (IS_ERR(rb_node)) {
                em = ERR_PTR(PTR_ERR(rb_node));
                goto out;
        }
        em = rb_entry(rb_node, struct extent_map, rb_node);
        if (em->end < start || em->start > end) {
                em = NULL;
                goto out;
        }
        atomic_inc(&em->refs);
out:
        read_unlock_irq(&tree->lock);
        return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);
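
/*
 * Illustrative sketch (not in the original source): how the insert/lookup
 * pair above is meant to be used.  add_extent_mapping() takes its own
 * reference on success, and lookup_extent_mapping() returns a referenced
 * object, so the caller drops each of its references with free_extent_map().
 * The range values are hypothetical; guarded by #if 0 so it is never built.
 */
#if 0
static void extent_mapping_usage_sketch(struct extent_map_tree *tree)
{
        struct extent_map *em;
        struct extent_map *found;

        em = alloc_extent_map(GFP_NOFS);
        if (!em || IS_ERR(em))
                return;
        em->start = 4096;
        em->end = 8191;
        em->block_start = 4096;
        em->block_end = 8191;

        if (add_extent_mapping(tree, em) == 0) {
                /* the tree now holds one reference, we still hold ours */
                found = lookup_extent_mapping(tree, 4096, 4096);
                if (found)
                        free_extent_map(found); /* drop the lookup ref */
        }
        free_extent_map(em);                    /* drop our allocation ref */
}
#endif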

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
        int ret;

        write_lock_irq(&tree->lock);
        ret = tree_delete(&tree->map, em->end);
        write_unlock_irq(&tree->lock);
        return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
                       struct extent_state *state)
{
        struct extent_state *other;
        struct rb_node *other_node;

        if (state->state & EXTENT_IOBITS)
                return 0;

        other_node = rb_prev(&state->rb_node);
        if (other_node) {
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->end == state->start - 1 &&
                    other->state == state->state) {
                        state->start = other->start;
                        other->in_tree = 0;
                        rb_erase(&other->rb_node, &tree->state);
                        free_extent_state(other);
                }
        }
        other_node = rb_next(&state->rb_node);
        if (other_node) {
                other = rb_entry(other_node, struct extent_state, rb_node);
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
                        other->start = state->start;
                        state->in_tree = 0;
                        rb_erase(&state->rb_node, &tree->state);
                        free_extent_state(state);
                }
        }
        return 0;
}
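
/*
 * Worked example (added for illustration, not in the original source): with
 * two adjacent states [0, 4095] and [4096, 8191] that both carry only
 * EXTENT_DIRTY, merge_state() on either one collapses them into a single
 * [0, 8191] state and frees the other.  If either state also carries
 * EXTENT_LOCKED or EXTENT_WRITEBACK (EXTENT_IOBITS), no merge happens, so
 * the end_io handlers keep a one-to-one state for each in-flight IO.
 */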

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
                        int bits)
{
        struct rb_node *node;

        if (end < start) {
                printk("end < start %Lu %Lu\n", end, start);
                WARN_ON(1);
        }
        state->state |= bits;
        state->start = start;
        state->end = end;
        if ((end & 4095) == 0) {
                printk("insert state %Lu %Lu strange end\n", start, end);
                WARN_ON(1);
        }
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
                printk("found node %Lu %Lu on insert of %Lu %Lu\n",
                       found->start, found->end, start, end);
                free_extent_state(state);
                return -EEXIST;
        }
        merge_state(tree, state);
        return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created front half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].  After
 * calling, there are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
                       struct extent_state *prealloc, u64 split)
{
        struct rb_node *node;
        prealloc->start = orig->start;
        prealloc->end = split - 1;
        prealloc->state = orig->state;
        orig->start = split;
        if ((prealloc->end & 4095) == 0) {
                printk("insert state %Lu %Lu strange end\n", prealloc->start,
                       prealloc->end);
                WARN_ON(1);
        }
        node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
                printk("found node %Lu %Lu on insert of %Lu %Lu\n",
                       found->start, found->end, prealloc->start,
                       prealloc->end);
                free_extent_state(prealloc);
                return -EEXIST;
        }
        return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
                           struct extent_state *state, int bits, int wake,
                           int delete)
{
        int ret = state->state & bits;
        state->state &= ~bits;
        if (wake)
                wake_up(&state->wq);
        if (delete || state->state == 0) {
                if (state->in_tree) {
                        rb_erase(&state->rb_node, &tree->state);
                        state->in_tree = 0;
                        free_extent_state(state);
                } else {
                        WARN_ON(1);
                }
        } else {
                merge_state(tree, state);
        }
        return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
                     int bits, int wake, int delete, gfp_t mask)
{
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err;
        int set = 0;

again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
                if (!prealloc)
                        return -ENOMEM;
        }

        write_lock_irq(&tree->lock);
        /*
         * this search will find the extents that end after
         * our range starts
         */
        node = tree_search(&tree->state, start);
        if (!node)
                goto out;
        state = rb_entry(node, struct extent_state, rb_node);
        if (state->start > end)
                goto out;
        WARN_ON(state->end < start);

        /*
         *     | ---- desired range ---- |
         *  | state | or
         *  | ------------- state -------------- |
         *
         * We need to split the extent we found, and may flip
         * bits on second half.
         *
         * If the extent we found extends past our range, we
         * just split and search again.  It'll get split again
         * the next time though.
         *
         * If the extent we found is inside our range, we clear
         * the desired bit on it.
         */

        if (state->start < start) {
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
                if (err)
                        goto out;
                if (state->end <= end) {
                        start = state->end + 1;
                        set |= clear_state_bit(tree, state, bits,
                                               wake, delete);
                } else {
                        start = state->start;
                }
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *                        | state |
         * We need to split the extent, and clear the bit
         * on the first half
         */
        if (state->start <= end && state->end > end) {
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);

                if (wake)
                        wake_up(&state->wq);
                set |= clear_state_bit(tree, prealloc, bits,
                                       wake, delete);
                prealloc = NULL;
                goto out;
        }

        start = state->end + 1;
        set |= clear_state_bit(tree, state, bits, wake, delete);
        goto search_again;

out:
        write_unlock_irq(&tree->lock);
        if (prealloc)
                free_extent_state(prealloc);

        return set;

search_again:
        if (start >= end)
                goto out;
        write_unlock_irq(&tree->lock);
        if (mask & __GFP_WAIT)
                cond_resched();
        goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

/*
 * sleep until this state's waitqueue is woken.  The tree lock is dropped
 * across the sleep and retaken before returning.
 */
static int wait_on_state(struct extent_map_tree *tree,
                         struct extent_state *state)
{
        DEFINE_WAIT(wait);
        prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
        read_unlock_irq(&tree->lock);
        schedule();
        read_lock_irq(&tree->lock);
        finish_wait(&state->wq, &wait);
        return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
        struct extent_state *state;
        struct rb_node *node;

        read_lock_irq(&tree->lock);
again:
        while (1) {
                /*
                 * this search will find all the extents that end after
                 * our range starts
                 */
                node = tree_search(&tree->state, start);
                if (!node)
                        break;

                state = rb_entry(node, struct extent_state, rb_node);

                if (state->start > end)
                        goto out;

                if (state->state & bits) {
                        start = state->start;
                        atomic_inc(&state->refs);
                        wait_on_state(tree, state);
                        free_extent_state(state);
                        goto again;
                }
                start = state->end + 1;

                if (start > end)
                        break;

                if (need_resched()) {
                        read_unlock_irq(&tree->lock);
                        cond_resched();
                        read_lock_irq(&tree->lock);
                }
        }
out:
        read_unlock_irq(&tree->lock);
        return 0;
}
EXPORT_SYMBOL(wait_extent_bit);
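
/*
 * Worked example (illustration only, not in the original source): if the
 * tree holds a single state [0, 16383] with EXTENT_DIRTY set, then
 *
 *      clear_extent_bit(tree, 4096, 8191, EXTENT_DIRTY, 0, 0, GFP_NOFS);
 *
 * splits it so that [0, 4095] and [8192, 16383] keep EXTENT_DIRTY, while the
 * middle state loses its last bit and is removed from the tree.  The return
 * value is > 0 because EXTENT_DIRTY was set somewhere in the range.
 */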

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
                   int exclusive, u64 *failed_start, gfp_t mask)
{
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err = 0;
        int set;
        u64 last_start;
        u64 last_end;
again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
                if (!prealloc)
                        return -ENOMEM;
        }

        write_lock_irq(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
         */
        node = tree_search(&tree->state, start);
        if (!node) {
                err = insert_state(tree, prealloc, start, end, bits);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
                goto out;
        }

        state = rb_entry(node, struct extent_state, rb_node);
        last_start = state->start;
        last_end = state->end;

        /*
         * | ---- desired range ---- |
         * | state |
         *
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
                set = state->state & bits;
                if (set && exclusive) {
                        *failed_start = state->start;
                        err = -EEXIST;
                        goto out;
                }
                state->state |= bits;
                start = state->end + 1;
                merge_state(tree, state);
                goto search_again;
        }

        /*
         *     | ---- desired range ---- |
         * | state |
         *   or
         * | ------------- state -------------- |
         *
         * We need to split the extent we found, and may flip bits on
         * second half.
         *
         * If the extent we found extends past our
         * range, we just split and search again.  It'll get split
         * again the next time though.
         *
         * If the extent we found is inside our range, we set the
         * desired bit on it.
         */
        if (state->start < start) {
                set = state->state & bits;
                if (exclusive && set) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
                }
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
                if (err)
                        goto out;
                if (state->end <= end) {
                        state->state |= bits;
                        start = state->end + 1;
                        merge_state(tree, state);
                } else {
                        start = state->start;
                }
                goto search_again;
        }
        /*
         * | ---- desired range ---- |
         *                        | state |
         * We need to split the extent, and set the bit
         * on the first half
         */
        if (state->start <= end && state->end > end) {
                set = state->state & bits;
                if (exclusive && set) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
                }
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);

                prealloc->state |= bits;
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
        }

        /*
         * | ---- desired range ---- |
         *     | state |  or  | state |
         *
         * There's a hole, we need to insert something in it and
         * ignore the extent we found.
         */
        if (state->start > start) {
                u64 this_end;
                if (end < last_start)
                        this_end = end;
                else
                        this_end = last_start - 1;
                err = insert_state(tree, prealloc, start, this_end,
                                   bits);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
                if (err)
                        goto out;
                start = this_end + 1;
                goto search_again;
        }
        goto search_again;

out:
        write_unlock_irq(&tree->lock);
        if (prealloc)
                free_extent_state(prealloc);

        return err;

search_again:
        if (start > end)
                goto out;
        write_unlock_irq(&tree->lock);
        if (mask & __GFP_WAIT)
                cond_resched();
        goto again;
}
EXPORT_SYMBOL(set_extent_bit);
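
/*
 * Illustrative sketch (not in the original source): the 'exclusive' mode of
 * set_extent_bit() behaves like a try-lock.  On -EEXIST, failed_start
 * reports where the conflicting state begins, which lock_extent() below
 * uses to decide where to wait.  The helper name and range values are
 * hypothetical; guarded by #if 0 so it is never built.
 */
#if 0
static int try_lock_range_sketch(struct extent_map_tree *tree,
                                 u64 start, u64 end)
{
        u64 failed_start;
        int err;

        err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
                             &failed_start, GFP_NOFS);
        if (err == -EEXIST) {
                /* someone else holds a lock from failed_start; back off */
                return err;
        }
        return err;
}
#endif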

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
                     gfp_t mask)
{
        return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
                              mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
                        gfp_t mask)
{
        return set_extent_bit(tree, start, end,
                              EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
                              mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
                       gfp_t mask)
{
        return clear_extent_bit(tree, start, end,
                                EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
                   gfp_t mask)
{
        return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
                              mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
                     gfp_t mask)
{
        return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
                        gfp_t mask)
{
        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
                              mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
                          gfp_t mask)
{
        return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
                         gfp_t mask)
{
        return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
                              0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
                           gfp_t mask)
{
        return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
        return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
        int err;
        u64 failed_start;

        while (1) {
                err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
                                     &failed_start, mask);
                if (err == -EEXIST && (mask & __GFP_WAIT)) {
                        wait_extent_bit(tree, failed_start, end,
                                        EXTENT_LOCKED);
                        start = failed_start;
                } else {
                        break;
                }
                WARN_ON(start > end);
        }
        return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
                  gfp_t mask)
{
        return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        struct page *page;

        while (index <= end_index) {
                page = find_get_page(tree->mapping, index);
                BUG_ON(!page);
                __set_page_dirty_nobuffers(page);
                page_cache_release(page);
                index++;
        }
        set_extent_dirty(tree, start, end, GFP_NOFS);
        return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        struct page *page;

        while (index <= end_index) {
                page = find_get_page(tree->mapping, index);
                BUG_ON(!page);
                set_page_writeback(page);
                page_cache_release(page);
                index++;
        }
        set_extent_writeback(tree, start, end, GFP_NOFS);
        return 0;
}
EXPORT_SYMBOL(set_range_writeback);

/*
 * walks contiguous EXTENT_DELALLOC states beginning exactly at 'start',
 * locking the ones that begin at or after 'lock_start', until max_bytes or
 * a gap is hit.  The end of the last state walked is returned in *end, and
 * the number of states found is the return value.
 */
u64 find_lock_delalloc_range(struct extent_map_tree *tree,
                             u64 start, u64 lock_start, u64 *end,
                             u64 max_bytes)
{
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = start;
        u64 found = 0;
        u64 total_bytes = 0;

        write_lock_irq(&tree->lock);
        /*
         * this search will find all the extents that end after
         * our range starts.
         */
search_again:
        node = tree_search(&tree->state, cur_start);
        if (!node || IS_ERR(node)) {
                goto out;
        }

        while (1) {
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start != cur_start) {
                        goto out;
                }
                if (!(state->state & EXTENT_DELALLOC)) {
                        goto out;
                }
                if (state->start >= lock_start) {
                        if (state->state & EXTENT_LOCKED) {
                                DEFINE_WAIT(wait);
                                atomic_inc(&state->refs);
                                /*
                                 * queue ourselves on the state's waitqueue
                                 * before dropping the tree lock so a wakeup
                                 * between the unlock and schedule() is not
                                 * missed
                                 */
                                prepare_to_wait(&state->wq, &wait,
                                                TASK_UNINTERRUPTIBLE);
                                write_unlock_irq(&tree->lock);
                                schedule();
                                write_lock_irq(&tree->lock);
                                finish_wait(&state->wq, &wait);
                                free_extent_state(state);
                                goto search_again;
                        }
                        state->state |= EXTENT_LOCKED;
                }
                found++;
                *end = state->end;
                cur_start = state->end + 1;
                node = rb_next(node);
                if (!node)
                        break;
                total_bytes += state->end - state->start + 1;
                if (total_bytes >= max_bytes)
                        break;
        }
out:
        write_unlock_irq(&tree->lock);
        return found;
}
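
/*
 * Illustrative sketch (not in the original source): the blocking lock pair
 * above is typically wrapped around a byte range, both endpoints inclusive.
 * The helper name and the range are hypothetical; guarded by #if 0 so it is
 * never built.
 */
#if 0
static void locked_range_sketch(struct extent_map_tree *tree)
{
        u64 start = 0;
        u64 end = PAGE_CACHE_SIZE - 1;

        lock_extent(tree, start, end, GFP_NOFS);
        /* the range is now exclusively locked; do the IO or tree updates */
        unlock_extent(tree, start, end, GFP_NOFS);
}
#endif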

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        struct page *page;
        int err;

        while (index <= end_index) {
                page = grab_cache_page(tree->mapping, index);
                if (!page) {
                        err = -ENOMEM;
                        goto failed;
                }
                if (IS_ERR(page)) {
                        err = PTR_ERR(page);
                        goto failed;
                }
                index++;
        }
        lock_extent(tree, start, end, GFP_NOFS);
        return 0;

failed:
        /*
         * we failed above in getting the page at 'index', so we undo here
         * up to but not including the page at 'index'
         */
        end_index = index;
        index = start >> PAGE_CACHE_SHIFT;
        while (index < end_index) {
                page = find_get_page(tree->mapping, index);
                unlock_page(page);
                page_cache_release(page);
                index++;
        }
        return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        struct page *page;

        while (index <= end_index) {
                page = find_get_page(tree->mapping, index);
                unlock_page(page);
                page_cache_release(page);
                index++;
        }
        unlock_extent(tree, start, end, GFP_NOFS);
        return 0;
}
EXPORT_SYMBOL(unlock_range);

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the range
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
                          int bits, int filled)
{
        struct extent_state *state = NULL;
        struct rb_node *node;
        int bitset = 0;

        read_lock_irq(&tree->lock);
        node = tree_search(&tree->state, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > end)
                        break;

                if (filled && state->start > start) {
                        bitset = 0;
                        break;
                }
                if (state->state & bits) {
                        bitset = 1;
                        if (!filled)
                                break;
                } else if (filled) {
                        bitset = 0;
                        break;
                }
                start = state->end + 1;
                if (start > end)
                        break;
                node = rb_next(node);
        }
        read_unlock_irq(&tree->lock);
        return bitset;
}
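
/*
 * Worked example (illustration only, not in the original source): with a
 * state [0, 4095] carrying EXTENT_UPTODATE and a state [4096, 8191] carrying
 * only EXTENT_DIRTY,
 *
 *      test_range_bit(tree, 0, 8191, EXTENT_UPTODATE, 1) returns 0
 *      test_range_bit(tree, 0, 8191, EXTENT_UPTODATE, 0) returns 1
 *
 * 'filled' == 1 demands the whole range be covered by states with the bit
 * set; 'filled' == 0 is satisfied by any overlap.
 */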

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
                               struct page *page)
{
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
                SetPageUptodate(page);
        return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
                             struct page *page)
{
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
                unlock_page(page);
        return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
                                struct page *page)
{
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
                end_page_writeback(page);
        return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_writepage(struct bio *bio,
                                    unsigned int bytes_done, int err)
{
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct extent_map_tree *tree = bio->bi_private;
        u64 start;
        u64 end;
        int whole_page;

        if (bio->bi_size)
                return 1;

        do {
                struct page *page = bvec->bv_page;
                start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
                end = start + bvec->bv_len - 1;

                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
                        whole_page = 1;
                else
                        whole_page = 0;

                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);

                if (!uptodate) {
                        clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
                clear_extent_writeback(tree, start, end, GFP_ATOMIC);

                if (whole_page)
                        end_page_writeback(page);
                else
                        check_page_writeback(tree, page);
        } while (bvec >= bio->bi_io_vec);

        bio_put(bio);
        return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
                                   unsigned int bytes_done, int err)
{
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct extent_map_tree *tree = bio->bi_private;
        u64 start;
        u64 end;
        int whole_page;

        if (bio->bi_size)
                return 1;

        do {
                struct page *page = bvec->bv_page;
                start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
                end = start + bvec->bv_len - 1;

                if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
                        whole_page = 1;
                else
                        whole_page = 0;

                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);

                if (uptodate) {
                        set_extent_uptodate(tree, start, end, GFP_ATOMIC);
                        if (whole_page)
                                SetPageUptodate(page);
                        else
                                check_page_uptodate(tree, page);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }

                unlock_extent(tree, start, end, GFP_ATOMIC);

                if (whole_page)
                        unlock_page(page);
                else
                        check_page_locked(tree, page);
        } while (bvec >= bio->bi_io_vec);

        bio_put(bio);
        return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
static int end_bio_extent_preparewrite(struct bio *bio,
                                       unsigned int bytes_done, int err)
{
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct extent_map_tree *tree = bio->bi_private;
        u64 start;
        u64 end;

        if (bio->bi_size)
                return 1;

        do {
                struct page *page = bvec->bv_page;
                start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
                end = start + bvec->bv_len - 1;

                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);

                if (uptodate) {
                        set_extent_uptodate(tree, start, end, GFP_ATOMIC);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }

                unlock_extent(tree, start, end, GFP_ATOMIC);

        } while (bvec >= bio->bi_io_vec);

        bio_put(bio);
        return 0;
}

/*
 * build and submit a single page, single bio_vec bio for the given sector
 * and size, using 'end_io_func' as the completion handler and the tree as
 * bi_private for the end_io helpers above.
 */
static int submit_extent_page(int rw, struct extent_map_tree *tree,
                              struct page *page, sector_t sector,
                              size_t size, unsigned long offset,
                              struct block_device *bdev,
                              bio_end_io_t end_io_func)
{
        struct bio *bio;
        int ret = 0;

        bio = bio_alloc(GFP_NOIO, 1);

        bio->bi_sector = sector;
        bio->bi_bdev = bdev;
        bio->bi_io_vec[0].bv_page = page;
        bio->bi_io_vec[0].bv_len = size;
        bio->bi_io_vec[0].bv_offset = offset;

        bio->bi_vcnt = 1;
        bio->bi_idx = 0;
        bio->bi_size = size;

        bio->bi_end_io = end_io_func;
        bio->bi_private = tree;

        bio_get(bio);
        submit_bio(rw, bio);

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;

        bio_put(bio);
        return ret;
}

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
                          get_extent_t *get_extent)
{
        struct inode *inode = page->mapping->host;
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
        u64 extent_offset;
        u64 last_byte = i_size_read(inode);
        u64 block_start;
        u64 cur_end;
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
        int nr = 0;
        size_t page_offset = 0;
        size_t iosize;
        size_t blocksize = inode->i_sb->s_blocksize;

        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                set_page_private(page, 1);
                WARN_ON(!page->mapping->a_ops->invalidatepage);
                page_cache_get(page);
        }

        end = page_end;
        lock_extent(tree, start, end, GFP_NOFS);

        while (cur <= end) {
                if (cur >= last_byte) {
                        iosize = PAGE_CACHE_SIZE - page_offset;
                        zero_user_page(page, page_offset, iosize, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        break;
                }
                em = get_extent(inode, page, page_offset, cur, end, 0);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
                        unlock_extent(tree, cur, end, GFP_NOFS);
                        break;
                }

                extent_offset = cur - em->start;
                BUG_ON(em->end < cur);
                BUG_ON(end < cur);

                iosize = min(em->end - cur, end - cur) + 1;
                cur_end = min(em->end, end);
                iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
                sector = (em->block_start + extent_offset) >> 9;
                bdev = em->bdev;
                block_start = em->block_start;
                free_extent_map(em);
                em = NULL;

                /* we've found a hole, just zero and go on */
                if (block_start == 0) {
                        zero_user_page(page, page_offset, iosize, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            GFP_NOFS);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
                }
                /* the get_extent function already copied into the page */
                if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
                }

                ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, bdev,
                                         end_bio_extent_readpage);
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
                page_offset += iosize;
                nr++;
        }
        if (!nr) {
                if (!PageError(page))
                        SetPageUptodate(page);
                unlock_page(page);
        }
        return 0;
}
EXPORT_SYMBOL(extent_read_full_page);
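
/*
 * Illustrative sketch (not in the original source): a filesystem would call
 * extent_read_full_page() from its ->readpage address_space operation,
 * passing its own get_extent_t callback.  'my_extent_tree' and
 * 'my_get_extent' are hypothetical names; guarded by #if 0 so it is never
 * built.
 */
#if 0
extern struct extent_map_tree my_extent_tree;   /* hypothetical per-inode tree */
extern get_extent_t my_get_extent;              /* hypothetical callback */

static int sketch_readpage(struct file *file, struct page *page)
{
        return extent_read_full_page(&my_extent_tree, page, my_get_extent);
}
#endif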

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
                           get_extent_t *get_extent,
                           struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 page_end = start + PAGE_CACHE_SIZE - 1;
        u64 end;
        u64 cur = start;
        u64 extent_offset;
        u64 last_byte = i_size_read(inode);
        u64 block_start;
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
        int nr = 0;
        size_t page_offset = 0;
        size_t iosize;
        size_t blocksize;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
        u64 nr_delalloc;
        u64 delalloc_end;

        WARN_ON(!PageLocked(page));
        if (page->index > end_index) {
                clear_extent_dirty(tree, start, page_end, GFP_NOFS);
                unlock_page(page);
                return 0;
        }

        if (page->index == end_index) {
                size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
                zero_user_page(page, offset,
                               PAGE_CACHE_SIZE - offset, KM_USER0);
        }

        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                set_page_private(page, 1);
                WARN_ON(!page->mapping->a_ops->invalidatepage);
                page_cache_get(page);
        }

        lock_extent(tree, start, page_end, GFP_NOFS);
        nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
                                               &delalloc_end,
                                               128 * 1024 * 1024);
        if (nr_delalloc) {
                tree->fill_delalloc(inode, start, delalloc_end);
                if (delalloc_end >= page_end + 1) {
                        clear_extent_bit(tree, page_end + 1, delalloc_end,
                                         EXTENT_LOCKED | EXTENT_DELALLOC,
                                         1, 0, GFP_NOFS);
                }
                clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
                                 0, 0, GFP_NOFS);
                if (test_range_bit(tree, start, page_end,
                                   EXTENT_DELALLOC, 0)) {
                        printk("found delalloc bits after clear extent_bit\n");
                }
        } else if (test_range_bit(tree, start, page_end,
                                  EXTENT_DELALLOC, 0)) {
                printk("found delalloc bits after find_delalloc_range returns 0\n");
        }

        end = page_end;
        if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
                printk("found delalloc bits after lock_extent\n");
        }

        if (last_byte <= start) {
                clear_extent_dirty(tree, start, page_end, GFP_NOFS);
                goto done;
        }

        set_extent_uptodate(tree, start, page_end, GFP_NOFS);
        blocksize = inode->i_sb->s_blocksize;

        while (cur <= end) {
                if (cur >= last_byte) {
                        clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
                        break;
                }
                em = get_extent(inode, page, page_offset, cur, end, 0);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
                        break;
                }

                extent_offset = cur - em->start;
                BUG_ON(em->end < cur);
                BUG_ON(end < cur);
                iosize = min(em->end - cur, end - cur) + 1;
                iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
                sector = (em->block_start + extent_offset) >> 9;
                bdev = em->bdev;
                block_start = em->block_start;
                free_extent_map(em);
                em = NULL;

                if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
                        clear_extent_dirty(tree, cur,
                                           cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
                }

                /* leave this out until we have a page_mkwrite call */
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
                                         EXTENT_DIRTY, 0)) {
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
                }
                clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
                set_range_writeback(tree, cur, cur + iosize - 1);
                ret = submit_extent_page(WRITE, tree, page,
                                         sector, iosize, page_offset, bdev,
                                         end_bio_extent_writepage);
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
                page_offset += iosize;
                nr++;
        }
done:
        WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
        unlock_extent(tree, start, page_end, GFP_NOFS);
        unlock_page(page);
        return 0;
}
EXPORT_SYMBOL(extent_write_full_page);

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
                          struct page *page, unsigned long offset)
{
        u64 start = (page->index << PAGE_CACHE_SHIFT);
        u64 end = start + PAGE_CACHE_SIZE - 1;
        size_t blocksize = page->mapping->host->i_sb->s_blocksize;

        start += (offset + blocksize - 1) & ~(blocksize - 1);
        if (start > end)
                return 0;

        lock_extent(tree, start, end, GFP_NOFS);
        wait_on_extent_writeback(tree, start, end);
        clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY,
                         1, 1, GFP_NOFS);
        return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, set_range_dirty is used to mark both
 * the pages and the extent records as dirty
 */
int extent_commit_write(struct extent_map_tree *tree,
                        struct inode *inode, struct page *page,
                        unsigned from, unsigned to)
{
        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                set_page_private(page, 1);
                WARN_ON(!page->mapping->a_ops->invalidatepage);
                page_cache_get(page);
        }

        set_page_dirty(page);

        if (pos > inode->i_size) {
                i_size_write(inode, pos);
                mark_inode_dirty(inode);
        }
        return 0;
}
EXPORT_SYMBOL(extent_commit_write);

int extent_prepare_write(struct extent_map_tree *tree,
                         struct inode *inode, struct page *page,
                         unsigned from, unsigned to, get_extent_t *get_extent)
{
        u64 page_start = page->index << PAGE_CACHE_SHIFT;
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
        u64 block_start;
        u64 orig_block_start;
        u64 block_end;
        u64 cur_end;
        struct extent_map *em;
        unsigned blocksize = 1 << inode->i_blkbits;
        size_t page_offset = 0;
        size_t block_off_start;
        size_t block_off_end;
        int err = 0;
        int iocount = 0;
        int ret = 0;
        int isnew;

        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                set_page_private(page, 1);
                WARN_ON(!page->mapping->a_ops->invalidatepage);
                page_cache_get(page);
        }
        block_start = (page_start + from) & ~((u64)blocksize - 1);
        block_end = (page_start + to - 1) | (blocksize - 1);
        orig_block_start = block_start;

        lock_extent(tree, page_start, page_end, GFP_NOFS);
        while (block_start <= block_end) {
                em = get_extent(inode, page, page_offset, block_start,
                                block_end, 1);
                if (IS_ERR(em) || !em) {
                        goto err;
                }
                cur_end = min(block_end, em->end);
                block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
                block_off_end = block_off_start + blocksize;
                isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

                if (!PageUptodate(page) && isnew &&
                    (block_off_end > to || block_off_start < from)) {
                        void *kaddr;

                        kaddr = kmap_atomic(page, KM_USER0);
                        if (block_off_end > to)
                                memset(kaddr + to, 0, block_off_end - to);
                        if (block_off_start < from)
                                memset(kaddr + block_off_start, 0,
                                       from - block_off_start);
                        flush_dcache_page(page);
                        kunmap_atomic(kaddr, KM_USER0);
                }
                if (!isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
                                    EXTENT_UPTODATE, 1)) {
                        u64 sector;
                        u64 extent_offset = block_start - em->start;
                        size_t iosize;
                        sector = (em->block_start + extent_offset) >> 9;
                        iosize = (cur_end - block_start + blocksize - 1) &
                                 ~((u64)blocksize - 1);
                        /*
                         * we've already got the extent locked, but we
                         * need to split the state such that our end_bio
                         * handler can clear the lock.
                         */
                        set_extent_bit(tree, block_start,
                                       block_start + iosize - 1,
                                       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                                 sector, iosize, page_offset,
                                                 em->bdev,
                                                 end_bio_extent_preparewrite);
                        iocount++;
                        block_start = block_start + iosize;
                } else {
                        set_extent_uptodate(tree, block_start, cur_end,
                                            GFP_NOFS);
                        unlock_extent(tree, block_start, cur_end, GFP_NOFS);
                        block_start = cur_end + 1;
                }
                page_offset = block_start & (PAGE_CACHE_SIZE - 1);
                free_extent_map(em);
        }
        if (iocount) {
                wait_extent_bit(tree, orig_block_start,
                                block_end, EXTENT_LOCKED);
        }
        check_page_uptodate(tree, page);
err:
        /* FIXME, zero out newly allocated blocks on error */
        return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
        struct extent_map *em;
        u64 start = page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
        u64 orig_start = start;
        int ret = 1;

        while (start <= end) {
                em = lookup_extent_mapping(tree, start, end);
                if (!em || IS_ERR(em))
                        break;
                if (!test_range_bit(tree, em->start, em->end,
                                    EXTENT_LOCKED, 0)) {
                        remove_extent_mapping(tree, em);
                        /* once for the rb tree */
                        free_extent_map(em);
                }
                start = em->end + 1;
                /* once for us */
                free_extent_map(em);
        }
        if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
                ret = 0;
        else
                clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
                                 1, 1, GFP_NOFS);
        return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);
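
/*
 * Illustrative sketch (not part of the original file): the exported helpers
 * above map onto the 2.6.23-era address_space_operations hooks roughly as
 * shown below.  All of the sketch_* wrappers are hypothetical; a real
 * filesystem supplies its own thin wrappers that look up the per-inode
 * extent_map_tree and get_extent callback before calling into this file.
 * Guarded by #if 0 so it is never built.
 */
#if 0
static const struct address_space_operations sketch_aops = {
        .readpage       = sketch_readpage,       /* -> extent_read_full_page() */
        .writepage      = sketch_writepage,      /* -> extent_write_full_page() */
        .prepare_write  = sketch_prepare_write,  /* -> extent_prepare_write() */
        .commit_write   = sketch_commit_write,   /* -> extent_commit_write() */
        .invalidatepage = sketch_invalidatepage, /* -> extent_invalidatepage() */
        .releasepage    = sketch_releasepage,    /* -> try_release_extent_mapping() */
};
#endif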