#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)

#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

static LIST_HEAD(all_states);
spinlock_t state_lock = SPIN_LOCK_UNLOCKED;

void __init extent_map_init(void)
{
	extent_map_cache = kmem_cache_create("extent_map",
					     sizeof(struct extent_map), 0,
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_DESTROY_BY_RCU,
					     NULL);
	extent_state_cache = kmem_cache_create("extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_RECLAIM_ACCOUNT |
					       SLAB_DESTROY_BY_RCU,
					       NULL);
}

void __exit extent_map_exit(void)
{
	while(!list_empty(&all_states)) {
		struct extent_state *state;
		struct list_head *cur = all_states.next;
		state = list_entry(cur, struct extent_state, list);
		printk("found leaked state %Lu %Lu state %d in_tree %d\n",
		       state->start, state->end, state->state, state->in_tree);
		list_del(&state->list);
		kfree(state);
	}
	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	rwlock_init(&tree->lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;
	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);

struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	spin_lock_irq(&state_lock);
	list_add(&state->list, &all_states);
	spin_unlock_irq(&state_lock);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		spin_lock_irq(&state_lock);
		list_del_init(&state->list);
		spin_unlock_irq(&state_lock);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node ** p = &root->rb_node;
	struct rb_node * parent = NULL;
	struct tree_entry *entry;

	while(*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
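	/* rebalance the rbtree now that the new node is linked in */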
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node * n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while(n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while(prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;
	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
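 * Extents are indexed in the rbtree by their end offset.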
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == 0 && prev->block_start == 0) ||
		     (em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
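 * A reference is taken on the extent_map that is returned; drop it with
 * free_extent_map() when you are done with it.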
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
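 * The extent_state struct that gets absorbed by a merge is removed from
 * the tree and freed.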
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	if ((end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", start, end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created first half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling, the tree has 'orig' at [orig->start, orig->end].
 * After calling, there are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
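 * If the insert of 'prealloc' collides with an existing node, prealloc
 * is freed and -EEXIST is returned.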
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;
	if ((prealloc->end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", prealloc->start,
		       prealloc->end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 * | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start >= end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
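 * A reference is held on each extent_state we sleep on, so the struct
 * cannot go away while the tree lock is dropped.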
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start,end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
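		/* drop the reference taken by find_get_page */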
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
			  int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
1189*a52d9a80SChris Mason */ 1190*a52d9a80SChris Mason static int end_bio_extent_preparewrite(struct bio *bio, 1191*a52d9a80SChris Mason unsigned int bytes_done, int err) 1192*a52d9a80SChris Mason { 1193*a52d9a80SChris Mason const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1194*a52d9a80SChris Mason struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1195*a52d9a80SChris Mason struct extent_map_tree *tree = bio->bi_private; 1196*a52d9a80SChris Mason u64 start; 1197*a52d9a80SChris Mason u64 end; 1198*a52d9a80SChris Mason 1199*a52d9a80SChris Mason if (bio->bi_size) 1200*a52d9a80SChris Mason return 1; 1201*a52d9a80SChris Mason 1202*a52d9a80SChris Mason do { 1203*a52d9a80SChris Mason struct page *page = bvec->bv_page; 1204*a52d9a80SChris Mason start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1205*a52d9a80SChris Mason end = start + bvec->bv_len - 1; 1206*a52d9a80SChris Mason 1207*a52d9a80SChris Mason if (--bvec >= bio->bi_io_vec) 1208*a52d9a80SChris Mason prefetchw(&bvec->bv_page->flags); 1209*a52d9a80SChris Mason 1210*a52d9a80SChris Mason if (uptodate) { 1211*a52d9a80SChris Mason set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1212*a52d9a80SChris Mason } else { 1213*a52d9a80SChris Mason ClearPageUptodate(page); 1214*a52d9a80SChris Mason SetPageError(page); 1215*a52d9a80SChris Mason } 1216*a52d9a80SChris Mason 1217*a52d9a80SChris Mason unlock_extent(tree, start, end, GFP_ATOMIC); 1218*a52d9a80SChris Mason 1219*a52d9a80SChris Mason } while (bvec >= bio->bi_io_vec); 1220*a52d9a80SChris Mason 1221*a52d9a80SChris Mason bio_put(bio); 1222*a52d9a80SChris Mason return 0; 1223*a52d9a80SChris Mason } 1224*a52d9a80SChris Mason 1225*a52d9a80SChris Mason static int submit_extent_page(int rw, struct extent_map_tree *tree, 1226*a52d9a80SChris Mason struct page *page, sector_t sector, 1227*a52d9a80SChris Mason size_t size, unsigned long offset, 1228*a52d9a80SChris Mason struct block_device *bdev, 1229*a52d9a80SChris Mason bio_end_io_t end_io_func) 1230*a52d9a80SChris Mason { 1231*a52d9a80SChris Mason struct bio *bio; 1232*a52d9a80SChris Mason int ret = 0; 1233*a52d9a80SChris Mason 1234*a52d9a80SChris Mason bio = bio_alloc(GFP_NOIO, 1); 1235*a52d9a80SChris Mason 1236*a52d9a80SChris Mason bio->bi_sector = sector; 1237*a52d9a80SChris Mason bio->bi_bdev = bdev; 1238*a52d9a80SChris Mason bio->bi_io_vec[0].bv_page = page; 1239*a52d9a80SChris Mason bio->bi_io_vec[0].bv_len = size; 1240*a52d9a80SChris Mason bio->bi_io_vec[0].bv_offset = offset; 1241*a52d9a80SChris Mason 1242*a52d9a80SChris Mason bio->bi_vcnt = 1; 1243*a52d9a80SChris Mason bio->bi_idx = 0; 1244*a52d9a80SChris Mason bio->bi_size = size; 1245*a52d9a80SChris Mason 1246*a52d9a80SChris Mason bio->bi_end_io = end_io_func; 1247*a52d9a80SChris Mason bio->bi_private = tree; 1248*a52d9a80SChris Mason 1249*a52d9a80SChris Mason bio_get(bio); 1250*a52d9a80SChris Mason submit_bio(rw, bio); 1251*a52d9a80SChris Mason 1252*a52d9a80SChris Mason if (bio_flagged(bio, BIO_EOPNOTSUPP)) 1253*a52d9a80SChris Mason ret = -EOPNOTSUPP; 1254*a52d9a80SChris Mason 1255*a52d9a80SChris Mason bio_put(bio); 1256*a52d9a80SChris Mason return ret; 1257*a52d9a80SChris Mason } 1258*a52d9a80SChris Mason 1259*a52d9a80SChris Mason /* 1260*a52d9a80SChris Mason * basic readpage implementation. 
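The page is filled in
 * blocksize-aligned chunks: each chunk is mapped through the get_extent
 * callback, holes are zeroed in place, ranges that are already uptodate in
 * the extent tree are skipped, and everything else is read with
 * submit_extent_page().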
Locked extent state structs are inserted 1261*a52d9a80SChris Mason * into the tree that are removed when the IO is done (by the end_io 1262*a52d9a80SChris Mason * handlers) 1263*a52d9a80SChris Mason */ 1264*a52d9a80SChris Mason int extent_read_full_page(struct extent_map_tree *tree, struct page *page, 1265*a52d9a80SChris Mason get_extent_t *get_extent) 1266*a52d9a80SChris Mason { 1267*a52d9a80SChris Mason struct inode *inode = page->mapping->host; 1268*a52d9a80SChris Mason u64 start = page->index << PAGE_CACHE_SHIFT; 1269*a52d9a80SChris Mason u64 page_end = start + PAGE_CACHE_SIZE - 1; 1270*a52d9a80SChris Mason u64 end; 1271*a52d9a80SChris Mason u64 cur = start; 1272*a52d9a80SChris Mason u64 extent_offset; 1273*a52d9a80SChris Mason u64 last_byte = i_size_read(inode); 1274*a52d9a80SChris Mason u64 block_start; 1275*a52d9a80SChris Mason u64 cur_end; 1276*a52d9a80SChris Mason sector_t sector; 1277*a52d9a80SChris Mason struct extent_map *em; 1278*a52d9a80SChris Mason struct block_device *bdev; 1279*a52d9a80SChris Mason int ret; 1280*a52d9a80SChris Mason int nr = 0; 1281*a52d9a80SChris Mason size_t page_offset = 0; 1282*a52d9a80SChris Mason size_t iosize; 1283*a52d9a80SChris Mason size_t blocksize = inode->i_sb->s_blocksize; 1284*a52d9a80SChris Mason 1285*a52d9a80SChris Mason if (!PagePrivate(page)) { 1286*a52d9a80SChris Mason SetPagePrivate(page); 1287*a52d9a80SChris Mason set_page_private(page, 1); 1288*a52d9a80SChris Mason page_cache_get(page); 1289*a52d9a80SChris Mason } 1290*a52d9a80SChris Mason 1291*a52d9a80SChris Mason end = page_end; 1292*a52d9a80SChris Mason lock_extent(tree, start, end, GFP_NOFS); 1293*a52d9a80SChris Mason 1294*a52d9a80SChris Mason while (cur <= end) { 1295*a52d9a80SChris Mason if (cur >= last_byte) { 1296*a52d9a80SChris Mason iosize = PAGE_CACHE_SIZE - page_offset; 1297*a52d9a80SChris Mason zero_user_page(page, page_offset, iosize, KM_USER0); 1298*a52d9a80SChris Mason set_extent_uptodate(tree, cur, cur + iosize - 1, 1299*a52d9a80SChris Mason GFP_NOFS); 1300*a52d9a80SChris Mason unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1301*a52d9a80SChris Mason break; 1302*a52d9a80SChris Mason } 1303*a52d9a80SChris Mason em = get_extent(inode, page, page_offset, cur, end, 0); 1304*a52d9a80SChris Mason if (IS_ERR(em) || !em) { 1305*a52d9a80SChris Mason SetPageError(page); 1306*a52d9a80SChris Mason unlock_extent(tree, cur, end, GFP_NOFS); 1307*a52d9a80SChris Mason break; 1308*a52d9a80SChris Mason } 1309*a52d9a80SChris Mason 1310*a52d9a80SChris Mason extent_offset = cur - em->start; 1311*a52d9a80SChris Mason BUG_ON(em->end < cur); 1312*a52d9a80SChris Mason BUG_ON(end < cur); 1313*a52d9a80SChris Mason 1314*a52d9a80SChris Mason iosize = min(em->end - cur, end - cur) + 1; 1315*a52d9a80SChris Mason cur_end = min(em->end, end); 1316*a52d9a80SChris Mason iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 1317*a52d9a80SChris Mason sector = (em->block_start + extent_offset) >> 9; 1318*a52d9a80SChris Mason bdev = em->bdev; 1319*a52d9a80SChris Mason block_start = em->block_start; 1320*a52d9a80SChris Mason free_extent_map(em); 1321*a52d9a80SChris Mason em = NULL; 1322*a52d9a80SChris Mason 1323*a52d9a80SChris Mason /* we've found a hole, just zero and go on */ 1324*a52d9a80SChris Mason if (block_start == 0) { 1325*a52d9a80SChris Mason zero_user_page(page, page_offset, iosize, KM_USER0); 1326*a52d9a80SChris Mason set_extent_uptodate(tree, cur, cur + iosize - 1, 1327*a52d9a80SChris Mason GFP_NOFS); 1328*a52d9a80SChris Mason unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 
1329*a52d9a80SChris Mason cur = cur + iosize; 1330*a52d9a80SChris Mason page_offset += iosize; 1331*a52d9a80SChris Mason continue; 1332*a52d9a80SChris Mason } 1333*a52d9a80SChris Mason /* the get_extent function already copied into the page */ 1334*a52d9a80SChris Mason if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 1335*a52d9a80SChris Mason unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 1336*a52d9a80SChris Mason cur = cur + iosize; 1337*a52d9a80SChris Mason page_offset += iosize; 1338*a52d9a80SChris Mason continue; 1339*a52d9a80SChris Mason } 1340*a52d9a80SChris Mason 1341*a52d9a80SChris Mason ret = submit_extent_page(READ, tree, page, 1342*a52d9a80SChris Mason sector, iosize, page_offset, bdev, 1343*a52d9a80SChris Mason end_bio_extent_readpage); 1344*a52d9a80SChris Mason if (ret) 1345*a52d9a80SChris Mason SetPageError(page); 1346*a52d9a80SChris Mason cur = cur + iosize; 1347*a52d9a80SChris Mason page_offset += iosize; 1348*a52d9a80SChris Mason nr++; 1349*a52d9a80SChris Mason } 1350*a52d9a80SChris Mason if (!nr) { 1351*a52d9a80SChris Mason if (!PageError(page)) 1352*a52d9a80SChris Mason SetPageUptodate(page); 1353*a52d9a80SChris Mason unlock_page(page); 1354*a52d9a80SChris Mason } 1355*a52d9a80SChris Mason return 0; 1356*a52d9a80SChris Mason } 1357*a52d9a80SChris Mason EXPORT_SYMBOL(extent_read_full_page); 1358*a52d9a80SChris Mason 1359*a52d9a80SChris Mason /* 1360*a52d9a80SChris Mason * the writepage semantics are similar to regular writepage. extent 1361*a52d9a80SChris Mason * records are inserted to lock ranges in the tree, and as dirty areas 1362*a52d9a80SChris Mason * are found, they are marked writeback. Then the lock bits are removed 1363*a52d9a80SChris Mason * and the end_io handler clears the writeback ranges 1364*a52d9a80SChris Mason */ 1365*a52d9a80SChris Mason int extent_write_full_page(struct extent_map_tree *tree, struct page *page, 1366*a52d9a80SChris Mason get_extent_t *get_extent, 1367*a52d9a80SChris Mason struct writeback_control *wbc) 1368*a52d9a80SChris Mason { 1369*a52d9a80SChris Mason struct inode *inode = page->mapping->host; 1370*a52d9a80SChris Mason u64 start = page->index << PAGE_CACHE_SHIFT; 1371*a52d9a80SChris Mason u64 page_end = start + PAGE_CACHE_SIZE - 1; 1372*a52d9a80SChris Mason u64 end; 1373*a52d9a80SChris Mason u64 cur = start; 1374*a52d9a80SChris Mason u64 extent_offset; 1375*a52d9a80SChris Mason u64 last_byte = i_size_read(inode); 1376*a52d9a80SChris Mason u64 block_start; 1377*a52d9a80SChris Mason sector_t sector; 1378*a52d9a80SChris Mason struct extent_map *em; 1379*a52d9a80SChris Mason struct block_device *bdev; 1380*a52d9a80SChris Mason int ret; 1381*a52d9a80SChris Mason int nr = 0; 1382*a52d9a80SChris Mason size_t page_offset = 0; 1383*a52d9a80SChris Mason size_t iosize; 1384*a52d9a80SChris Mason size_t blocksize; 1385*a52d9a80SChris Mason loff_t i_size = i_size_read(inode); 1386*a52d9a80SChris Mason unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; 1387*a52d9a80SChris Mason 1388*a52d9a80SChris Mason if (page->index > end_index) { 1389*a52d9a80SChris Mason clear_extent_dirty(tree, start, page_end, GFP_NOFS); 1390*a52d9a80SChris Mason unlock_page(page); 1391*a52d9a80SChris Mason return 0; 1392*a52d9a80SChris Mason } 1393*a52d9a80SChris Mason 1394*a52d9a80SChris Mason if (page->index == end_index) { 1395*a52d9a80SChris Mason size_t offset = i_size & (PAGE_CACHE_SIZE - 1); 1396*a52d9a80SChris Mason zero_user_page(page, offset, 1397*a52d9a80SChris Mason PAGE_CACHE_SIZE - offset, KM_USER0); 1398*a52d9a80SChris Mason } 
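	/*
	 * make sure the page carries a private reference so the VM calls back
	 * into the releasepage/invalidatepage hooks for it later on
	 */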
1399*a52d9a80SChris Mason 1400*a52d9a80SChris Mason if (!PagePrivate(page)) { 1401*a52d9a80SChris Mason SetPagePrivate(page); 1402*a52d9a80SChris Mason set_page_private(page, 1); 1403*a52d9a80SChris Mason page_cache_get(page); 1404*a52d9a80SChris Mason } 1405*a52d9a80SChris Mason 1406*a52d9a80SChris Mason end = page_end; 1407*a52d9a80SChris Mason lock_extent(tree, start, page_end, GFP_NOFS); 1408*a52d9a80SChris Mason 1409*a52d9a80SChris Mason if (last_byte <= start) { 1410*a52d9a80SChris Mason clear_extent_dirty(tree, start, page_end, GFP_NOFS); 1411*a52d9a80SChris Mason goto done; 1412*a52d9a80SChris Mason } 1413*a52d9a80SChris Mason 1414*a52d9a80SChris Mason set_extent_uptodate(tree, start, page_end, GFP_NOFS); 1415*a52d9a80SChris Mason blocksize = inode->i_sb->s_blocksize; 1416*a52d9a80SChris Mason 1417*a52d9a80SChris Mason while (cur <= end) { 1418*a52d9a80SChris Mason if (cur >= last_byte) { 1419*a52d9a80SChris Mason clear_extent_dirty(tree, cur, page_end, GFP_NOFS); 1420*a52d9a80SChris Mason break; 1421*a52d9a80SChris Mason } 1422*a52d9a80SChris Mason em = get_extent(inode, page, page_offset, cur, end, 1); 1423*a52d9a80SChris Mason if (IS_ERR(em) || !em) { 1424*a52d9a80SChris Mason SetPageError(page); 1425*a52d9a80SChris Mason break; 1426*a52d9a80SChris Mason } 1427*a52d9a80SChris Mason 1428*a52d9a80SChris Mason extent_offset = cur - em->start; 1429*a52d9a80SChris Mason BUG_ON(em->end < cur); 1430*a52d9a80SChris Mason BUG_ON(end < cur); 1431*a52d9a80SChris Mason iosize = min(em->end - cur, end - cur) + 1; 1432*a52d9a80SChris Mason iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); 1433*a52d9a80SChris Mason sector = (em->block_start + extent_offset) >> 9; 1434*a52d9a80SChris Mason bdev = em->bdev; 1435*a52d9a80SChris Mason block_start = em->block_start; 1436*a52d9a80SChris Mason free_extent_map(em); 1437*a52d9a80SChris Mason em = NULL; 1438*a52d9a80SChris Mason 1439*a52d9a80SChris Mason if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { 1440*a52d9a80SChris Mason clear_extent_dirty(tree, cur, 1441*a52d9a80SChris Mason cur + iosize - 1, GFP_NOFS); 1442*a52d9a80SChris Mason cur = cur + iosize; 1443*a52d9a80SChris Mason page_offset += iosize; 1444*a52d9a80SChris Mason continue; 1445*a52d9a80SChris Mason } 1446*a52d9a80SChris Mason 1447*a52d9a80SChris Mason /* leave this out until we have a page_mkwrite call */ 1448*a52d9a80SChris Mason if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 1449*a52d9a80SChris Mason EXTENT_DIRTY, 0)) { 1450*a52d9a80SChris Mason cur = cur + iosize; 1451*a52d9a80SChris Mason page_offset += iosize; 1452*a52d9a80SChris Mason continue; 1453*a52d9a80SChris Mason } 1454*a52d9a80SChris Mason clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); 1455*a52d9a80SChris Mason set_range_writeback(tree, cur, cur + iosize - 1); 1456*a52d9a80SChris Mason ret = submit_extent_page(WRITE, tree, page, 1457*a52d9a80SChris Mason sector, iosize, page_offset, bdev, 1458*a52d9a80SChris Mason end_bio_extent_writepage); 1459*a52d9a80SChris Mason if (ret) 1460*a52d9a80SChris Mason SetPageError(page); 1461*a52d9a80SChris Mason cur = cur + iosize; 1462*a52d9a80SChris Mason page_offset += iosize; 1463*a52d9a80SChris Mason nr++; 1464*a52d9a80SChris Mason } 1465*a52d9a80SChris Mason done: 1466*a52d9a80SChris Mason WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); 1467*a52d9a80SChris Mason unlock_extent(tree, start, page_end, GFP_NOFS); 1468*a52d9a80SChris Mason unlock_page(page); 1469*a52d9a80SChris Mason return 0; 1470*a52d9a80SChris Mason } 
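/*
 * A filesystem built on top of this layer would typically call the helper
 * above from its ->writepage method, supplying its own extent lookup
 * callback.  A minimal sketch, assuming a hypothetical myfs_get_extent()
 * and an extent_map_tree embedded in the in-memory inode (neither exists in
 * this file):
 *
 *	static int myfs_writepage(struct page *page, struct writeback_control *wbc)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		return extent_write_full_page(tree, page, myfs_get_extent, wbc);
 *	}
 */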
1471*a52d9a80SChris Mason EXPORT_SYMBOL(extent_write_full_page); 1472*a52d9a80SChris Mason 1473*a52d9a80SChris Mason /* 1474*a52d9a80SChris Mason * basic invalidatepage code, this waits on any locked or writeback 1475*a52d9a80SChris Mason * ranges corresponding to the page, and then deletes any extent state 1476*a52d9a80SChris Mason * records from the tree 1477*a52d9a80SChris Mason */ 1478*a52d9a80SChris Mason int extent_invalidatepage(struct extent_map_tree *tree, 1479*a52d9a80SChris Mason struct page *page, unsigned long offset) 1480*a52d9a80SChris Mason { 1481*a52d9a80SChris Mason u64 start = (page->index << PAGE_CACHE_SHIFT); 1482*a52d9a80SChris Mason u64 end = start + PAGE_CACHE_SIZE - 1; 1483*a52d9a80SChris Mason size_t blocksize = page->mapping->host->i_sb->s_blocksize; 1484*a52d9a80SChris Mason 1485*a52d9a80SChris Mason start += (offset + blocksize -1) & ~(blocksize - 1); 1486*a52d9a80SChris Mason if (start > end) 1487*a52d9a80SChris Mason return 0; 1488*a52d9a80SChris Mason 1489*a52d9a80SChris Mason lock_extent(tree, start, end, GFP_NOFS); 1490*a52d9a80SChris Mason wait_on_extent_writeback(tree, start, end); 1491*a52d9a80SChris Mason clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, 1492*a52d9a80SChris Mason 1, 1, GFP_NOFS); 1493*a52d9a80SChris Mason return 0; 1494*a52d9a80SChris Mason } 1495*a52d9a80SChris Mason EXPORT_SYMBOL(extent_invalidatepage); 1496*a52d9a80SChris Mason 1497*a52d9a80SChris Mason /* 1498*a52d9a80SChris Mason * simple commit_write call, set_range_dirty is used to mark both 1499*a52d9a80SChris Mason * the pages and the extent records as dirty 1500*a52d9a80SChris Mason */ 1501*a52d9a80SChris Mason int extent_commit_write(struct extent_map_tree *tree, 1502*a52d9a80SChris Mason struct inode *inode, struct page *page, 1503*a52d9a80SChris Mason unsigned from, unsigned to) 1504*a52d9a80SChris Mason { 1505*a52d9a80SChris Mason loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 1506*a52d9a80SChris Mason 1507*a52d9a80SChris Mason if (!PagePrivate(page)) { 1508*a52d9a80SChris Mason SetPagePrivate(page); 1509*a52d9a80SChris Mason set_page_private(page, 1); 1510*a52d9a80SChris Mason page_cache_get(page); 1511*a52d9a80SChris Mason } 1512*a52d9a80SChris Mason 1513*a52d9a80SChris Mason set_page_dirty(page); 1514*a52d9a80SChris Mason 1515*a52d9a80SChris Mason if (pos > inode->i_size) { 1516*a52d9a80SChris Mason i_size_write(inode, pos); 1517*a52d9a80SChris Mason mark_inode_dirty(inode); 1518*a52d9a80SChris Mason } 1519*a52d9a80SChris Mason return 0; 1520*a52d9a80SChris Mason } 1521*a52d9a80SChris Mason EXPORT_SYMBOL(extent_commit_write); 1522*a52d9a80SChris Mason 1523*a52d9a80SChris Mason int extent_prepare_write(struct extent_map_tree *tree, 1524*a52d9a80SChris Mason struct inode *inode, struct page *page, 1525*a52d9a80SChris Mason unsigned from, unsigned to, get_extent_t *get_extent) 1526*a52d9a80SChris Mason { 1527*a52d9a80SChris Mason u64 page_start = page->index << PAGE_CACHE_SHIFT; 1528*a52d9a80SChris Mason u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 1529*a52d9a80SChris Mason u64 block_start; 1530*a52d9a80SChris Mason u64 orig_block_start; 1531*a52d9a80SChris Mason u64 block_end; 1532*a52d9a80SChris Mason u64 cur_end; 1533*a52d9a80SChris Mason struct extent_map *em; 1534*a52d9a80SChris Mason unsigned blocksize = 1 << inode->i_blkbits; 1535*a52d9a80SChris Mason size_t page_offset = 0; 1536*a52d9a80SChris Mason size_t block_off_start; 1537*a52d9a80SChris Mason size_t block_off_end; 1538*a52d9a80SChris Mason int err = 0; 1539*a52d9a80SChris 
Mason int iocount = 0; 1540*a52d9a80SChris Mason int ret = 0; 1541*a52d9a80SChris Mason int isnew; 1542*a52d9a80SChris Mason 1543*a52d9a80SChris Mason if (!PagePrivate(page)) { 1544*a52d9a80SChris Mason SetPagePrivate(page); 1545*a52d9a80SChris Mason set_page_private(page, 1); 1546*a52d9a80SChris Mason page_cache_get(page); 1547*a52d9a80SChris Mason } 1548*a52d9a80SChris Mason block_start = (page_start + from) & ~((u64)blocksize - 1); 1549*a52d9a80SChris Mason block_end = (page_start + to - 1) | (blocksize - 1); 1550*a52d9a80SChris Mason orig_block_start = block_start; 1551*a52d9a80SChris Mason 1552*a52d9a80SChris Mason lock_extent(tree, page_start, page_end, GFP_NOFS); 1553*a52d9a80SChris Mason while(block_start <= block_end) { 1554*a52d9a80SChris Mason em = get_extent(inode, page, page_offset, block_start, 1555*a52d9a80SChris Mason block_end, 1); 1556*a52d9a80SChris Mason if (IS_ERR(em) || !em) { 1557*a52d9a80SChris Mason goto err; 1558*a52d9a80SChris Mason } 1559*a52d9a80SChris Mason cur_end = min(block_end, em->end); 1560*a52d9a80SChris Mason block_off_start = block_start & (PAGE_CACHE_SIZE - 1); 1561*a52d9a80SChris Mason block_off_end = block_off_start + blocksize; 1562*a52d9a80SChris Mason isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); 1563*a52d9a80SChris Mason 1564*a52d9a80SChris Mason if (!PageUptodate(page) && isnew && 1565*a52d9a80SChris Mason (block_off_end > to || block_off_start < from)) { 1566*a52d9a80SChris Mason void *kaddr; 1567*a52d9a80SChris Mason 1568*a52d9a80SChris Mason kaddr = kmap_atomic(page, KM_USER0); 1569*a52d9a80SChris Mason if (block_off_end > to) 1570*a52d9a80SChris Mason memset(kaddr + to, 0, block_off_end - to); 1571*a52d9a80SChris Mason if (block_off_start < from) 1572*a52d9a80SChris Mason memset(kaddr + block_off_start, 0, 1573*a52d9a80SChris Mason from - block_off_start); 1574*a52d9a80SChris Mason flush_dcache_page(page); 1575*a52d9a80SChris Mason kunmap_atomic(kaddr, KM_USER0); 1576*a52d9a80SChris Mason } 1577*a52d9a80SChris Mason if (!isnew && !PageUptodate(page) && 1578*a52d9a80SChris Mason (block_off_end > to || block_off_start < from) && 1579*a52d9a80SChris Mason !test_range_bit(tree, block_start, cur_end, 1580*a52d9a80SChris Mason EXTENT_UPTODATE, 1)) { 1581*a52d9a80SChris Mason u64 sector; 1582*a52d9a80SChris Mason u64 extent_offset = block_start - em->start; 1583*a52d9a80SChris Mason size_t iosize; 1584*a52d9a80SChris Mason sector = (em->block_start + extent_offset) >> 9; 1585*a52d9a80SChris Mason iosize = (cur_end - block_start + blocksize - 1) & 1586*a52d9a80SChris Mason ~((u64)blocksize - 1); 1587*a52d9a80SChris Mason /* 1588*a52d9a80SChris Mason * we've already got the extent locked, but we 1589*a52d9a80SChris Mason * need to split the state such that our end_bio 1590*a52d9a80SChris Mason * handler can clear the lock. 
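		 * Setting EXTENT_LOCKED on just this sub-range makes the
		 * already-locked state record split, so end_bio_extent_preparewrite()
		 * unlocks (and wakes waiters on) only the blocks that were
		 * actually read here.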
1591*a52d9a80SChris Mason */ 1592*a52d9a80SChris Mason set_extent_bit(tree, block_start, 1593*a52d9a80SChris Mason block_start + iosize - 1, 1594*a52d9a80SChris Mason EXTENT_LOCKED, 0, NULL, GFP_NOFS); 1595*a52d9a80SChris Mason ret = submit_extent_page(READ, tree, page, 1596*a52d9a80SChris Mason sector, iosize, page_offset, em->bdev, 1597*a52d9a80SChris Mason end_bio_extent_preparewrite); 1598*a52d9a80SChris Mason iocount++; 1599*a52d9a80SChris Mason block_start = block_start + iosize; 1600*a52d9a80SChris Mason } else { 1601*a52d9a80SChris Mason set_extent_uptodate(tree, block_start, cur_end, 1602*a52d9a80SChris Mason GFP_NOFS); 1603*a52d9a80SChris Mason unlock_extent(tree, block_start, cur_end, GFP_NOFS); 1604*a52d9a80SChris Mason block_start = cur_end + 1; 1605*a52d9a80SChris Mason } 1606*a52d9a80SChris Mason page_offset = block_start & (PAGE_CACHE_SIZE - 1); 1607*a52d9a80SChris Mason free_extent_map(em); 1608*a52d9a80SChris Mason } 1609*a52d9a80SChris Mason if (iocount) { 1610*a52d9a80SChris Mason wait_extent_bit(tree, orig_block_start, 1611*a52d9a80SChris Mason block_end, EXTENT_LOCKED); 1612*a52d9a80SChris Mason } 1613*a52d9a80SChris Mason check_page_uptodate(tree, page); 1614*a52d9a80SChris Mason err: 1615*a52d9a80SChris Mason /* FIXME, zero out newly allocated blocks on error */ 1616*a52d9a80SChris Mason return err; 1617*a52d9a80SChris Mason } 1618*a52d9a80SChris Mason EXPORT_SYMBOL(extent_prepare_write); 1619*a52d9a80SChris Mason 1620*a52d9a80SChris Mason /* 1621*a52d9a80SChris Mason * a helper for releasepage. As long as there are no locked extents 1622*a52d9a80SChris Mason * in the range corresponding to the page, both state records and extent 1623*a52d9a80SChris Mason * map records are removed 1624*a52d9a80SChris Mason */ 1625*a52d9a80SChris Mason int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) 1626*a52d9a80SChris Mason { 1627*a52d9a80SChris Mason struct extent_map *em; 1628*a52d9a80SChris Mason u64 start = page->index << PAGE_CACHE_SHIFT; 1629*a52d9a80SChris Mason u64 end = start + PAGE_CACHE_SIZE - 1; 1630*a52d9a80SChris Mason u64 orig_start = start; 1631*a52d9a80SChris Mason 1632*a52d9a80SChris Mason while (start <= end) { 1633*a52d9a80SChris Mason em = lookup_extent_mapping(tree, start, end); 1634*a52d9a80SChris Mason if (!em || IS_ERR(em)) 1635*a52d9a80SChris Mason break; 1636*a52d9a80SChris Mason if (test_range_bit(tree, em->start, em->end, 1637*a52d9a80SChris Mason EXTENT_LOCKED, 0)) { 1638*a52d9a80SChris Mason free_extent_map(em); 1639*a52d9a80SChris Mason start = em->end + 1; 1640*a52d9a80SChris Mason printk("range still locked %Lu %Lu\n", em->start, em->end); 1641*a52d9a80SChris Mason break; 1642*a52d9a80SChris Mason } 1643*a52d9a80SChris Mason remove_extent_mapping(tree, em); 1644*a52d9a80SChris Mason start = em->end + 1; 1645*a52d9a80SChris Mason /* once for the rb tree */ 1646*a52d9a80SChris Mason free_extent_map(em); 1647*a52d9a80SChris Mason /* once for us */ 1648*a52d9a80SChris Mason free_extent_map(em); 1649*a52d9a80SChris Mason } 1650*a52d9a80SChris Mason WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); 1651*a52d9a80SChris Mason clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, 1652*a52d9a80SChris Mason 1, 1, GFP_NOFS); 1653*a52d9a80SChris Mason return 1; 1654*a52d9a80SChris Mason } 1655*a52d9a80SChris Mason EXPORT_SYMBOL(try_release_extent_mapping); 1656*a52d9a80SChris Mason 1657
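/*
 * Taken together, the exported helpers above are meant to back a filesystem's
 * address_space_operations.  The get_extent callback is expected to return
 * the extent_map covering the requested range of the inode, matching the way
 * it is called above.  A minimal sketch of the wiring, assuming the same
 * hypothetical myfs_get_extent(), MYFS_I() and per-inode extent_map_tree as
 * the writepage sketch earlier (none of these names are part of this file):
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		return extent_read_full_page(tree, page, myfs_get_extent);
 *	}
 *
 *	static int myfs_prepare_write(struct file *file, struct page *page,
 *				      unsigned from, unsigned to)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		return extent_prepare_write(tree, page->mapping->host, page,
 *					    from, to, myfs_get_extent);
 *	}
 *
 *	static int myfs_commit_write(struct file *file, struct page *page,
 *				     unsigned from, unsigned to)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		return extent_commit_write(tree, page->mapping->host, page, from, to);
 *	}
 *
 *	static int myfs_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		return try_release_extent_mapping(tree, page);
 *	}
 *
 *	static void myfs_invalidatepage(struct page *page, unsigned long offset)
 *	{
 *		struct extent_map_tree *tree = &MYFS_I(page->mapping->host)->extent_tree;
 *
 *		extent_invalidatepage(tree, page, offset);
 *	}
 *
 *	static struct address_space_operations myfs_aops = {
 *		.readpage	= myfs_readpage,
 *		.writepage	= myfs_writepage,
 *		.prepare_write	= myfs_prepare_write,
 *		.commit_write	= myfs_commit_write,
 *		.releasepage	= myfs_releasepage,
 *		.invalidatepage	= myfs_invalidatepage,
 *	};
 */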