xref: /openbmc/linux/fs/btrfs/extent_map.c (revision 179e29e4)
1 #include <linux/bitops.h>
2 #include <linux/slab.h>
3 #include <linux/bio.h>
4 #include <linux/mm.h>
5 #include <linux/gfp.h>
6 #include <linux/pagemap.h>
7 #include <linux/page-flags.h>
8 #include <linux/module.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include <linux/swap.h>
12 #include <linux/version.h>
13 #include "extent_map.h"
14 
15 /* temporary declaration until extent_map moves out of btrfs */
16 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
17 				       unsigned long extra_flags,
18 				       void (*ctor)(void *, struct kmem_cache *,
19 						    unsigned long));
20 
21 static struct kmem_cache *extent_map_cache;
22 static struct kmem_cache *extent_state_cache;
23 static struct kmem_cache *extent_buffer_cache;
24 
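/*
 * debug lists: every allocated extent_state is kept on 'states' so that
 * extent_map_exit() can report leaked objects at module unload time.
 * 'buffers' is the analogous list for extent_buffers.
 */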
25 static LIST_HEAD(buffers);
26 static LIST_HEAD(states);
27 
28 static DEFINE_SPINLOCK(state_lock);
29 #define BUFFER_LRU_MAX 64
30 
31 struct tree_entry {
32 	u64 start;
33 	u64 end;
34 	int in_tree;
35 	struct rb_node rb_node;
36 };
37 
38 void __init extent_map_init(void)
39 {
40 	extent_map_cache = btrfs_cache_create("extent_map",
41 					    sizeof(struct extent_map), 0,
42 					    NULL);
43 	extent_state_cache = btrfs_cache_create("extent_state",
44 					    sizeof(struct extent_state), 0,
45 					    NULL);
46 	extent_buffer_cache = btrfs_cache_create("extent_buffers",
47 					    sizeof(struct extent_buffer), 0,
48 					    NULL);
49 }
50 
51 void __exit extent_map_exit(void)
52 {
53 	struct extent_state *state;
54 
55 	while (!list_empty(&states)) {
56 		state = list_entry(states.next, struct extent_state, list);
57 		printk("state leak: start %Lu end %Lu state %lu in tree %d "
		       "refs %d\n", state->start, state->end, state->state,
		       state->in_tree, atomic_read(&state->refs));
58 		list_del(&state->list);
59 		kmem_cache_free(extent_state_cache, state);
60 
61 	}
62 
63 	if (extent_map_cache)
64 		kmem_cache_destroy(extent_map_cache);
65 	if (extent_state_cache)
66 		kmem_cache_destroy(extent_state_cache);
67 	if (extent_buffer_cache)
68 		kmem_cache_destroy(extent_buffer_cache);
69 }
70 
71 void extent_map_tree_init(struct extent_map_tree *tree,
72 			  struct address_space *mapping, gfp_t mask)
73 {
74 	tree->map.rb_node = NULL;
75 	tree->state.rb_node = NULL;
76 	tree->ops = NULL;
77 	rwlock_init(&tree->lock);
78 	spin_lock_init(&tree->lru_lock);
79 	tree->mapping = mapping;
80 	INIT_LIST_HEAD(&tree->buffer_lru);
81 	tree->lru_size = 0;
82 }
83 EXPORT_SYMBOL(extent_map_tree_init);
84 
85 void extent_map_tree_empty_lru(struct extent_map_tree *tree)
86 {
87 	struct extent_buffer *eb;
88 	while(!list_empty(&tree->buffer_lru)) {
89 		eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
90 				lru);
91 		list_del(&eb->lru);
92 		free_extent_buffer(eb);
93 	}
94 }
95 EXPORT_SYMBOL(extent_map_tree_empty_lru);
96 
97 struct extent_map *alloc_extent_map(gfp_t mask)
98 {
99 	struct extent_map *em;
100 	em = kmem_cache_alloc(extent_map_cache, mask);
101 	if (!em || IS_ERR(em))
102 		return em;
103 	em->in_tree = 0;
104 	atomic_set(&em->refs, 1);
105 	return em;
106 }
107 EXPORT_SYMBOL(alloc_extent_map);
108 
109 void free_extent_map(struct extent_map *em)
110 {
111 	if (!em)
112 		return;
113 	if (atomic_dec_and_test(&em->refs)) {
114 		WARN_ON(em->in_tree);
115 		kmem_cache_free(extent_map_cache, em);
116 	}
117 }
118 EXPORT_SYMBOL(free_extent_map);
119 
120 
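/*
 * allocate a new extent_state and put it on the global 'states' list for
 * leak tracking.  The state comes back with a single reference held and
 * no bits set; the caller fills in the range before inserting it.
 */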
121 struct extent_state *alloc_extent_state(gfp_t mask)
122 {
123 	struct extent_state *state;
124 	unsigned long flags;
125 
126 	state = kmem_cache_alloc(extent_state_cache, mask);
127 	if (!state || IS_ERR(state))
128 		return state;
129 	state->state = 0;
130 	state->in_tree = 0;
131 	state->private = 0;
132 
133 	spin_lock_irqsave(&state_lock, flags);
134 	list_add(&state->list, &states);
135 	spin_unlock_irqrestore(&state_lock, flags);
136 
137 	atomic_set(&state->refs, 1);
138 	init_waitqueue_head(&state->wq);
139 	return state;
140 }
141 EXPORT_SYMBOL(alloc_extent_state);
142 
143 void free_extent_state(struct extent_state *state)
144 {
145 	unsigned long flags;
146 	if (!state)
147 		return;
148 	if (atomic_dec_and_test(&state->refs)) {
149 		WARN_ON(state->in_tree);
150 		spin_lock_irqsave(&state_lock, flags);
151 		list_del(&state->list);
152 		spin_unlock_irqrestore(&state_lock, flags);
153 		kmem_cache_free(extent_state_cache, state);
154 	}
155 }
156 EXPORT_SYMBOL(free_extent_state);
157 
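/*
 * insert 'node' into an rb tree of non-overlapping [start, end] ranges.
 * 'offset' is the key being inserted (the end of the new range).  Returns
 * NULL on success, or the existing node whose range already covers
 * 'offset'.
 */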
158 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
159 				   struct rb_node *node)
160 {
161 	struct rb_node ** p = &root->rb_node;
162 	struct rb_node * parent = NULL;
163 	struct tree_entry *entry;
164 
165 	while(*p) {
166 		parent = *p;
167 		entry = rb_entry(parent, struct tree_entry, rb_node);
168 
169 		if (offset < entry->start)
170 			p = &(*p)->rb_left;
171 		else if (offset > entry->end)
172 			p = &(*p)->rb_right;
173 		else
174 			return parent;
175 	}
176 
177 	entry = rb_entry(node, struct tree_entry, rb_node);
178 	entry->in_tree = 1;
179 	rb_link_node(node, parent, p);
180 	rb_insert_color(node, root);
181 	return NULL;
182 }
183 
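/*
 * search for the node containing 'offset'.  If it is not found and
 * 'prev_ret' is non-NULL, *prev_ret is set to the first node that ends at
 * or after 'offset' (or NULL if there is none).  tree_search() below uses
 * this to return the first extent that ends after the search point.
 */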
184 static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
185 				   struct rb_node **prev_ret)
186 {
187 	struct rb_node * n = root->rb_node;
188 	struct rb_node *prev = NULL;
189 	struct tree_entry *entry;
190 	struct tree_entry *prev_entry = NULL;
191 
192 	while(n) {
193 		entry = rb_entry(n, struct tree_entry, rb_node);
194 		prev = n;
195 		prev_entry = entry;
196 
197 		if (offset < entry->start)
198 			n = n->rb_left;
199 		else if (offset > entry->end)
200 			n = n->rb_right;
201 		else
202 			return n;
203 	}
204 	if (!prev_ret)
205 		return NULL;
206 	while(prev && offset > prev_entry->end) {
207 		prev = rb_next(prev);
208 		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
209 	}
210 	*prev_ret = prev;
211 	return NULL;
212 }
213 
214 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
215 {
216 	struct rb_node *prev;
217 	struct rb_node *ret;
218 	ret = __tree_search(root, offset, &prev);
219 	if (!ret)
220 		return prev;
221 	return ret;
222 }
223 
224 static int tree_delete(struct rb_root *root, u64 offset)
225 {
226 	struct rb_node *node;
227 	struct tree_entry *entry;
228 
229 	node = __tree_search(root, offset, NULL);
230 	if (!node)
231 		return -ENOENT;
232 	entry = rb_entry(node, struct tree_entry, rb_node);
233 	entry->in_tree = 0;
234 	rb_erase(node, root);
235 	return 0;
236 }
237 
238 /*
239  * add_extent_mapping tries a simple backward merge with existing
240  * mappings.  The extent_map struct passed in will be inserted into
241  * the tree directly (no copies made, just a reference taken).
242  */
243 int add_extent_mapping(struct extent_map_tree *tree,
244 		       struct extent_map *em)
245 {
246 	int ret = 0;
247 	struct extent_map *prev = NULL;
248 	struct rb_node *rb;
249 
250 	write_lock_irq(&tree->lock);
251 	rb = tree_insert(&tree->map, em->end, &em->rb_node);
252 	if (rb) {
253 		prev = rb_entry(rb, struct extent_map, rb_node);
254 		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
255 		ret = -EEXIST;
256 		goto out;
257 	}
258 	atomic_inc(&em->refs);
259 	if (em->start != 0) {
260 		rb = rb_prev(&em->rb_node);
261 		if (rb)
262 			prev = rb_entry(rb, struct extent_map, rb_node);
263 		if (prev && prev->end + 1 == em->start &&
264 		    ((em->block_start == EXTENT_MAP_HOLE &&
265 		      prev->block_start == EXTENT_MAP_HOLE) ||
266 		     (em->block_start == EXTENT_MAP_INLINE &&
267 		      prev->block_start == EXTENT_MAP_INLINE) ||
268 		     (em->block_start == EXTENT_MAP_DELALLOC &&
269 		      prev->block_start == EXTENT_MAP_DELALLOC) ||
270 		     (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
271 		      em->block_start == prev->block_end + 1))) {
272 			em->start = prev->start;
273 			em->block_start = prev->block_start;
274 			rb_erase(&prev->rb_node, &tree->map);
275 			prev->in_tree = 0;
276 			free_extent_map(prev);
277 		}
278 	}
279 out:
280 	write_unlock_irq(&tree->lock);
281 	return ret;
282 }
283 EXPORT_SYMBOL(add_extent_mapping);
284 
285 /*
286  * lookup_extent_mapping returns the first extent_map struct in the
287  * tree that intersects the [start, end] (inclusive) range.  There may
288  * be additional objects in the tree that intersect, so check the object
289  * returned carefully to make sure you don't need additional lookups.
290  */
291 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
292 					 u64 start, u64 end)
293 {
294 	struct extent_map *em;
295 	struct rb_node *rb_node;
296 
297 	read_lock_irq(&tree->lock);
298 	rb_node = tree_search(&tree->map, start);
299 	if (!rb_node) {
300 		em = NULL;
301 		goto out;
302 	}
303 	if (IS_ERR(rb_node)) {
304 		em = ERR_PTR(PTR_ERR(rb_node));
305 		goto out;
306 	}
307 	em = rb_entry(rb_node, struct extent_map, rb_node);
308 	if (em->end < start || em->start > end) {
309 		em = NULL;
310 		goto out;
311 	}
312 	atomic_inc(&em->refs);
313 out:
314 	read_unlock_irq(&tree->lock);
315 	return em;
316 }
317 EXPORT_SYMBOL(lookup_extent_mapping);
318 
319 /*
320  * removes an extent_map struct from the tree.  No reference counts are
321  * dropped, and no checks are done to see if the range is in use
322  */
323 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
324 {
325 	int ret;
326 
327 	write_lock_irq(&tree->lock);
328 	ret = tree_delete(&tree->map, em->end);
329 	write_unlock_irq(&tree->lock);
330 	return ret;
331 }
332 EXPORT_SYMBOL(remove_extent_mapping);
333 
334 /*
335  * utility function to look for merge candidates inside a given range.
336  * Any extents with matching state are merged together into a single
337  * extent in the tree.  Extents with EXTENT_IOBITS in their state field
338  * are not merged because the end_io handlers need to be able to do
339  * operations on them without sleeping (or doing allocations/splits).
340  *
341  * This should be called with the tree lock held.
342  */
343 static int merge_state(struct extent_map_tree *tree,
344 		       struct extent_state *state)
345 {
346 	struct extent_state *other;
347 	struct rb_node *other_node;
348 
349 	if (state->state & EXTENT_IOBITS)
350 		return 0;
351 
352 	other_node = rb_prev(&state->rb_node);
353 	if (other_node) {
354 		other = rb_entry(other_node, struct extent_state, rb_node);
355 		if (other->end == state->start - 1 &&
356 		    other->state == state->state) {
357 			state->start = other->start;
358 			other->in_tree = 0;
359 			rb_erase(&other->rb_node, &tree->state);
360 			free_extent_state(other);
361 		}
362 	}
363 	other_node = rb_next(&state->rb_node);
364 	if (other_node) {
365 		other = rb_entry(other_node, struct extent_state, rb_node);
366 		if (other->start == state->end + 1 &&
367 		    other->state == state->state) {
368 			other->start = state->start;
369 			state->in_tree = 0;
370 			rb_erase(&state->rb_node, &tree->state);
371 			free_extent_state(state);
372 		}
373 	}
374 	return 0;
375 }
376 
377 /*
378  * insert an extent_state struct into the tree.  'bits' are set on the
379  * struct before it is inserted.
380  *
381  * This may return -EEXIST if the extent is already there, in which case the
382  * state struct is freed.
383  *
384  * The tree lock is not taken internally.  This is a utility function and
385  * probably isn't what you want to call (see set/clear_extent_bit).
386  */
387 static int insert_state(struct extent_map_tree *tree,
388 			struct extent_state *state, u64 start, u64 end,
389 			int bits)
390 {
391 	struct rb_node *node;
392 
393 	if (end < start) {
394 		printk("end < start %Lu %Lu\n", end, start);
395 		WARN_ON(1);
396 	}
397 	state->state |= bits;
398 	state->start = start;
399 	state->end = end;
400 	node = tree_insert(&tree->state, end, &state->rb_node);
401 	if (node) {
402 		struct extent_state *found;
403 		found = rb_entry(node, struct extent_state, rb_node);
404 		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
405 		free_extent_state(state);
406 		return -EEXIST;
407 	}
408 	merge_state(tree, state);
409 	return 0;
410 }
411 
412 /*
413  * split a given extent state struct in two, inserting the preallocated
414  * struct 'prealloc' as the newly created second half.  'split' indicates an
415  * offset inside 'orig' where it should be split.
416  *
417  * Before calling,
418  * the tree has 'orig' at [orig->start, orig->end].  After calling, there
419  * are two extent state structs in the tree:
420  * prealloc: [orig->start, split - 1]
421  * orig: [ split, orig->end ]
422  *
423  * The tree locks are not taken by this function. They need to be held
424  * by the caller.
425  */
426 static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
427 		       struct extent_state *prealloc, u64 split)
428 {
429 	struct rb_node *node;
430 	prealloc->start = orig->start;
431 	prealloc->end = split - 1;
432 	prealloc->state = orig->state;
433 	orig->start = split;
434 
435 	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
436 	if (node) {
437 		struct extent_state *found;
438 		found = rb_entry(node, struct extent_state, rb_node);
439 		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
440 		free_extent_state(prealloc);
441 		return -EEXIST;
442 	}
443 	return 0;
444 }
445 
446 /*
447  * utility function to clear some bits in an extent state struct.
448  * it will optionally wake up anyone waiting on this state (wake == 1), or
449  * forcibly remove the state from the tree (delete == 1).
450  *
451  * If no bits are set on the state struct after clearing things, the
452  * struct is freed and removed from the tree
453  */
454 static int clear_state_bit(struct extent_map_tree *tree,
455 			    struct extent_state *state, int bits, int wake,
456 			    int delete)
457 {
458 	int ret = state->state & bits;
459 	state->state &= ~bits;
460 	if (wake)
461 		wake_up(&state->wq);
462 	if (delete || state->state == 0) {
463 		if (state->in_tree) {
464 			rb_erase(&state->rb_node, &tree->state);
465 			state->in_tree = 0;
466 			free_extent_state(state);
467 		} else {
468 			WARN_ON(1);
469 		}
470 	} else {
471 		merge_state(tree, state);
472 	}
473 	return ret;
474 }
475 
476 /*
477  * clear some bits on a range in the tree.  This may require splitting
478  * or inserting elements in the tree, so the gfp mask is used to
479  * indicate which allocations or sleeping are allowed.
480  *
481  * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
482  * the given range from the tree regardless of state (ie for truncate).
483  *
484  * the range [start, end] is inclusive.
485  *
486  * This takes the tree lock, and returns < 0 on error, > 0 if any of the
487  * bits were already set, or zero if none of the bits were already set.
488  */
489 int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
490 		     int bits, int wake, int delete, gfp_t mask)
491 {
492 	struct extent_state *state;
493 	struct extent_state *prealloc = NULL;
494 	struct rb_node *node;
495 	unsigned long flags;
496 	int err;
497 	int set = 0;
498 
499 again:
500 	if (!prealloc && (mask & __GFP_WAIT)) {
501 		prealloc = alloc_extent_state(mask);
502 		if (!prealloc)
503 			return -ENOMEM;
504 	}
505 
506 	write_lock_irqsave(&tree->lock, flags);
507 	/*
508 	 * this search will find the extents that end after
509 	 * our range starts
510 	 */
511 	node = tree_search(&tree->state, start);
512 	if (!node)
513 		goto out;
514 	state = rb_entry(node, struct extent_state, rb_node);
515 	if (state->start > end)
516 		goto out;
517 	WARN_ON(state->end < start);
518 
519 	/*
520 	 *     | ---- desired range ---- |
521 	 *  | state | or
522 	 *  | ------------- state -------------- |
523 	 *
524 	 * We need to split the extent we found, and may flip
525 	 * bits on second half.
526 	 *
527 	 * If the extent we found extends past our range, we
528 	 * just split and search again.  It'll get split again
529 	 * the next time though.
530 	 *
531 	 * If the extent we found is inside our range, we clear
532 	 * the desired bit on it.
533 	 */
534 
535 	if (state->start < start) {
536 		err = split_state(tree, state, prealloc, start);
537 		BUG_ON(err == -EEXIST);
538 		prealloc = NULL;
539 		if (err)
540 			goto out;
541 		if (state->end <= end) {
542 			start = state->end + 1;
543 			set |= clear_state_bit(tree, state, bits,
544 					wake, delete);
545 		} else {
546 			start = state->start;
547 		}
548 		goto search_again;
549 	}
550 	/*
551 	 * | ---- desired range ---- |
552 	 *                        | state |
553 	 * We need to split the extent, and clear the bit
554 	 * on the first half
555 	 */
556 	if (state->start <= end && state->end > end) {
557 		err = split_state(tree, state, prealloc, end + 1);
558 		BUG_ON(err == -EEXIST);
559 
560 		if (wake)
561 			wake_up(&state->wq);
562 		set |= clear_state_bit(tree, prealloc, bits,
563 				       wake, delete);
564 		prealloc = NULL;
565 		goto out;
566 	}
567 
568 	start = state->end + 1;
569 	set |= clear_state_bit(tree, state, bits, wake, delete);
570 	goto search_again;
571 
572 out:
573 	write_unlock_irqrestore(&tree->lock, flags);
574 	if (prealloc)
575 		free_extent_state(prealloc);
576 
577 	return set;
578 
579 search_again:
580 	if (start > end)
581 		goto out;
582 	write_unlock_irqrestore(&tree->lock, flags);
583 	if (mask & __GFP_WAIT)
584 		cond_resched();
585 	goto again;
586 }
587 EXPORT_SYMBOL(clear_extent_bit);
588 
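/*
 * sleep until someone clears a bit on 'state' and wakes its waitqueue.
 * The tree lock is dropped across the schedule() and re-taken afterwards,
 * so the caller must revalidate anything it derived from the tree.
 */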
589 static int wait_on_state(struct extent_map_tree *tree,
590 			 struct extent_state *state)
591 {
592 	DEFINE_WAIT(wait);
593 	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
594 	read_unlock_irq(&tree->lock);
595 	schedule();
596 	read_lock_irq(&tree->lock);
597 	finish_wait(&state->wq, &wait);
598 	return 0;
599 }
600 
601 /*
602  * waits for one or more bits to clear on a range in the state tree.
603  * The range [start, end] is inclusive.
604  * The tree lock is taken by this function
605  */
606 int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
607 {
608 	struct extent_state *state;
609 	struct rb_node *node;
610 
611 	read_lock_irq(&tree->lock);
612 again:
613 	while (1) {
614 		/*
615 		 * this search will find all the extents that end after
616 		 * our range starts
617 		 */
618 		node = tree_search(&tree->state, start);
619 		if (!node)
620 			break;
621 
622 		state = rb_entry(node, struct extent_state, rb_node);
623 
624 		if (state->start > end)
625 			goto out;
626 
627 		if (state->state & bits) {
628 			start = state->start;
629 			atomic_inc(&state->refs);
630 			wait_on_state(tree, state);
631 			free_extent_state(state);
632 			goto again;
633 		}
634 		start = state->end + 1;
635 
636 		if (start > end)
637 			break;
638 
639 		if (need_resched()) {
640 			read_unlock_irq(&tree->lock);
641 			cond_resched();
642 			read_lock_irq(&tree->lock);
643 		}
644 	}
645 out:
646 	read_unlock_irq(&tree->lock);
647 	return 0;
648 }
649 EXPORT_SYMBOL(wait_extent_bit);
650 
651 /*
652  * set some bits on a range in the tree.  This may require allocations
653  * or sleeping, so the gfp mask is used to indicate what is allowed.
654  *
655  * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
656  * range already has the desired bits set.  The start of the existing
657  * range is returned in failed_start in this case.
658  *
659  * [start, end] is inclusive
660  * This takes the tree lock.
661  */
662 int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
663 		   int exclusive, u64 *failed_start, gfp_t mask)
664 {
665 	struct extent_state *state;
666 	struct extent_state *prealloc = NULL;
667 	struct rb_node *node;
668 	unsigned long flags;
669 	int err = 0;
670 	int set;
671 	u64 last_start;
672 	u64 last_end;
673 again:
674 	if (!prealloc && (mask & __GFP_WAIT)) {
675 		prealloc = alloc_extent_state(mask);
676 		if (!prealloc)
677 			return -ENOMEM;
678 	}
679 
680 	write_lock_irqsave(&tree->lock, flags);
681 	/*
682 	 * this search will find all the extents that end after
683 	 * our range starts.
684 	 */
685 	node = tree_search(&tree->state, start);
686 	if (!node) {
687 		err = insert_state(tree, prealloc, start, end, bits);
688 		prealloc = NULL;
689 		BUG_ON(err == -EEXIST);
690 		goto out;
691 	}
692 
693 	state = rb_entry(node, struct extent_state, rb_node);
694 	last_start = state->start;
695 	last_end = state->end;
696 
697 	/*
698 	 * | ---- desired range ---- |
699 	 * | state |
700 	 *
701 	 * Just lock what we found and keep going
702 	 */
703 	if (state->start == start && state->end <= end) {
704 		set = state->state & bits;
705 		if (set && exclusive) {
706 			*failed_start = state->start;
707 			err = -EEXIST;
708 			goto out;
709 		}
710 		state->state |= bits;
711 		start = state->end + 1;
712 		merge_state(tree, state);
713 		goto search_again;
714 	}
715 
716 	/*
717 	 *     | ---- desired range ---- |
718 	 * | state |
719 	 *   or
720 	 * | ------------- state -------------- |
721 	 *
722 	 * We need to split the extent we found, and may flip bits on
723 	 * second half.
724 	 *
725 	 * If the extent we found extends past our
726 	 * range, we just split and search again.  It'll get split
727 	 * again the next time though.
728 	 *
729 	 * If the extent we found is inside our range, we set the
730 	 * desired bit on it.
731 	 */
732 	if (state->start < start) {
733 		set = state->state & bits;
734 		if (exclusive && set) {
735 			*failed_start = start;
736 			err = -EEXIST;
737 			goto out;
738 		}
739 		err = split_state(tree, state, prealloc, start);
740 		BUG_ON(err == -EEXIST);
741 		prealloc = NULL;
742 		if (err)
743 			goto out;
744 		if (state->end <= end) {
745 			state->state |= bits;
746 			start = state->end + 1;
747 			merge_state(tree, state);
748 		} else {
749 			start = state->start;
750 		}
751 		goto search_again;
752 	}
753 	/*
754 	 * | ---- desired range ---- |
755 	 *     | state | or               | state |
756 	 *
757 	 * There's a hole, we need to insert something in it and
758 	 * ignore the extent we found.
759 	 */
760 	if (state->start > start) {
761 		u64 this_end;
762 		if (end < last_start)
763 			this_end = end;
764 		else
765 			this_end = last_start - 1;
766 		err = insert_state(tree, prealloc, start, this_end,
767 				   bits);
768 		prealloc = NULL;
769 		BUG_ON(err == -EEXIST);
770 		if (err)
771 			goto out;
772 		start = this_end + 1;
773 		goto search_again;
774 	}
775 	/*
776 	 * | ---- desired range ---- |
777 	 *                        | state |
778 	 * We need to split the extent, and set the bit
779 	 * on the first half
780 	 */
781 	if (state->start <= end && state->end > end) {
782 		set = state->state & bits;
783 		if (exclusive && set) {
784 			*failed_start = start;
785 			err = -EEXIST;
786 			goto out;
787 		}
788 		err = split_state(tree, state, prealloc, end + 1);
789 		BUG_ON(err == -EEXIST);
790 
791 		prealloc->state |= bits;
792 		merge_state(tree, prealloc);
793 		prealloc = NULL;
794 		goto out;
795 	}
796 
797 	goto search_again;
798 
799 out:
800 	write_unlock_irqrestore(&tree->lock, flags);
801 	if (prealloc)
802 		free_extent_state(prealloc);
803 
804 	return err;
805 
806 search_again:
807 	if (start > end)
808 		goto out;
809 	write_unlock_irqrestore(&tree->lock, flags);
810 	if (mask & __GFP_WAIT)
811 		cond_resched();
812 	goto again;
813 }
814 EXPORT_SYMBOL(set_extent_bit);
815 
816 /* wrappers around set/clear extent bit */
817 int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
818 		     gfp_t mask)
819 {
820 	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
821 			      mask);
822 }
823 EXPORT_SYMBOL(set_extent_dirty);
824 
825 int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
826 		    int bits, gfp_t mask)
827 {
828 	return set_extent_bit(tree, start, end, bits, 0, NULL,
829 			      mask);
830 }
831 EXPORT_SYMBOL(set_extent_bits);
832 
833 int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
834 		      int bits, gfp_t mask)
835 {
836 	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
837 }
838 EXPORT_SYMBOL(clear_extent_bits);
839 
840 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
841 		     gfp_t mask)
842 {
843 	return set_extent_bit(tree, start, end,
844 			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
845 			      mask);
846 }
847 EXPORT_SYMBOL(set_extent_delalloc);
848 
849 int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
850 		       gfp_t mask)
851 {
852 	return clear_extent_bit(tree, start, end,
853 				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
854 }
855 EXPORT_SYMBOL(clear_extent_dirty);
856 
857 int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
858 		     gfp_t mask)
859 {
860 	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
861 			      mask);
862 }
863 EXPORT_SYMBOL(set_extent_new);
864 
865 int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
866 		       gfp_t mask)
867 {
868 	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
869 }
870 EXPORT_SYMBOL(clear_extent_new);
871 
872 int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
873 			gfp_t mask)
874 {
875 	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
876 			      mask);
877 }
878 EXPORT_SYMBOL(set_extent_uptodate);
879 
880 int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
881 			  gfp_t mask)
882 {
883 	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
884 }
885 EXPORT_SYMBOL(clear_extent_uptodate);
886 
887 int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
888 			 gfp_t mask)
889 {
890 	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
891 			      0, NULL, mask);
892 }
893 EXPORT_SYMBOL(set_extent_writeback);
894 
895 int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
896 			   gfp_t mask)
897 {
898 	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
899 }
900 EXPORT_SYMBOL(clear_extent_writeback);
901 
902 int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
903 {
904 	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
905 }
906 EXPORT_SYMBOL(wait_on_extent_writeback);
907 
908 /*
909  * locks a range in ascending order, waiting for any locked regions
910  * it hits on the way.  [start,end] are inclusive, and this will sleep.
911  */
912 int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
913 {
914 	int err;
915 	u64 failed_start;
916 	while (1) {
917 		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
918 				     &failed_start, mask);
919 		if (err == -EEXIST && (mask & __GFP_WAIT)) {
920 			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
921 			start = failed_start;
922 		} else {
923 			break;
924 		}
925 		WARN_ON(start > end);
926 	}
927 	return err;
928 }
929 EXPORT_SYMBOL(lock_extent);
930 
931 int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
932 		  gfp_t mask)
933 {
934 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
935 }
936 EXPORT_SYMBOL(unlock_extent);
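
/*
 * A minimal usage sketch (illustrative only -- 'tree', 'start' and 'end'
 * stand for whatever range the caller is working on):
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	... operate on [start, end], possibly submitting IO ...
 *	unlock_extent(tree, start, end, GFP_NOFS);
 *
 * lock_extent() sleeps until no part of the range is locked by someone
 * else, so it must not be called with the tree lock already held.
 */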
937 
938 /*
939  * helper function to set pages and extents in the tree dirty
940  */
941 int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
942 {
943 	unsigned long index = start >> PAGE_CACHE_SHIFT;
944 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
945 	struct page *page;
946 
947 	while (index <= end_index) {
948 		page = find_get_page(tree->mapping, index);
949 		BUG_ON(!page);
950 		__set_page_dirty_nobuffers(page);
951 		page_cache_release(page);
952 		index++;
953 	}
954 	set_extent_dirty(tree, start, end, GFP_NOFS);
955 	return 0;
956 }
957 EXPORT_SYMBOL(set_range_dirty);
958 
959 /*
960  * helper function to set both pages and extents in the tree writeback
961  */
962 int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
963 {
964 	unsigned long index = start >> PAGE_CACHE_SHIFT;
965 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
966 	struct page *page;
967 
968 	while (index <= end_index) {
969 		page = find_get_page(tree->mapping, index);
970 		BUG_ON(!page);
971 		set_page_writeback(page);
972 		page_cache_release(page);
973 		index++;
974 	}
975 	set_extent_writeback(tree, start, end, GFP_NOFS);
976 	return 0;
977 }
978 EXPORT_SYMBOL(set_range_writeback);
979 
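/*
 * find the first extent that ends at or after 'start' and has any of
 * 'bits' set.  On success the range is returned in *start_ret/*end_ret
 * and 0 is returned; 1 is returned if no matching extent exists.
 */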
980 int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
981 			  u64 *start_ret, u64 *end_ret, int bits)
982 {
983 	struct rb_node *node;
984 	struct extent_state *state;
985 	int ret = 1;
986 
987 	read_lock_irq(&tree->lock);
988 	/*
989 	 * this search will find all the extents that end after
990 	 * our range starts.
991 	 */
992 	node = tree_search(&tree->state, start);
993 	if (!node || IS_ERR(node)) {
994 		goto out;
995 	}
996 
997 	while(1) {
998 		state = rb_entry(node, struct extent_state, rb_node);
999 		if (state->end >= start && (state->state & bits)) {
1000 			*start_ret = state->start;
1001 			*end_ret = state->end;
1002 			ret = 0;
1003 			break;
1004 		}
1005 		node = rb_next(node);
1006 		if (!node)
1007 			break;
1008 	}
1009 out:
1010 	read_unlock_irq(&tree->lock);
1011 	return ret;
1012 }
1013 EXPORT_SYMBOL(find_first_extent_bit);
1014 
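/*
 * walk a run of contiguous EXTENT_DELALLOC extents beginning exactly at
 * 'start'.  Extents at or beyond 'lock_start' are locked, waiting for any
 * existing lock to clear first.  The walk is capped at roughly
 * 'max_bytes'.  *end is set to the end of the last extent found and the
 * number of extents in the run is returned.
 */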
1015 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
1016 			     u64 start, u64 lock_start, u64 *end, u64 max_bytes)
1017 {
1018 	struct rb_node *node;
1019 	struct extent_state *state;
1020 	u64 cur_start = start;
1021 	u64 found = 0;
1022 	u64 total_bytes = 0;
1023 
1024 	write_lock_irq(&tree->lock);
1025 	/*
1026 	 * this search will find all the extents that end after
1027 	 * our range starts.
1028 	 */
1029 search_again:
1030 	node = tree_search(&tree->state, cur_start);
1031 	if (!node || IS_ERR(node)) {
1032 		goto out;
1033 	}
1034 
1035 	while(1) {
1036 		state = rb_entry(node, struct extent_state, rb_node);
1037 		if (state->start != cur_start) {
1038 			goto out;
1039 		}
1040 		if (!(state->state & EXTENT_DELALLOC)) {
1041 			goto out;
1042 		}
1043 		if (state->start >= lock_start) {
1044 			if (state->state & EXTENT_LOCKED) {
1045 				DEFINE_WAIT(wait);
1046 				atomic_inc(&state->refs);
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
1047 				write_unlock_irq(&tree->lock);
1048 				schedule();
1049 				write_lock_irq(&tree->lock);
1050 				finish_wait(&state->wq, &wait);
1051 				free_extent_state(state);
1052 				goto search_again;
1053 			}
1054 			state->state |= EXTENT_LOCKED;
1055 		}
1056 		found++;
1057 		*end = state->end;
1058 		cur_start = state->end + 1;
1059 		node = rb_next(node);
1060 		if (!node)
1061 			break;
1062 		total_bytes += state->end - state->start + 1;
1063 		if (total_bytes >= max_bytes)
1064 			break;
1065 	}
1066 out:
1067 	write_unlock_irq(&tree->lock);
1068 	return found;
1069 }
1070 
1071 /*
1072  * helper function to lock both pages and extents in the tree.
1073  * pages must be locked first.
1074  */
1075 int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
1076 {
1077 	unsigned long index = start >> PAGE_CACHE_SHIFT;
1078 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1079 	struct page *page;
1080 	int err;
1081 
1082 	while (index <= end_index) {
1083 		page = grab_cache_page(tree->mapping, index);
1084 		if (!page) {
1085 			err = -ENOMEM;
1086 			goto failed;
1087 		}
1088 		if (IS_ERR(page)) {
1089 			err = PTR_ERR(page);
1090 			goto failed;
1091 		}
1092 		index++;
1093 	}
1094 	lock_extent(tree, start, end, GFP_NOFS);
1095 	return 0;
1096 
1097 failed:
1098 	/*
1099 	 * we failed above in getting the page at 'index', so we undo here
1100 	 * up to but not including the page at 'index'
1101 	 */
1102 	end_index = index;
1103 	index = start >> PAGE_CACHE_SHIFT;
1104 	while (index < end_index) {
1105 		page = find_get_page(tree->mapping, index);
1106 		unlock_page(page);
1107 		page_cache_release(page);
1108 		index++;
1109 	}
1110 	return err;
1111 }
1112 EXPORT_SYMBOL(lock_range);
1113 
1114 /*
1115  * helper function to unlock both pages and extents in the tree.
1116  */
1117 int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
1118 {
1119 	unsigned long index = start >> PAGE_CACHE_SHIFT;
1120 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1121 	struct page *page;
1122 
1123 	while (index <= end_index) {
1124 		page = find_get_page(tree->mapping, index);
1125 		unlock_page(page);
1126 		page_cache_release(page);
1127 		index++;
1128 	}
1129 	unlock_extent(tree, start, end, GFP_NOFS);
1130 	return 0;
1131 }
1132 EXPORT_SYMBOL(unlock_range);
1133 
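/*
 * set_state_private/get_state_private stash and fetch a caller supplied
 * u64 on the extent_state whose range starts exactly at 'start'.  -ENOENT
 * is returned if no such state exists.
 */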
1134 int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
1135 {
1136 	struct rb_node *node;
1137 	struct extent_state *state;
1138 	int ret = 0;
1139 
1140 	write_lock_irq(&tree->lock);
1141 	/*
1142 	 * this search will find all the extents that end after
1143 	 * our range starts.
1144 	 */
1145 	node = tree_search(&tree->state, start);
1146 	if (!node || IS_ERR(node)) {
1147 		ret = -ENOENT;
1148 		goto out;
1149 	}
1150 	state = rb_entry(node, struct extent_state, rb_node);
1151 	if (state->start != start) {
1152 		ret = -ENOENT;
1153 		goto out;
1154 	}
1155 	state->private = private;
1156 out:
1157 	write_unlock_irq(&tree->lock);
1158 	return ret;
1159 }
1160 
1161 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
1162 {
1163 	struct rb_node *node;
1164 	struct extent_state *state;
1165 	int ret = 0;
1166 
1167 	read_lock_irq(&tree->lock);
1168 	/*
1169 	 * this search will find all the extents that end after
1170 	 * our range starts.
1171 	 */
1172 	node = tree_search(&tree->state, start);
1173 	if (!node || IS_ERR(node)) {
1174 		ret = -ENOENT;
1175 		goto out;
1176 	}
1177 	state = rb_entry(node, struct extent_state, rb_node);
1178 	if (state->start != start) {
1179 		ret = -ENOENT;
1180 		goto out;
1181 	}
1182 	*private = state->private;
1183 out:
1184 	read_unlock_irq(&tree->lock);
1185 	return ret;
1186 }
1187 
1188 /*
1189  * searches a range in the state tree for a given mask.
1190  * If 'filled' == 1, this returns 1 only if every extent in the
1191  * range has the bits set.  Otherwise, 1 is returned if any bit in the
1192  * range is found set.
1193  */
1194 int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
1195 		   int bits, int filled)
1196 {
1197 	struct extent_state *state = NULL;
1198 	struct rb_node *node;
1199 	int bitset = 0;
1200 
1201 	read_lock_irq(&tree->lock);
1202 	node = tree_search(&tree->state, start);
1203 	while (node && start <= end) {
1204 		state = rb_entry(node, struct extent_state, rb_node);
1205 		if (state->start > end)
1206 			break;
1207 
1208 		if (filled && state->start > start) {
1209 			bitset = 0;
1210 			break;
1211 		}
1212 		if (state->state & bits) {
1213 			bitset = 1;
1214 			if (!filled)
1215 				break;
1216 		} else if (filled) {
1217 			bitset = 0;
1218 			break;
1219 		}
1220 		start = state->end + 1;
1221 		if (start > end)
1222 			break;
1223 		node = rb_next(node);
1224 	}
1225 	read_unlock_irq(&tree->lock);
1226 	return bitset;
1227 }
1228 EXPORT_SYMBOL(test_range_bit);
1229 
1230 /*
1231  * helper function to set a given page up to date if all the
1232  * extents in the tree for that page are up to date
1233  */
1234 static int check_page_uptodate(struct extent_map_tree *tree,
1235 			       struct page *page)
1236 {
1237 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1238 	u64 end = start + PAGE_CACHE_SIZE - 1;
1239 	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1240 		SetPageUptodate(page);
1241 	return 0;
1242 }
1243 
1244 /*
1245  * helper function to unlock a page if all the extents in the tree
1246  * for that page are unlocked
1247  */
1248 static int check_page_locked(struct extent_map_tree *tree,
1249 			     struct page *page)
1250 {
1251 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1252 	u64 end = start + PAGE_CACHE_SIZE - 1;
1253 	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1254 		unlock_page(page);
1255 	return 0;
1256 }
1257 
1258 /*
1259  * helper function to end page writeback if all the extents
1260  * in the tree for that page are done with writeback
1261  */
1262 static int check_page_writeback(struct extent_map_tree *tree,
1263 			     struct page *page)
1264 {
1265 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1266 	u64 end = start + PAGE_CACHE_SIZE - 1;
1267 	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1268 		end_page_writeback(page);
1269 	return 0;
1270 }
1271 
1272 /* lots and lots of room for performance fixes in the end_bio funcs */
1273 
1274 /*
1275  * after a writepage IO is done, we need to:
1276  * clear the uptodate bits on error
1277  * clear the writeback bits in the extent tree for this IO
1278  * end_page_writeback if the page has no more pending IO
1279  *
1280  * Scheduling is not allowed, so the extent state tree is expected
1281  * to have one and only one object corresponding to this IO.
1282  */
1283 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1284 static void end_bio_extent_writepage(struct bio *bio, int err)
1285 #else
1286 static int end_bio_extent_writepage(struct bio *bio,
1287 				   unsigned int bytes_done, int err)
1288 #endif
1289 {
1290 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1291 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1292 	struct extent_map_tree *tree = bio->bi_private;
1293 	u64 start;
1294 	u64 end;
1295 	int whole_page;
1296 
1297 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1298 	if (bio->bi_size)
1299 		return 1;
1300 #endif
1301 
1302 	do {
1303 		struct page *page = bvec->bv_page;
1304 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1305 			 bvec->bv_offset;
1306 		end = start + bvec->bv_len - 1;
1307 
1308 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1309 			whole_page = 1;
1310 		else
1311 			whole_page = 0;
1312 
1313 		if (--bvec >= bio->bi_io_vec)
1314 			prefetchw(&bvec->bv_page->flags);
1315 
1316 		if (!uptodate) {
1317 			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1318 			ClearPageUptodate(page);
1319 			SetPageError(page);
1320 		}
1321 		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1322 
1323 		if (whole_page)
1324 			end_page_writeback(page);
1325 		else
1326 			check_page_writeback(tree, page);
1327 		if (tree->ops && tree->ops->writepage_end_io_hook)
1328 			tree->ops->writepage_end_io_hook(page, start, end);
1329 	} while (bvec >= bio->bi_io_vec);
1330 
1331 	bio_put(bio);
1332 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1333 	return 0;
1334 #endif
1335 }
1336 
1337 /*
1338  * after a readpage IO is done, we need to:
1339  * clear the uptodate bits on error
1340  * set the uptodate bits if things worked
1341  * set the page up to date if all extents in the tree are uptodate
1342  * clear the lock bit in the extent tree
1343  * unlock the page if there are no other extents locked for it
1344  *
1345  * Scheduling is not allowed, so the extent state tree is expected
1346  * to have one and only one object corresponding to this IO.
1347  */
1348 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1349 static void end_bio_extent_readpage(struct bio *bio, int err)
1350 #else
1351 static int end_bio_extent_readpage(struct bio *bio,
1352 				   unsigned int bytes_done, int err)
1353 #endif
1354 {
1355 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1356 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1357 	struct extent_map_tree *tree = bio->bi_private;
1358 	u64 start;
1359 	u64 end;
1360 	int whole_page;
1361 	int ret;
1362 
1363 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1364 	if (bio->bi_size)
1365 		return 1;
1366 #endif
1367 
1368 	do {
1369 		struct page *page = bvec->bv_page;
1370 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1371 			bvec->bv_offset;
1372 		end = start + bvec->bv_len - 1;
1373 
1374 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1375 			whole_page = 1;
1376 		else
1377 			whole_page = 0;
1378 
1379 		if (--bvec >= bio->bi_io_vec)
1380 			prefetchw(&bvec->bv_page->flags);
1381 
1382 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1383 			ret = tree->ops->readpage_end_io_hook(page, start, end);
1384 			if (ret)
1385 				uptodate = 0;
1386 		}
1387 		if (uptodate) {
1388 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1389 			if (whole_page)
1390 				SetPageUptodate(page);
1391 			else
1392 				check_page_uptodate(tree, page);
1393 		} else {
1394 			ClearPageUptodate(page);
1395 			SetPageError(page);
1396 		}
1397 
1398 		unlock_extent(tree, start, end, GFP_ATOMIC);
1399 
1400 		if (whole_page)
1401 			unlock_page(page);
1402 		else
1403 			check_page_locked(tree, page);
1404 	} while (bvec >= bio->bi_io_vec);
1405 
1406 	bio_put(bio);
1407 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1408 	return 0;
1409 #endif
1410 }
1411 
1412 /*
1413  * IO done from prepare_write is pretty simple: we just unlock
1414  * the structs in the extent tree when done, and set the uptodate bits
1415  * as appropriate.
1416  */
1417 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1418 static void end_bio_extent_preparewrite(struct bio *bio, int err)
1419 #else
1420 static int end_bio_extent_preparewrite(struct bio *bio,
1421 				       unsigned int bytes_done, int err)
1422 #endif
1423 {
1424 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1425 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1426 	struct extent_map_tree *tree = bio->bi_private;
1427 	u64 start;
1428 	u64 end;
1429 
1430 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1431 	if (bio->bi_size)
1432 		return 1;
1433 #endif
1434 
1435 	do {
1436 		struct page *page = bvec->bv_page;
1437 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1438 			bvec->bv_offset;
1439 		end = start + bvec->bv_len - 1;
1440 
1441 		if (--bvec >= bio->bi_io_vec)
1442 			prefetchw(&bvec->bv_page->flags);
1443 
1444 		if (uptodate) {
1445 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1446 		} else {
1447 			ClearPageUptodate(page);
1448 			SetPageError(page);
1449 		}
1450 
1451 		unlock_extent(tree, start, end, GFP_ATOMIC);
1452 
1453 	} while (bvec >= bio->bi_io_vec);
1454 
1455 	bio_put(bio);
1456 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1457 	return 0;
1458 #endif
1459 }
1460 
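/*
 * build a single vector bio for one chunk of a page and submit it.  The
 * tree is stashed in bi_private so the end_io handlers above can find it.
 * Returns -EOPNOTSUPP if the block device rejected the request.
 */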
1461 static int submit_extent_page(int rw, struct extent_map_tree *tree,
1462 			      struct page *page, sector_t sector,
1463 			      size_t size, unsigned long offset,
1464 			      struct block_device *bdev,
1465 			      bio_end_io_t end_io_func)
1466 {
1467 	struct bio *bio;
1468 	int ret = 0;
1469 
1470 	bio = bio_alloc(GFP_NOIO, 1);
1471 
1472 	bio->bi_sector = sector;
1473 	bio->bi_bdev = bdev;
1474 	bio->bi_io_vec[0].bv_page = page;
1475 	bio->bi_io_vec[0].bv_len = size;
1476 	bio->bi_io_vec[0].bv_offset = offset;
1477 
1478 	bio->bi_vcnt = 1;
1479 	bio->bi_idx = 0;
1480 	bio->bi_size = size;
1481 
1482 	bio->bi_end_io = end_io_func;
1483 	bio->bi_private = tree;
1484 
1485 	bio_get(bio);
1486 	submit_bio(rw, bio);
1487 
1488 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
1489 		ret = -EOPNOTSUPP;
1490 
1491 	bio_put(bio);
1492 	return ret;
1493 }
1494 
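/*
 * tag a page as managed by the extent map code: set PG_private, store
 * EXTENT_PAGE_PRIVATE in page->private and take an extra page reference.
 * The WARN_ON catches mappings that forgot to wire up ->invalidatepage.
 */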
1495 void set_page_extent_mapped(struct page *page)
1496 {
1497 	if (!PagePrivate(page)) {
1498 		SetPagePrivate(page);
1499 		WARN_ON(!page->mapping->a_ops->invalidatepage);
1500 		set_page_private(page, EXTENT_PAGE_PRIVATE);
1501 		page_cache_get(page);
1502 	}
1503 }
1504 
1505 /*
1506  * basic readpage implementation.  Locked extent state structs are inserted
1507  * into the tree and removed when the IO is done (by the end_io
1508  * handlers).
1509  */
1510 int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1511 			  get_extent_t *get_extent)
1512 {
1513 	struct inode *inode = page->mapping->host;
1514 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1515 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1516 	u64 end;
1517 	u64 cur = start;
1518 	u64 extent_offset;
1519 	u64 last_byte = i_size_read(inode);
1520 	u64 block_start;
1521 	u64 cur_end;
1522 	sector_t sector;
1523 	struct extent_map *em;
1524 	struct block_device *bdev;
1525 	int ret;
1526 	int nr = 0;
1527 	size_t page_offset = 0;
1528 	size_t iosize;
1529 	size_t blocksize = inode->i_sb->s_blocksize;
1530 
1531 	set_page_extent_mapped(page);
1532 
1533 	end = page_end;
1534 	lock_extent(tree, start, end, GFP_NOFS);
1535 
1536 	while (cur <= end) {
1537 		if (cur >= last_byte) {
1538 			iosize = PAGE_CACHE_SIZE - page_offset;
1539 			zero_user_page(page, page_offset, iosize, KM_USER0);
1540 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1541 					    GFP_NOFS);
1542 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1543 			break;
1544 		}
1545 		em = get_extent(inode, page, page_offset, cur, end, 0);
1546 		if (IS_ERR(em) || !em) {
1547 			SetPageError(page);
1548 			unlock_extent(tree, cur, end, GFP_NOFS);
1549 			break;
1550 		}
1551 
1552 		extent_offset = cur - em->start;
1553 		BUG_ON(em->end < cur);
1554 		BUG_ON(end < cur);
1555 
1556 		iosize = min(em->end - cur, end - cur) + 1;
1557 		cur_end = min(em->end, end);
1558 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1559 		sector = (em->block_start + extent_offset) >> 9;
1560 		bdev = em->bdev;
1561 		block_start = em->block_start;
1562 		free_extent_map(em);
1563 		em = NULL;
1564 
1565 		/* we've found a hole, just zero and go on */
1566 		if (block_start == EXTENT_MAP_HOLE) {
1567 			zero_user_page(page, page_offset, iosize, KM_USER0);
1568 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1569 					    GFP_NOFS);
1570 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1571 			cur = cur + iosize;
1572 			page_offset += iosize;
1573 			continue;
1574 		}
1575 		/* the get_extent function already copied into the page */
1576 		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1577 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1578 			cur = cur + iosize;
1579 			page_offset += iosize;
1580 			continue;
1581 		}
1582 
1583 		ret = 0;
1584 		if (tree->ops && tree->ops->readpage_io_hook) {
1585 			ret = tree->ops->readpage_io_hook(page, cur,
1586 							  cur + iosize - 1);
1587 		}
1588 		if (!ret) {
1589 			ret = submit_extent_page(READ, tree, page,
1590 						 sector, iosize, page_offset,
1591 						 bdev, end_bio_extent_readpage);
1592 		}
1593 		if (ret)
1594 			SetPageError(page);
1595 		cur = cur + iosize;
1596 		page_offset += iosize;
1597 		nr++;
1598 	}
1599 	if (!nr) {
1600 		if (!PageError(page))
1601 			SetPageUptodate(page);
1602 		unlock_page(page);
1603 	}
1604 	return 0;
1605 }
1606 EXPORT_SYMBOL(extent_read_full_page);
1607 
1608 /*
1609  * the writepage semantics are similar to regular writepage.  extent
1610  * records are inserted to lock ranges in the tree, and as dirty areas
1611  * are found, they are marked writeback.  Then the lock bits are removed
1612  * and the end_io handler clears the writeback ranges
1613  */
1614 int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1615 			  get_extent_t *get_extent,
1616 			  struct writeback_control *wbc)
1617 {
1618 	struct inode *inode = page->mapping->host;
1619 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1620 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1621 	u64 end;
1622 	u64 cur = start;
1623 	u64 extent_offset;
1624 	u64 last_byte = i_size_read(inode);
1625 	u64 block_start;
1626 	u64 iosize;
1627 	sector_t sector;
1628 	struct extent_map *em;
1629 	struct block_device *bdev;
1630 	int ret;
1631 	int nr = 0;
1632 	size_t page_offset = 0;
1633 	size_t blocksize;
1634 	loff_t i_size = i_size_read(inode);
1635 	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1636 	u64 nr_delalloc;
1637 	u64 delalloc_end;
1638 
1639 	WARN_ON(!PageLocked(page));
1640 	if (page->index > end_index) {
1641 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1642 		unlock_page(page);
1643 		return 0;
1644 	}
1645 
1646 	if (page->index == end_index) {
1647 		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1648 		zero_user_page(page, offset,
1649 			       PAGE_CACHE_SIZE - offset, KM_USER0);
1650 	}
1651 
1652 	set_page_extent_mapped(page);
1653 
1654 	lock_extent(tree, start, page_end, GFP_NOFS);
1655 	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
1656 					       &delalloc_end,
1657 					       128 * 1024 * 1024);
1658 	if (nr_delalloc) {
1659 		tree->ops->fill_delalloc(inode, start, delalloc_end);
1660 		if (delalloc_end >= page_end + 1) {
1661 			clear_extent_bit(tree, page_end + 1, delalloc_end,
1662 					 EXTENT_LOCKED | EXTENT_DELALLOC,
1663 					 1, 0, GFP_NOFS);
1664 		}
1665 		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
1666 				 0, 0, GFP_NOFS);
1667 		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1668 			printk("found delalloc bits after clear extent_bit\n");
1669 		}
1670 	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1671 		printk("found delalloc bits after find_delalloc_range returns 0\n");
1672 	}
1673 
1674 	end = page_end;
1675 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1676 		printk("found delalloc bits after lock_extent\n");
1677 	}
1678 
1679 	if (last_byte <= start) {
1680 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1681 		goto done;
1682 	}
1683 
1684 	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1685 	blocksize = inode->i_sb->s_blocksize;
1686 
1687 	while (cur <= end) {
1688 		if (cur >= last_byte) {
1689 			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1690 			break;
1691 		}
1692 		em = get_extent(inode, page, page_offset, cur, end, 1);
1693 		if (IS_ERR(em) || !em) {
1694 			SetPageError(page);
1695 			break;
1696 		}
1697 
1698 		extent_offset = cur - em->start;
1699 		BUG_ON(em->end < cur);
1700 		BUG_ON(end < cur);
1701 		iosize = min(em->end - cur, end - cur) + 1;
1702 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1703 		sector = (em->block_start + extent_offset) >> 9;
1704 		bdev = em->bdev;
1705 		block_start = em->block_start;
1706 		free_extent_map(em);
1707 		em = NULL;
1708 
1709 		if (block_start == EXTENT_MAP_HOLE ||
1710 		    block_start == EXTENT_MAP_INLINE) {
1711 			clear_extent_dirty(tree, cur,
1712 					   cur + iosize - 1, GFP_NOFS);
1713 			cur = cur + iosize;
1714 			page_offset += iosize;
1715 			continue;
1716 		}
1717 
1718 		/* leave this out until we have a page_mkwrite call */
1719 		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1720 				   EXTENT_DIRTY, 0)) {
1721 			cur = cur + iosize;
1722 			page_offset += iosize;
1723 			continue;
1724 		}
1725 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1726 		if (tree->ops && tree->ops->writepage_io_hook) {
1727 			ret = tree->ops->writepage_io_hook(page, cur,
1728 						cur + iosize - 1);
1729 		} else {
1730 			ret = 0;
1731 		}
1732 		if (ret)
1733 			SetPageError(page);
1734 		else {
1735 			set_range_writeback(tree, cur, cur + iosize - 1);
1736 			ret = submit_extent_page(WRITE, tree, page, sector,
1737 						 iosize, page_offset, bdev,
1738 						 end_bio_extent_writepage);
1739 			if (ret)
1740 				SetPageError(page);
1741 		}
1742 		cur = cur + iosize;
1743 		page_offset += iosize;
1744 		nr++;
1745 	}
1746 done:
1747 	unlock_extent(tree, start, page_end, GFP_NOFS);
1748 	unlock_page(page);
1749 	return 0;
1750 }
1751 EXPORT_SYMBOL(extent_write_full_page);
1752 
1753 /*
1754  * basic invalidatepage code: this waits on any locked or writeback
1755  * ranges corresponding to the page, and then deletes any extent state
1756  * records from the tree
1757  */
1758 int extent_invalidatepage(struct extent_map_tree *tree,
1759 			  struct page *page, unsigned long offset)
1760 {
1761 	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
1762 	u64 end = start + PAGE_CACHE_SIZE - 1;
1763 	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1764 
1765 	start += (offset + blocksize - 1) & ~(blocksize - 1);
1766 	if (start > end)
1767 		return 0;
1768 
1769 	lock_extent(tree, start, end, GFP_NOFS);
1770 	wait_on_extent_writeback(tree, start, end);
1771 	clear_extent_bit(tree, start, end,
1772 			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
1773 			 1, 1, GFP_NOFS);
1774 	return 0;
1775 }
1776 EXPORT_SYMBOL(extent_invalidatepage);
1777 
1778 /*
1779  * simple commit_write call: the page is marked dirty with set_page_dirty()
1780  * and i_size is updated if the write extended the file
1781  */
1782 int extent_commit_write(struct extent_map_tree *tree,
1783 			struct inode *inode, struct page *page,
1784 			unsigned from, unsigned to)
1785 {
1786 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1787 
1788 	set_page_extent_mapped(page);
1789 	set_page_dirty(page);
1790 
1791 	if (pos > inode->i_size) {
1792 		i_size_write(inode, pos);
1793 		mark_inode_dirty(inode);
1794 	}
1795 	return 0;
1796 }
1797 EXPORT_SYMBOL(extent_commit_write);
1798 
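/*
 * prepare_write helper: lock the extents backing the page, zero the parts
 * of freshly allocated blocks that fall outside [from, to), and read in
 * any blocks that are neither new nor uptodate.  If reads were submitted,
 * wait for them to finish before returning.
 */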
1799 int extent_prepare_write(struct extent_map_tree *tree,
1800 			 struct inode *inode, struct page *page,
1801 			 unsigned from, unsigned to, get_extent_t *get_extent)
1802 {
1803 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1804 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1805 	u64 block_start;
1806 	u64 orig_block_start;
1807 	u64 block_end;
1808 	u64 cur_end;
1809 	struct extent_map *em;
1810 	unsigned blocksize = 1 << inode->i_blkbits;
1811 	size_t page_offset = 0;
1812 	size_t block_off_start;
1813 	size_t block_off_end;
1814 	int err = 0;
1815 	int iocount = 0;
1816 	int ret = 0;
1817 	int isnew;
1818 
1819 	set_page_extent_mapped(page);
1820 
1821 	block_start = (page_start + from) & ~((u64)blocksize - 1);
1822 	block_end = (page_start + to - 1) | (blocksize - 1);
1823 	orig_block_start = block_start;
1824 
1825 	lock_extent(tree, page_start, page_end, GFP_NOFS);
1826 	while(block_start <= block_end) {
1827 		em = get_extent(inode, page, page_offset, block_start,
1828 				block_end, 1);
1829 		if (IS_ERR(em) || !em) {
1830 			goto err;
1831 		}
1832 		cur_end = min(block_end, em->end);
1833 		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1834 		block_off_end = block_off_start + blocksize;
1835 		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1836 
1837 		if (!PageUptodate(page) && isnew &&
1838 		    (block_off_end > to || block_off_start < from)) {
1839 			void *kaddr;
1840 
1841 			kaddr = kmap_atomic(page, KM_USER0);
1842 			if (block_off_end > to)
1843 				memset(kaddr + to, 0, block_off_end - to);
1844 			if (block_off_start < from)
1845 				memset(kaddr + block_off_start, 0,
1846 				       from - block_off_start);
1847 			flush_dcache_page(page);
1848 			kunmap_atomic(kaddr, KM_USER0);
1849 		}
1850 		if (!isnew && !PageUptodate(page) &&
1851 		    (block_off_end > to || block_off_start < from) &&
1852 		    !test_range_bit(tree, block_start, cur_end,
1853 				    EXTENT_UPTODATE, 1)) {
1854 			u64 sector;
1855 			u64 extent_offset = block_start - em->start;
1856 			size_t iosize;
1857 			sector = (em->block_start + extent_offset) >> 9;
1858 			iosize = (cur_end - block_start + blocksize - 1) &
1859 				~((u64)blocksize - 1);
1860 			/*
1861 			 * we've already got the extent locked, but we
1862 			 * need to split the state such that our end_bio
1863 			 * handler can clear the lock.
1864 			 */
1865 			set_extent_bit(tree, block_start,
1866 				       block_start + iosize - 1,
1867 				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1868 			ret = submit_extent_page(READ, tree, page,
1869 					 sector, iosize, page_offset, em->bdev,
1870 					 end_bio_extent_preparewrite);
1871 			iocount++;
1872 			block_start = block_start + iosize;
1873 		} else {
1874 			set_extent_uptodate(tree, block_start, cur_end,
1875 					    GFP_NOFS);
1876 			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1877 			block_start = cur_end + 1;
1878 		}
1879 		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1880 		free_extent_map(em);
1881 	}
1882 	if (iocount) {
1883 		wait_extent_bit(tree, orig_block_start,
1884 				block_end, EXTENT_LOCKED);
1885 	}
1886 	check_page_uptodate(tree, page);
1887 err:
1888 	/* FIXME, zero out newly allocated blocks on error */
1889 	return err;
1890 }
1891 EXPORT_SYMBOL(extent_prepare_write);
1892 
1893 /*
1894  * a helper for releasepage.  As long as there are no locked extents
1895  * in the range corresponding to the page, both state records and extent
1896  * map records are removed
1897  */
1898 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1899 {
1900 	struct extent_map *em;
1901 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1902 	u64 end = start + PAGE_CACHE_SIZE - 1;
1903 	u64 orig_start = start;
1904 	int ret = 1;
1905 
1906 	while (start <= end) {
1907 		em = lookup_extent_mapping(tree, start, end);
1908 		if (!em || IS_ERR(em))
1909 			break;
1910 		if (!test_range_bit(tree, em->start, em->end,
1911 				    EXTENT_LOCKED, 0)) {
1912 			remove_extent_mapping(tree, em);
1913 			/* once for the rb tree */
1914 			free_extent_map(em);
1915 		}
1916 		start = em->end + 1;
1917 		/* once for us */
1918 		free_extent_map(em);
1919 	}
1920 	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
1921 		ret = 0;
1922 	else
1923 		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1924 				 1, 1, GFP_NOFS);
1925 	return ret;
1926 }
1927 EXPORT_SYMBOL(try_release_extent_mapping);
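/*
 * illustrative sketch only (not part of this file): a filesystem that
 * keeps an extent_map_tree in its per-inode info might forward its
 * ->releasepage callback here roughly as
 *
 *	static int foo_releasepage(struct page *page, gfp_t gfp_flags)
 *	{
 *		struct extent_map_tree *tree =
 *			&FOO_I(page->mapping->host)->extent_tree;
 *		return try_release_extent_mapping(tree, page);
 *	}
 *
 * FOO_I() and the extent_tree member are hypothetical names.
 */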
1928 
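/*
 * bmap helper: map a logical file block to a disk sector by asking
 * get_extent for the extent covering it.  Holes and inline extents
 * have no meaningful block address, so they return 0.
 */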
1929 sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
1930 		get_extent_t *get_extent)
1931 {
1932 	struct inode *inode = mapping->host;
1933 	u64 start = (u64)iblock << inode->i_blkbits;
1934 	u64 end = start + (1 << inode->i_blkbits) - 1;
1935 	sector_t sector = 0;
1936 	struct extent_map *em;
1937 
1938 	em = get_extent(inode, NULL, 0, start, end, 0);
1939 	if (!em || IS_ERR(em))
1940 		return 0;
1941 
1942 	if (em->block_start == EXTENT_MAP_INLINE ||
1943 	    em->block_start == EXTENT_MAP_HOLE)
1944 		goto out;
1945 
1946 	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
1947 out:
1948 	free_extent_map(em);
1949 	return sector;
1950 }
1951 
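/*
 * each tree keeps a small LRU of recently used extent_buffers, capped
 * at BUFFER_LRU_MAX.  add_lru() takes an extra reference on a newly
 * inserted buffer and pushes the oldest entry out once the cap is hit;
 * the caller must hold tree->lru_lock.
 */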
1952 static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
1953 {
1954 	if (list_empty(&eb->lru)) {
1955 		extent_buffer_get(eb);
1956 		list_add(&eb->lru, &tree->buffer_lru);
1957 		tree->lru_size++;
1958 		if (tree->lru_size >= BUFFER_LRU_MAX) {
1959 			struct extent_buffer *rm;
1960 			rm = list_entry(tree->buffer_lru.prev,
1961 					struct extent_buffer, lru);
1962 			tree->lru_size--;
1963 			list_del(&rm->lru);
1964 			free_extent_buffer(rm);
1965 		}
1966 	} else
1967 		list_move(&eb->lru, &tree->buffer_lru);
1968 	return 0;
1969 }
1970 static struct extent_buffer *find_lru(struct extent_map_tree *tree,
1971 				      u64 start, unsigned long len)
1972 {
1973 	struct list_head *lru = &tree->buffer_lru;
1974 	struct list_head *cur = lru->next;
1975 	struct extent_buffer *eb;
1976 
1977 	if (list_empty(lru))
1978 		return NULL;
1979 
1980 	do {
1981 		eb = list_entry(cur, struct extent_buffer, lru);
1982 		if (eb->start == start && eb->len == len) {
1983 			extent_buffer_get(eb);
1984 			return eb;
1985 		}
1986 		cur = cur->next;
1987 	} while (cur != lru);
1988 	return NULL;
1989 }
1990 
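/* how many pages does the range [start, start + len) touch */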
1991 static inline unsigned long num_extent_pages(u64 start, u64 len)
1992 {
1993 	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
1994 		(start >> PAGE_CACHE_SHIFT);
1995 }
1996 
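/*
 * return the i'th page backing an extent_buffer.  page 0 is cached in
 * eb->first_page; the rest are looked up in the page cache radix tree
 * of the owning mapping.
 */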
1997 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
1998 					      unsigned long i)
1999 {
2000 	struct page *p;
2001 	struct address_space *mapping;
2002 
2003 	if (i == 0)
2004 		return eb->first_page;
2005 	i += eb->start >> PAGE_CACHE_SHIFT;
2006 	mapping = eb->first_page->mapping;
2007 	read_lock_irq(&mapping->tree_lock);
2008 	p = radix_tree_lookup(&mapping->page_tree, i);
2009 	read_unlock_irq(&mapping->tree_lock);
2010 	return p;
2011 }
2012 
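/*
 * find an extent_buffer for this range on the per-tree lru, or
 * allocate a fresh one.  Either way the buffer ends up at the head of
 * the lru and the caller gets its own reference.
 */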
2013 static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
2014 						   u64 start,
2015 						   unsigned long len,
2016 						   gfp_t mask)
2017 {
2018 	struct extent_buffer *eb = NULL;
2019 
2020 	spin_lock(&tree->lru_lock);
2021 	eb = find_lru(tree, start, len);
2022 	if (eb) {
2023 		goto lru_add;
2024 	}
2025 	spin_unlock(&tree->lru_lock);
2026 
2027 	/* not in the lru, allocate a new buffer */
2028 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2029 	if (!eb)
2030 		return NULL;
2031 	INIT_LIST_HEAD(&eb->lru);
2033 	eb->start = start;
2034 	eb->len = len;
2035 	atomic_set(&eb->refs, 1);
2036 
2037 	spin_lock(&tree->lru_lock);
2038 lru_add:
2039 	add_lru(tree, eb);
2040 	spin_unlock(&tree->lru_lock);
2041 	return eb;
2042 }
2043 
2044 static void __free_extent_buffer(struct extent_buffer *eb)
2045 {
2046 	kmem_cache_free(extent_buffer_cache, eb);
2047 }
2048 
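/*
 * pin all of the pages spanned by [start, start + len) in the page
 * cache and attach them to an extent_buffer.  page0, if given, is used
 * as the buffer's first page.  Buffers that were already filled (found
 * on the lru) are returned untouched.
 */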
2049 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
2050 					  u64 start, unsigned long len,
2051 					  struct page *page0,
2052 					  gfp_t mask)
2053 {
2054 	unsigned long num_pages = num_extent_pages(start, len);
2055 	unsigned long i;
2056 	unsigned long index = start >> PAGE_CACHE_SHIFT;
2057 	struct extent_buffer *eb;
2058 	struct page *p;
2059 	struct address_space *mapping = tree->mapping;
2060 	int uptodate = 1;
2061 
2062 	eb = __alloc_extent_buffer(tree, start, len, mask);
2063 	if (!eb || IS_ERR(eb))
2064 		return NULL;
2065 
2066 	if (eb->flags & EXTENT_BUFFER_FILLED)
2067 		return eb;
2068 
2069 	if (page0) {
2070 		eb->first_page = page0;
2071 		i = 1;
2072 		index++;
2073 		page_cache_get(page0);
2074 		mark_page_accessed(page0);
2075 		set_page_extent_mapped(page0);
2076 		set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2077 				 len << 2);
2078 	} else {
2079 		i = 0;
2080 	}
2081 	for (; i < num_pages; i++, index++) {
2082 		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2083 		if (!p) {
2084 			WARN_ON(1);
2085 			/* make sure the free only frees the pages we've
2086 			 * grabbed a reference on
2087 			 */
2088 			eb->len = i << PAGE_CACHE_SHIFT;
2089 			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
2090 			goto fail;
2091 		}
2092 		set_page_extent_mapped(p);
2093 		mark_page_accessed(p);
2094 		if (i == 0) {
2095 			eb->first_page = p;
2096 			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2097 					 len << 2);
2098 		} else {
2099 			set_page_private(p, EXTENT_PAGE_PRIVATE);
2100 		}
2101 		if (!PageUptodate(p))
2102 			uptodate = 0;
2103 		unlock_page(p);
2104 	}
2105 	if (uptodate)
2106 		eb->flags |= EXTENT_UPTODATE;
2107 	eb->flags |= EXTENT_BUFFER_FILLED;
2108 	return eb;
2109 fail:
2110 	free_extent_buffer(eb);
2111 	return NULL;
2112 }
2113 EXPORT_SYMBOL(alloc_extent_buffer);
2114 
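/*
 * like alloc_extent_buffer, but only uses pages that are already
 * present in the page cache (find_lock_page instead of
 * find_or_create_page).
 */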
2115 struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
2116 					 u64 start, unsigned long len,
2117 					  gfp_t mask)
2118 {
2119 	unsigned long num_pages = num_extent_pages(start, len);
2120 	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
2121 	struct extent_buffer *eb;
2122 	struct page *p;
2123 	struct address_space *mapping = tree->mapping;
2124 	int uptodate = 1;
2125 
2126 	eb = __alloc_extent_buffer(tree, start, len, mask);
2127 	if (!eb || IS_ERR(eb))
2128 		return NULL;
2129 
2130 	if (eb->flags & EXTENT_BUFFER_FILLED)
2131 		return eb;
2132 
2133 	for (i = 0; i < num_pages; i++, index++) {
2134 		p = find_lock_page(mapping, index);
2135 		if (!p) {
2136 			/* make sure the free only frees the pages we've
2137 			 * grabbed a reference on
2138 			 */
2139 			eb->len = i << PAGE_CACHE_SHIFT;
2140 			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
2141 			goto fail;
2142 		}
2143 		set_page_extent_mapped(p);
2144 		mark_page_accessed(p);
2145 
2146 		if (i == 0) {
2147 			eb->first_page = p;
2148 			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2149 					 len << 2);
2150 		} else {
2151 			set_page_private(p, EXTENT_PAGE_PRIVATE);
2152 		}
2153 
2154 		if (!PageUptodate(p))
2155 			uptodate = 0;
2156 		unlock_page(p);
2157 	}
2158 	if (uptodate)
2159 		eb->flags |= EXTENT_UPTODATE;
2160 	eb->flags |= EXTENT_BUFFER_FILLED;
2161 	return eb;
2162 fail:
2163 	free_extent_buffer(eb);
2164 	return NULL;
2165 }
2166 EXPORT_SYMBOL(find_extent_buffer);
2167 
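/*
 * drop a reference on an extent_buffer.  The last reference releases
 * the page cache references taken at allocation time and frees the
 * buffer itself.
 */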
2168 void free_extent_buffer(struct extent_buffer *eb)
2169 {
2170 	unsigned long i;
2171 	unsigned long num_pages;
2172 
2173 	if (!eb)
2174 		return;
2175 
2176 	if (!atomic_dec_and_test(&eb->refs))
2177 		return;
2178 
2179 	num_pages = num_extent_pages(eb->start, eb->len);
2180 
2181 	for (i = 0; i < num_pages; i++) {
2182 		page_cache_release(extent_buffer_page(eb, i));
2183 	}
2184 	__free_extent_buffer(eb);
2185 }
2186 EXPORT_SYMBOL(free_extent_buffer);
2187 
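/*
 * clear the dirty bits for the buffer's range in the tree and clean
 * the backing pages, taking care not to clean pages that are only
 * partially covered by this buffer and still dirty elsewhere.
 */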
2188 int clear_extent_buffer_dirty(struct extent_map_tree *tree,
2189 			      struct extent_buffer *eb)
2190 {
2191 	int set;
2192 	unsigned long i;
2193 	unsigned long num_pages;
2194 	struct page *page;
2195 
2196 	u64 start = eb->start;
2197 	u64 end = start + eb->len - 1;
2198 
2199 	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2200 	num_pages = num_extent_pages(eb->start, eb->len);
2201 
2202 	for (i = 0; i < num_pages; i++) {
2203 		page = extent_buffer_page(eb, i);
2204 		lock_page(page);
2205 		/*
2206 		 * if we're on the first or last page and the buffer isn't
2207 		 * aligned to a page boundary there, do extra checks to make
2208 		 * sure we don't clean a page that is partially dirty
2209 		 */
2210 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2211 		    ((i == num_pages - 1) &&
2212 		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2213 			start = (u64)page->index << PAGE_CACHE_SHIFT;
2214 			end  = start + PAGE_CACHE_SIZE - 1;
2215 			if (test_range_bit(tree, start, end,
2216 					   EXTENT_DIRTY, 0)) {
2217 				unlock_page(page);
2218 				continue;
2219 			}
2220 		}
2221 		clear_page_dirty_for_io(page);
2222 		unlock_page(page);
2223 	}
2224 	return 0;
2225 }
2226 EXPORT_SYMBOL(clear_extent_buffer_dirty);
2227 
2228 int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
2229 				    struct extent_buffer *eb)
2230 {
2231 	return wait_on_extent_writeback(tree, eb->start,
2232 					eb->start + eb->len - 1);
2233 }
2234 EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2235 
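/*
 * mark the buffer's range dirty in the tree and dirty all of the
 * backing pages so writepage will pick them up.
 */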
2236 int set_extent_buffer_dirty(struct extent_map_tree *tree,
2237 			     struct extent_buffer *eb)
2238 {
2239 	unsigned long i;
2240 	unsigned long num_pages;
2241 
2242 	num_pages = num_extent_pages(eb->start, eb->len);
2243 	for (i = 0; i < num_pages; i++) {
2244 		struct page *page = extent_buffer_page(eb, i);
2245 		/* writepage may need to do something special for the
2246 		 * first page, so we have to make sure page->private is
2247 		 * properly set.  releasepage may drop page->private
2248 		 * on us if the page isn't already dirty.
2249 		 */
2250 		if (i == 0) {
2251 			lock_page(page);
2252 			set_page_private(page,
2253 					 EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2254 					 eb->len << 2);
2255 		}
2256 		__set_page_dirty_nobuffers(page);
2257 		if (i == 0)
2258 			unlock_page(page);
2259 	}
2260 	return set_extent_dirty(tree, eb->start,
2261 				eb->start + eb->len - 1, GFP_NOFS);
2262 }
2263 EXPORT_SYMBOL(set_extent_buffer_dirty);
2264 
2265 int set_extent_buffer_uptodate(struct extent_map_tree *tree,
2266 				struct extent_buffer *eb)
2267 {
2268 	unsigned long i;
2269 	struct page *page;
2270 	unsigned long num_pages;
2271 
2272 	num_pages = num_extent_pages(eb->start, eb->len);
2273 
2274 	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2275 			    GFP_NOFS);
2276 	for (i = 0; i < num_pages; i++) {
2277 		page = extent_buffer_page(eb, i);
2278 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2279 		    ((i == num_pages - 1) &&
2280 		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2281 			check_page_uptodate(tree, page);
2282 			continue;
2283 		}
2284 		SetPageUptodate(page);
2285 	}
2286 	return 0;
2287 }
2288 EXPORT_SYMBOL(set_extent_buffer_uptodate);
2289 
2290 int extent_buffer_uptodate(struct extent_map_tree *tree,
2291 			     struct extent_buffer *eb)
2292 {
2293 	if (eb->flags & EXTENT_UPTODATE)
2294 		return 1;
2295 	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2296 			   EXTENT_UPTODATE, 1);
2297 }
2298 EXPORT_SYMBOL(extent_buffer_uptodate);
2299 
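/*
 * read any backing pages that aren't uptodate yet.  With wait set the
 * call blocks until the reads finish and returns -EIO if a page still
 * isn't uptodate; without it reads are only kicked off for pages that
 * could be locked without blocking.
 */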
2300 int read_extent_buffer_pages(struct extent_map_tree *tree,
2301 			     struct extent_buffer *eb,
2302 			     u64 start,
2303 			     int wait)
2304 {
2305 	unsigned long i;
2306 	unsigned long start_i;
2307 	struct page *page;
2308 	int err;
2309 	int ret = 0;
2310 	unsigned long num_pages;
2311 
2312 	if (eb->flags & EXTENT_UPTODATE)
2313 		return 0;
2314 
2315 	if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2316 			   EXTENT_UPTODATE, 1)) {
2317 		return 0;
2318 	}
2319 	if (start) {
2320 		WARN_ON(start < eb->start);
2321 		start_i = (start >> PAGE_CACHE_SHIFT) -
2322 			(eb->start >> PAGE_CACHE_SHIFT);
2323 	} else {
2324 		start_i = 0;
2325 	}
2326 
2327 	num_pages = num_extent_pages(eb->start, eb->len);
2328 	for (i = start_i; i < num_pages; i++) {
2329 		page = extent_buffer_page(eb, i);
2330 		if (PageUptodate(page)) {
2331 			continue;
2332 		}
2333 		if (!wait) {
2334 			if (TestSetPageLocked(page)) {
2335 				continue;
2336 			}
2337 		} else {
2338 			lock_page(page);
2339 		}
2340 		if (!PageUptodate(page)) {
2341 			err = page->mapping->a_ops->readpage(NULL, page);
2342 			if (err) {
2343 				ret = err;
2344 			}
2345 		} else {
2346 			unlock_page(page);
2347 		}
2348 	}
2349 
2350 	if (ret || !wait) {
2351 		return ret;
2352 	}
2353 
2354 	for (i = start_i; i < num_pages; i++) {
2355 		page = extent_buffer_page(eb, i);
2356 		wait_on_page_locked(page);
2357 		if (!PageUptodate(page)) {
2358 			ret = -EIO;
2359 		}
2360 	}
2361 	if (!ret)
2362 		eb->flags |= EXTENT_UPTODATE;
2363 	return ret;
2364 }
2365 EXPORT_SYMBOL(read_extent_buffer_pages);
2366 
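/*
 * copy len bytes starting at offset start in the extent_buffer into
 * dstv, walking the backing pages with temporary atomic mappings.
 */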
2367 void read_extent_buffer(struct extent_buffer *eb, void *dstv,
2368 			unsigned long start,
2369 			unsigned long len)
2370 {
2371 	size_t cur;
2372 	size_t offset;
2373 	struct page *page;
2374 	char *kaddr;
2375 	char *dst = (char *)dstv;
2376 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2377 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2378 	unsigned long num_pages = num_extent_pages(eb->start, eb->len);
2379 
2380 	WARN_ON(start > eb->len);
2381 	WARN_ON(start + len > eb->start + eb->len);
2382 
2383 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2384 
2385 	while(len > 0) {
2386 		page = extent_buffer_page(eb, i);
2387 		if (!PageUptodate(page)) {
2388 			printk("page %lu not up to date i %lu, total %lu, "
			       "len %lu\n", page->index, i, num_pages, eb->len);
2389 			WARN_ON(1);
2390 		}
2392 
2393 		cur = min(len, (PAGE_CACHE_SIZE - offset));
2394 		kaddr = kmap_atomic(page, KM_USER1);
2395 		memcpy(dst, kaddr + offset, cur);
2396 		kunmap_atomic(kaddr, KM_USER1);
2397 
2398 		dst += cur;
2399 		len -= cur;
2400 		offset = 0;
2401 		i++;
2402 	}
2403 }
2404 EXPORT_SYMBOL(read_extent_buffer);
2405 
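/*
 * map a chunk of the buffer with kmap_atomic.  The chunk must fit
 * inside a single page; -EINVAL is returned when [start, start +
 * min_len) would cross a page boundary.  *token is what must later be
 * handed to unmap_extent_buffer().
 */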
2406 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
2407 			       unsigned long min_len, char **token, char **map,
2408 			       unsigned long *map_start,
2409 			       unsigned long *map_len, int km)
2410 {
2411 	size_t offset = start & (PAGE_CACHE_SIZE - 1);
2412 	char *kaddr;
2413 	struct page *p;
2414 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2415 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2416 	unsigned long end_i = (start_offset + start + min_len - 1) >>
2417 		PAGE_CACHE_SHIFT;
2418 
2419 	if (i != end_i)
2420 		return -EINVAL;
2421 
2422 	if (i == 0) {
2423 		offset = start_offset;
2424 		*map_start = 0;
2425 	} else {
2426 		offset = 0;
2427 		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
2428 	}
2429 	if (start + min_len > eb->len) {
2430 		printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n",
		       eb->start, eb->len, start, min_len);
2431 		WARN_ON(1);
2432 	}
2433 
2434 	p = extent_buffer_page(eb, i);
2435 	WARN_ON(!PageUptodate(p));
2436 	kaddr = kmap_atomic(p, km);
2437 	*token = kaddr;
2438 	*map = kaddr + offset;
2439 	*map_len = PAGE_CACHE_SIZE - offset;
2440 	return 0;
2441 }
2442 EXPORT_SYMBOL(map_private_extent_buffer);
2443 
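/*
 * like map_private_extent_buffer, but also tracks a single cached
 * mapping in the extent_buffer itself: an existing eb->map_token is
 * unmapped first and, if there was one, the new mapping is cached in
 * its place.
 *
 * illustrative sketch only (not from this file) of the usual
 * map/use/unmap pattern:
 *
 *	char *token, *kaddr;
 *	unsigned long off = ...;
 *	unsigned long map_start, map_len;
 *
 *	if (!map_extent_buffer(eb, off, sizeof(u32), &token, &kaddr,
 *			       &map_start, &map_len, KM_USER0)) {
 *		// use kaddr[0 .. map_len) here
 *		unmap_extent_buffer(eb, token, KM_USER0);
 *	}
 */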
2444 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
2445 		      unsigned long min_len,
2446 		      char **token, char **map,
2447 		      unsigned long *map_start,
2448 		      unsigned long *map_len, int km)
2449 {
2450 	int err;
2451 	int save = 0;
2452 	if (eb->map_token) {
2453 		unmap_extent_buffer(eb, eb->map_token, km);
2454 		eb->map_token = NULL;
2455 		save = 1;
2456 	}
2457 	err = map_private_extent_buffer(eb, start, min_len, token, map,
2458 				       map_start, map_len, km);
2459 	if (!err && save) {
2460 		eb->map_token = *token;
2461 		eb->kaddr = *map;
2462 		eb->map_start = *map_start;
2463 		eb->map_len = *map_len;
2464 	}
2465 	return err;
2466 }
2467 EXPORT_SYMBOL(map_extent_buffer);
2468 
2469 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
2470 {
2471 	kunmap_atomic(token, km);
2472 }
2473 EXPORT_SYMBOL(unmap_extent_buffer);
2474 
2475 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
2476 			  unsigned long start,
2477 			  unsigned long len)
2478 {
2479 	size_t cur;
2480 	size_t offset;
2481 	struct page *page;
2482 	char *kaddr;
2483 	char *ptr = (char *)ptrv;
2484 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2485 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2486 	int ret = 0;
2487 
2488 	WARN_ON(start > eb->len);
2489 	WARN_ON(start + len > eb->start + eb->len);
2490 
2491 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2492 
2493 	while(len > 0) {
2494 		page = extent_buffer_page(eb, i);
2495 		WARN_ON(!PageUptodate(page));
2496 
2497 		cur = min(len, (PAGE_CACHE_SIZE - offset));
2498 
2499 		kaddr = kmap_atomic(page, KM_USER0);
2500 		ret = memcmp(ptr, kaddr + offset, cur);
2501 		kunmap_atomic(kaddr, KM_USER0);
2502 		if (ret)
2503 			break;
2504 
2505 		ptr += cur;
2506 		len -= cur;
2507 		offset = 0;
2508 		i++;
2509 	}
2510 	return ret;
2511 }
2512 EXPORT_SYMBOL(memcmp_extent_buffer);
2513 
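/*
 * copy len bytes from srcv into the extent_buffer at offset start,
 * the mirror image of read_extent_buffer().
 */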
2514 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
2515 			 unsigned long start, unsigned long len)
2516 {
2517 	size_t cur;
2518 	size_t offset;
2519 	struct page *page;
2520 	char *kaddr;
2521 	char *src = (char *)srcv;
2522 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2523 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2524 
2525 	WARN_ON(start > eb->len);
2526 	WARN_ON(start + len > eb->start + eb->len);
2527 
2528 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2529 
2530 	while(len > 0) {
2531 		page = extent_buffer_page(eb, i);
2532 		WARN_ON(!PageUptodate(page));
2533 
2534 		cur = min(len, PAGE_CACHE_SIZE - offset);
2535 		kaddr = kmap_atomic(page, KM_USER1);
2536 		memcpy(kaddr + offset, src, cur);
2537 		kunmap_atomic(kaddr, KM_USER1);
2538 
2539 		src += cur;
2540 		len -= cur;
2541 		offset = 0;
2542 		i++;
2543 	}
2544 }
2545 EXPORT_SYMBOL(write_extent_buffer);
2546 
2547 void memset_extent_buffer(struct extent_buffer *eb, char c,
2548 			  unsigned long start, unsigned long len)
2549 {
2550 	size_t cur;
2551 	size_t offset;
2552 	struct page *page;
2553 	char *kaddr;
2554 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2555 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2556 
2557 	WARN_ON(start > eb->len);
2558 	WARN_ON(start + len > eb->start + eb->len);
2559 
2560 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2561 
2562 	while(len > 0) {
2563 		page = extent_buffer_page(eb, i);
2564 		WARN_ON(!PageUptodate(page));
2565 
2566 		cur = min(len, PAGE_CACHE_SIZE - offset);
2567 		kaddr = kmap_atomic(page, KM_USER0);
2568 		memset(kaddr + offset, c, cur);
2569 		kunmap_atomic(kaddr, KM_USER0);
2570 
2571 		len -= cur;
2572 		offset = 0;
2573 		i++;
2574 	}
2575 }
2576 EXPORT_SYMBOL(memset_extent_buffer);
2577 
2578 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
2579 			unsigned long dst_offset, unsigned long src_offset,
2580 			unsigned long len)
2581 {
2582 	u64 dst_len = dst->len;
2583 	size_t cur;
2584 	size_t offset;
2585 	struct page *page;
2586 	char *kaddr;
2587 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2588 	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2589 
2590 	WARN_ON(src->len != dst_len);
2591 
2592 	offset = (start_offset + dst_offset) &
2593 		((unsigned long)PAGE_CACHE_SIZE - 1);
2594 
2595 	while(len > 0) {
2596 		page = extent_buffer_page(dst, i);
2597 		WARN_ON(!PageUptodate(page));
2598 
2599 		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
2600 
2601 		kaddr = kmap_atomic(page, KM_USER0);
2602 		read_extent_buffer(src, kaddr + offset, src_offset, cur);
2603 		kunmap_atomic(kaddr, KM_USER0);
2604 
2605 		src_offset += cur;
2606 		len -= cur;
2607 		offset = 0;
2608 		i++;
2609 	}
2610 }
2611 EXPORT_SYMBOL(copy_extent_buffer);
2612 
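/*
 * page-level helpers for the extent_buffer memmove/memcpy routines.
 * move_pages() copies from the highest byte down (and uses memmove
 * when source and destination are the same page) so overlapping
 * ranges are handled; copy_pages() is a plain forward memcpy.
 */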
2613 static void move_pages(struct page *dst_page, struct page *src_page,
2614 		       unsigned long dst_off, unsigned long src_off,
2615 		       unsigned long len)
2616 {
2617 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2618 	if (dst_page == src_page) {
2619 		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
2620 	} else {
2621 		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
2622 		char *p = dst_kaddr + dst_off + len;
2623 		char *s = src_kaddr + src_off + len;
2624 
2625 		while (len--)
2626 			*--p = *--s;
2627 
2628 		kunmap_atomic(src_kaddr, KM_USER1);
2629 	}
2630 	kunmap_atomic(dst_kaddr, KM_USER0);
2631 }
2632 
2633 static void copy_pages(struct page *dst_page, struct page *src_page,
2634 		       unsigned long dst_off, unsigned long src_off,
2635 		       unsigned long len)
2636 {
2637 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2638 	char *src_kaddr;
2639 
2640 	if (dst_page != src_page)
2641 		src_kaddr = kmap_atomic(src_page, KM_USER1);
2642 	else
2643 		src_kaddr = dst_kaddr;
2644 
2645 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
2646 	kunmap_atomic(dst_kaddr, KM_USER0);
2647 	if (dst_page != src_page)
2648 		kunmap_atomic(src_kaddr, KM_USER1);
2649 }
2650 
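/*
 * copy a range within a single extent_buffer, walking the pages from
 * low offsets upward.  For ranges that might overlap, use
 * memmove_extent_buffer(), which picks a safe copy direction.
 */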
2651 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2652 			   unsigned long src_offset, unsigned long len)
2653 {
2654 	size_t cur;
2655 	size_t dst_off_in_page;
2656 	size_t src_off_in_page;
2657 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2658 	unsigned long dst_i;
2659 	unsigned long src_i;
2660 
2661 	if (src_offset + len > dst->len) {
2662 		printk("memcpy bogus src_offset %lu move len %lu len %lu\n",
2663 		       src_offset, len, dst->len);
2664 		BUG_ON(1);
2665 	}
2666 	if (dst_offset + len > dst->len) {
2667 		printk("memcpy bogus dst_offset %lu move len %lu len %lu\n",
2668 		       dst_offset, len, dst->len);
2669 		BUG_ON(1);
2670 	}
2671 
2672 	while(len > 0) {
2673 		dst_off_in_page = (start_offset + dst_offset) &
2674 			((unsigned long)PAGE_CACHE_SIZE - 1);
2675 		src_off_in_page = (start_offset + src_offset) &
2676 			((unsigned long)PAGE_CACHE_SIZE - 1);
2677 
2678 		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2679 		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
2680 
2681 		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
2682 					       src_off_in_page));
2683 		cur = min_t(unsigned long, cur,
2684 			(unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
2685 
2686 		copy_pages(extent_buffer_page(dst, dst_i),
2687 			   extent_buffer_page(dst, src_i),
2688 			   dst_off_in_page, src_off_in_page, cur);
2689 
2690 		src_offset += cur;
2691 		dst_offset += cur;
2692 		len -= cur;
2693 	}
2694 }
2695 EXPORT_SYMBOL(memcpy_extent_buffer);
2696 
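/*
 * memmove within a single extent_buffer: when the destination sits
 * above the source the copy runs backwards via move_pages(), otherwise
 * a forward memcpy_extent_buffer() is safe.
 */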
2697 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2698 			   unsigned long src_offset, unsigned long len)
2699 {
2700 	size_t cur;
2701 	size_t dst_off_in_page;
2702 	size_t src_off_in_page;
2703 	unsigned long dst_end = dst_offset + len - 1;
2704 	unsigned long src_end = src_offset + len - 1;
2705 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2706 	unsigned long dst_i;
2707 	unsigned long src_i;
2708 
2709 	if (src_offset + len > dst->len) {
2710 		printk("memmove bogus src_offset %lu move len %lu len %lu\n",
2711 		       src_offset, len, dst->len);
2712 		BUG_ON(1);
2713 	}
2714 	if (dst_offset + len > dst->len) {
2715 		printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
2716 		       dst_offset, len, dst->len);
2717 		BUG_ON(1);
2718 	}
2719 	if (dst_offset < src_offset) {
2720 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
2721 		return;
2722 	}
2723 	while(len > 0) {
2724 		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
2725 		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
2726 
2727 		dst_off_in_page = (start_offset + dst_end) &
2728 			((unsigned long)PAGE_CACHE_SIZE - 1);
2729 		src_off_in_page = (start_offset + src_end) &
2730 			((unsigned long)PAGE_CACHE_SIZE - 1);
2731 
2732 		cur = min_t(unsigned long, len, src_off_in_page + 1);
2733 		cur = min(cur, dst_off_in_page + 1);
2734 		move_pages(extent_buffer_page(dst, dst_i),
2735 			   extent_buffer_page(dst, src_i),
2736 			   dst_off_in_page - cur + 1,
2737 			   src_off_in_page - cur + 1, cur);
2738 
2739 		dst_end -= cur;
2740 		src_end -= cur;
2741 		len -= cur;
2742 	}
2743 }
2744 EXPORT_SYMBOL(memmove_extent_buffer);
2745