xref: /openbmc/linux/fs/btrfs/extent_map.c (revision 944746ec)
1 #include <linux/bitops.h>
2 #include <linux/slab.h>
3 #include <linux/bio.h>
4 #include <linux/mm.h>
5 #include <linux/gfp.h>
6 #include <linux/pagemap.h>
7 #include <linux/page-flags.h>
8 #include <linux/module.h>
9 #include <linux/spinlock.h>
10 #include <linux/blkdev.h>
11 #include <linux/swap.h>
12 #include <linux/version.h>
13 #include "extent_map.h"
14 
15 /* temporary prototype until extent_map moves out of btrfs */
16 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
17 				       unsigned long extra_flags,
18 				       void (*ctor)(void *, struct kmem_cache *,
19 						    unsigned long));
20 
21 static struct kmem_cache *extent_map_cache;
22 static struct kmem_cache *extent_state_cache;
23 static struct kmem_cache *extent_buffer_cache;
24 
25 static LIST_HEAD(buffers);
26 static LIST_HEAD(states);
27 
28 static DEFINE_SPINLOCK(state_lock);
29 #define BUFFER_LRU_MAX 64
30 
31 struct tree_entry {
32 	u64 start;
33 	u64 end;
34 	int in_tree;
35 	struct rb_node rb_node;
36 };
37 
38 void __init extent_map_init(void)
39 {
40 	extent_map_cache = btrfs_cache_create("extent_map",
41 					    sizeof(struct extent_map), 0,
42 					    NULL);
43 	extent_state_cache = btrfs_cache_create("extent_state",
44 					    sizeof(struct extent_state), 0,
45 					    NULL);
46 	extent_buffer_cache = btrfs_cache_create("extent_buffers",
47 					    sizeof(struct extent_buffer), 0,
48 					    NULL);
49 }
50 
51 void __exit extent_map_exit(void)
52 {
53 	struct extent_state *state;
54 
55 	while (!list_empty(&states)) {
56 		state = list_entry(states.next, struct extent_state, list);
57 		printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
58 		list_del(&state->list);
59 		kmem_cache_free(extent_state_cache, state);
60 
61 	}
62 
63 	if (extent_map_cache)
64 		kmem_cache_destroy(extent_map_cache);
65 	if (extent_state_cache)
66 		kmem_cache_destroy(extent_state_cache);
67 	if (extent_buffer_cache)
68 		kmem_cache_destroy(extent_buffer_cache);
69 }
70 
71 void extent_map_tree_init(struct extent_map_tree *tree,
72 			  struct address_space *mapping, gfp_t mask)
73 {
74 	tree->map.rb_node = NULL;
75 	tree->state.rb_node = NULL;
76 	tree->ops = NULL;
77 	rwlock_init(&tree->lock);
78 	spin_lock_init(&tree->lru_lock);
79 	tree->mapping = mapping;
80 	INIT_LIST_HEAD(&tree->buffer_lru);
81 	tree->lru_size = 0;
82 }
83 EXPORT_SYMBOL(extent_map_tree_init);
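
/*
 * A rough usage sketch: the caller embeds an extent_map_tree somewhere in
 * its in-memory inode and points it at the inode's mapping.  'tree' below
 * is a placeholder for wherever that embedded structure lives.
 *
 *      extent_map_tree_init(tree, inode->i_mapping, GFP_NOFS);
 */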
84 
85 void extent_map_tree_empty_lru(struct extent_map_tree *tree)
86 {
87 	struct extent_buffer *eb;
88 	while(!list_empty(&tree->buffer_lru)) {
89 		eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
90 				lru);
91 		list_del(&eb->lru);
92 		free_extent_buffer(eb);
93 	}
94 }
95 EXPORT_SYMBOL(extent_map_tree_empty_lru);
96 
97 struct extent_map *alloc_extent_map(gfp_t mask)
98 {
99 	struct extent_map *em;
100 	em = kmem_cache_alloc(extent_map_cache, mask);
101 	if (!em || IS_ERR(em))
102 		return em;
103 	em->in_tree = 0;
104 	atomic_set(&em->refs, 1);
105 	return em;
106 }
107 EXPORT_SYMBOL(alloc_extent_map);
108 
109 void free_extent_map(struct extent_map *em)
110 {
111 	if (!em)
112 		return;
113 	if (atomic_dec_and_test(&em->refs)) {
114 		WARN_ON(em->in_tree);
115 		kmem_cache_free(extent_map_cache, em);
116 	}
117 }
118 EXPORT_SYMBOL(free_extent_map);
119 
120 
121 struct extent_state *alloc_extent_state(gfp_t mask)
122 {
123 	struct extent_state *state;
124 	unsigned long flags;
125 
126 	state = kmem_cache_alloc(extent_state_cache, mask);
127 	if (!state || IS_ERR(state))
128 		return state;
129 	state->state = 0;
130 	state->in_tree = 0;
131 	state->private = 0;
132 
133 	spin_lock_irqsave(&state_lock, flags);
134 	list_add(&state->list, &states);
135 	spin_unlock_irqrestore(&state_lock, flags);
136 
137 	atomic_set(&state->refs, 1);
138 	init_waitqueue_head(&state->wq);
139 	return state;
140 }
141 EXPORT_SYMBOL(alloc_extent_state);
142 
143 void free_extent_state(struct extent_state *state)
144 {
145 	unsigned long flags;
146 	if (!state)
147 		return;
148 	if (atomic_dec_and_test(&state->refs)) {
149 		WARN_ON(state->in_tree);
150 		spin_lock_irqsave(&state_lock, flags);
151 		list_del(&state->list);
152 		spin_unlock_irqrestore(&state_lock, flags);
153 		kmem_cache_free(extent_state_cache, state);
154 	}
155 }
156 EXPORT_SYMBOL(free_extent_state);
157 
158 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
159 				   struct rb_node *node)
160 {
161 	struct rb_node ** p = &root->rb_node;
162 	struct rb_node * parent = NULL;
163 	struct tree_entry *entry;
164 
165 	while(*p) {
166 		parent = *p;
167 		entry = rb_entry(parent, struct tree_entry, rb_node);
168 
169 		if (offset < entry->start)
170 			p = &(*p)->rb_left;
171 		else if (offset > entry->end)
172 			p = &(*p)->rb_right;
173 		else
174 			return parent;
175 	}
176 
177 	entry = rb_entry(node, struct tree_entry, rb_node);
178 	entry->in_tree = 1;
179 	rb_link_node(node, parent, p);
180 	rb_insert_color(node, root);
181 	return NULL;
182 }
183 
184 static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
185 				   struct rb_node **prev_ret)
186 {
187 	struct rb_node * n = root->rb_node;
188 	struct rb_node *prev = NULL;
189 	struct tree_entry *entry;
190 	struct tree_entry *prev_entry = NULL;
191 
192 	while(n) {
193 		entry = rb_entry(n, struct tree_entry, rb_node);
194 		prev = n;
195 		prev_entry = entry;
196 
197 		if (offset < entry->start)
198 			n = n->rb_left;
199 		else if (offset > entry->end)
200 			n = n->rb_right;
201 		else
202 			return n;
203 	}
204 	if (!prev_ret)
205 		return NULL;
206 	while(prev && offset > prev_entry->end) {
207 		prev = rb_next(prev);
208 		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
209 	}
210 	*prev_ret = prev;
211 	return NULL;
212 }
213 
214 static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
215 {
216 	struct rb_node *prev;
217 	struct rb_node *ret;
218 	ret = __tree_search(root, offset, &prev);
219 	if (!ret)
220 		return prev;
221 	return ret;
222 }
223 
224 static int tree_delete(struct rb_root *root, u64 offset)
225 {
226 	struct rb_node *node;
227 	struct tree_entry *entry;
228 
229 	node = __tree_search(root, offset, NULL);
230 	if (!node)
231 		return -ENOENT;
232 	entry = rb_entry(node, struct tree_entry, rb_node);
233 	entry->in_tree = 0;
234 	rb_erase(node, root);
235 	return 0;
236 }
237 
238 /*
239  * add_extent_mapping tries a simple backward merge with existing
240  * mappings.  The extent_map struct passed in will be inserted into
241  * the tree directly (no copies made, just a reference taken).
242  */
243 int add_extent_mapping(struct extent_map_tree *tree,
244 		       struct extent_map *em)
245 {
246 	int ret = 0;
247 	struct extent_map *prev = NULL;
248 	struct rb_node *rb;
249 
250 	write_lock_irq(&tree->lock);
251 	rb = tree_insert(&tree->map, em->end, &em->rb_node);
252 	if (rb) {
253 		prev = rb_entry(rb, struct extent_map, rb_node);
254 		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
255 		ret = -EEXIST;
256 		goto out;
257 	}
258 	atomic_inc(&em->refs);
259 	if (em->start != 0) {
260 		rb = rb_prev(&em->rb_node);
261 		if (rb)
262 			prev = rb_entry(rb, struct extent_map, rb_node);
263 		if (prev && prev->end + 1 == em->start &&
264 		    ((em->block_start == EXTENT_MAP_HOLE &&
265 		      prev->block_start == EXTENT_MAP_HOLE) ||
266 		     (em->block_start == EXTENT_MAP_INLINE &&
267 		      prev->block_start == EXTENT_MAP_INLINE) ||
268 		     (em->block_start == EXTENT_MAP_DELALLOC &&
269 		      prev->block_start == EXTENT_MAP_DELALLOC) ||
270 		     (em->block_start < EXTENT_MAP_DELALLOC - 1 &&
271 		      em->block_start == prev->block_end + 1))) {
272 			em->start = prev->start;
273 			em->block_start = prev->block_start;
274 			rb_erase(&prev->rb_node, &tree->map);
275 			prev->in_tree = 0;
276 			free_extent_map(prev);
277 		}
278 	}
279 out:
280 	write_unlock_irq(&tree->lock);
281 	return ret;
282 }
283 EXPORT_SYMBOL(add_extent_mapping);
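
/*
 * A rough sketch of the insertion pattern; 'tree', 'start', 'len',
 * 'block_start' and 'bdev' are caller-supplied placeholders.  Because
 * add_extent_mapping() grabs its own reference on success, the reference
 * taken by alloc_extent_map() always remains the caller's to drop:
 *
 *      struct extent_map *em = alloc_extent_map(GFP_NOFS);
 *      int ret;
 *
 *      if (!em)
 *              return -ENOMEM;
 *      em->start = start;
 *      em->end = start + len - 1;
 *      em->block_start = block_start;
 *      em->block_end = block_start + len - 1;
 *      em->bdev = bdev;
 *      ret = add_extent_mapping(tree, em);
 *      free_extent_map(em);
 */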
284 
285 /*
286  * lookup_extent_mapping returns the first extent_map struct in the
287  * tree that intersects the [start, end] (inclusive) range.  There may
288  * be additional objects in the tree that intersect, so check the object
289  * returned carefully to make sure you don't need additional lookups.
290  */
291 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
292 					 u64 start, u64 end)
293 {
294 	struct extent_map *em;
295 	struct rb_node *rb_node;
296 
297 	read_lock_irq(&tree->lock);
298 	rb_node = tree_search(&tree->map, start);
299 	if (!rb_node) {
300 		em = NULL;
301 		goto out;
302 	}
303 	if (IS_ERR(rb_node)) {
304 		em = ERR_PTR(PTR_ERR(rb_node));
305 		goto out;
306 	}
307 	em = rb_entry(rb_node, struct extent_map, rb_node);
308 	if (em->end < start || em->start > end) {
309 		em = NULL;
310 		goto out;
311 	}
312 	atomic_inc(&em->refs);
313 out:
314 	read_unlock_irq(&tree->lock);
315 	return em;
316 }
317 EXPORT_SYMBOL(lookup_extent_mapping);
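
/*
 * A rough sketch of covering a whole range: since only the first
 * intersecting mapping comes back (with a reference held), callers loop
 * much like try_release_extent_mapping() near the end of this file.
 * 'start' and 'end' are placeholders:
 *
 *      struct extent_map *em;
 *
 *      while (start <= end) {
 *              em = lookup_extent_mapping(tree, start, end);
 *              if (!em || IS_ERR(em))
 *                      break;
 *              start = em->end + 1;
 *              free_extent_map(em);
 *      }
 */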
318 
319 /*
320  * removes an extent_map struct from the tree.  No reference counts are
321  * dropped, and no checks are done to see if the range is in use
322  */
323 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
324 {
325 	int ret;
326 
327 	write_lock_irq(&tree->lock);
328 	ret = tree_delete(&tree->map, em->end);
329 	write_unlock_irq(&tree->lock);
330 	return ret;
331 }
332 EXPORT_SYMBOL(remove_extent_mapping);
333 
334 /*
335  * utility function to look for merge candidates inside a given range.
336  * Any extents with matching state are merged together into a single
337  * extent in the tree.  Extents with EXTENT_IOBITS set in their state field
338  * are not merged because the end_io handlers need to be able to do
339  * operations on them without sleeping (or doing allocations/splits).
340  *
341  * This should be called with the tree lock held.
342  */
343 static int merge_state(struct extent_map_tree *tree,
344 		       struct extent_state *state)
345 {
346 	struct extent_state *other;
347 	struct rb_node *other_node;
348 
349 	if (state->state & EXTENT_IOBITS)
350 		return 0;
351 
352 	other_node = rb_prev(&state->rb_node);
353 	if (other_node) {
354 		other = rb_entry(other_node, struct extent_state, rb_node);
355 		if (other->end == state->start - 1 &&
356 		    other->state == state->state) {
357 			state->start = other->start;
358 			other->in_tree = 0;
359 			rb_erase(&other->rb_node, &tree->state);
360 			free_extent_state(other);
361 		}
362 	}
363 	other_node = rb_next(&state->rb_node);
364 	if (other_node) {
365 		other = rb_entry(other_node, struct extent_state, rb_node);
366 		if (other->start == state->end + 1 &&
367 		    other->state == state->state) {
368 			other->start = state->start;
369 			state->in_tree = 0;
370 			rb_erase(&state->rb_node, &tree->state);
371 			free_extent_state(state);
372 		}
373 	}
374 	return 0;
375 }
376 
377 /*
378  * insert an extent_state struct into the tree.  'bits' are set on the
379  * struct before it is inserted.
380  *
381  * This may return -EEXIST if the extent is already there, in which case the
382  * state struct is freed.
383  *
384  * The tree lock is not taken internally.  This is a utility function and
385  * probably isn't what you want to call (see set/clear_extent_bit).
386  */
387 static int insert_state(struct extent_map_tree *tree,
388 			struct extent_state *state, u64 start, u64 end,
389 			int bits)
390 {
391 	struct rb_node *node;
392 
393 	if (end < start) {
394 		printk("end < start %Lu %Lu\n", end, start);
395 		WARN_ON(1);
396 	}
397 	state->state |= bits;
398 	state->start = start;
399 	state->end = end;
400 	node = tree_insert(&tree->state, end, &state->rb_node);
401 	if (node) {
402 		struct extent_state *found;
403 		found = rb_entry(node, struct extent_state, rb_node);
404 		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
405 		free_extent_state(state);
406 		return -EEXIST;
407 	}
408 	merge_state(tree, state);
409 	return 0;
410 }
411 
412 /*
413  * split a given extent state struct in two, inserting the preallocated
414  * struct 'prealloc' as the newly created second half.  'split' indicates an
415  * offset inside 'orig' where it should be split.
416  *
417  * Before calling,
418  * the tree has 'orig' at [orig->start, orig->end].  After calling, there
419  * are two extent state structs in the tree:
420  * prealloc: [orig->start, split - 1]
421  * orig: [ split, orig->end ]
422  *
423  * The tree locks are not taken by this function. They need to be held
424  * by the caller.
425  */
426 static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
427 		       struct extent_state *prealloc, u64 split)
428 {
429 	struct rb_node *node;
430 	prealloc->start = orig->start;
431 	prealloc->end = split - 1;
432 	prealloc->state = orig->state;
433 	orig->start = split;
434 
435 	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
436 	if (node) {
437 		struct extent_state *found;
438 		found = rb_entry(node, struct extent_state, rb_node);
439 		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
440 		free_extent_state(prealloc);
441 		return -EEXIST;
442 	}
443 	return 0;
444 }
445 
446 /*
447  * utility function to clear some bits in an extent state struct.
448  * it will optionally wake up anyone waiting on this state (wake == 1), or
449  * forcibly remove the state from the tree (delete == 1).
450  *
451  * If no bits are set on the state struct after clearing things, the
452  * struct is freed and removed from the tree
453  */
454 static int clear_state_bit(struct extent_map_tree *tree,
455 			    struct extent_state *state, int bits, int wake,
456 			    int delete)
457 {
458 	int ret = state->state & bits;
459 	state->state &= ~bits;
460 	if (wake)
461 		wake_up(&state->wq);
462 	if (delete || state->state == 0) {
463 		if (state->in_tree) {
464 			rb_erase(&state->rb_node, &tree->state);
465 			state->in_tree = 0;
466 			free_extent_state(state);
467 		} else {
468 			WARN_ON(1);
469 		}
470 	} else {
471 		merge_state(tree, state);
472 	}
473 	return ret;
474 }
475 
476 /*
477  * clear some bits on a range in the tree.  This may require splitting
478  * or inserting elements in the tree, so the gfp mask is used to
479  * indicate which allocations or sleeping are allowed.
480  *
481  * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
482  * the given range from the tree regardless of state (ie for truncate).
483  *
484  * the range [start, end] is inclusive.
485  *
486  * This takes the tree lock, and returns < 0 on error, > 0 if any of the
487  * bits were already set, or zero if none of the bits were already set.
488  */
489 int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
490 		     int bits, int wake, int delete, gfp_t mask)
491 {
492 	struct extent_state *state;
493 	struct extent_state *prealloc = NULL;
494 	struct rb_node *node;
495 	unsigned long flags;
496 	int err;
497 	int set = 0;
498 
499 again:
500 	if (!prealloc && (mask & __GFP_WAIT)) {
501 		prealloc = alloc_extent_state(mask);
502 		if (!prealloc)
503 			return -ENOMEM;
504 	}
505 
506 	write_lock_irqsave(&tree->lock, flags);
507 	/*
508 	 * this search will find the extents that end after
509 	 * our range starts
510 	 */
511 	node = tree_search(&tree->state, start);
512 	if (!node)
513 		goto out;
514 	state = rb_entry(node, struct extent_state, rb_node);
515 	if (state->start > end)
516 		goto out;
517 	WARN_ON(state->end < start);
518 
519 	/*
520 	 *     | ---- desired range ---- |
521 	 *  | state | or
522 	 *  | ------------- state -------------- |
523 	 *
524 	 * We need to split the extent we found, and may flip
525 	 * bits on second half.
526 	 *
527 	 * If the extent we found extends past our range, we
528 	 * just split and search again.  It'll get split again
529 	 * the next time though.
530 	 *
531 	 * If the extent we found is inside our range, we clear
532 	 * the desired bit on it.
533 	 */
534 
535 	if (state->start < start) {
536 		err = split_state(tree, state, prealloc, start);
537 		BUG_ON(err == -EEXIST);
538 		prealloc = NULL;
539 		if (err)
540 			goto out;
541 		if (state->end <= end) {
542 			start = state->end + 1;
543 			set |= clear_state_bit(tree, state, bits,
544 					wake, delete);
545 		} else {
546 			start = state->start;
547 		}
548 		goto search_again;
549 	}
550 	/*
551 	 * | ---- desired range ---- |
552 	 *                        | state |
553 	 * We need to split the extent, and clear the bit
554 	 * on the first half
555 	 */
556 	if (state->start <= end && state->end > end) {
557 		err = split_state(tree, state, prealloc, end + 1);
558 		BUG_ON(err == -EEXIST);
559 
560 		if (wake)
561 			wake_up(&state->wq);
562 		set |= clear_state_bit(tree, prealloc, bits,
563 				       wake, delete);
564 		prealloc = NULL;
565 		goto out;
566 	}
567 
568 	start = state->end + 1;
569 	set |= clear_state_bit(tree, state, bits, wake, delete);
570 	goto search_again;
571 
572 out:
573 	write_unlock_irqrestore(&tree->lock, flags);
574 	if (prealloc)
575 		free_extent_state(prealloc);
576 
577 	return set;
578 
579 search_again:
580 	if (start > end)
581 		goto out;
582 	write_unlock_irqrestore(&tree->lock, flags);
583 	if (mask & __GFP_WAIT)
584 		cond_resched();
585 	goto again;
586 }
587 EXPORT_SYMBOL(clear_extent_bit);
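
/*
 * A rough sketch of the truncate-style use of 'wake' and 'delete': passing
 * both as 1 tears the range out of the tree no matter which bits are set,
 * which is how extent_invalidatepage() below uses it.  'start' and 'end'
 * are placeholders:
 *
 *      clear_extent_bit(tree, start, end,
 *                       EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
 *                       1, 1, GFP_NOFS);
 */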
588 
589 static int wait_on_state(struct extent_map_tree *tree,
590 			 struct extent_state *state)
591 {
592 	DEFINE_WAIT(wait);
593 	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
594 	read_unlock_irq(&tree->lock);
595 	schedule();
596 	read_lock_irq(&tree->lock);
597 	finish_wait(&state->wq, &wait);
598 	return 0;
599 }
600 
601 /*
602  * waits for one or more bits to clear on a range in the state tree.
603  * The range [start, end] is inclusive.
604  * The tree lock is taken by this function
605  */
606 int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
607 {
608 	struct extent_state *state;
609 	struct rb_node *node;
610 
611 	read_lock_irq(&tree->lock);
612 again:
613 	while (1) {
614 		/*
615 		 * this search will find all the extents that end after
616 		 * our range starts
617 		 */
618 		node = tree_search(&tree->state, start);
619 		if (!node)
620 			break;
621 
622 		state = rb_entry(node, struct extent_state, rb_node);
623 
624 		if (state->start > end)
625 			goto out;
626 
627 		if (state->state & bits) {
628 			start = state->start;
629 			atomic_inc(&state->refs);
630 			wait_on_state(tree, state);
631 			free_extent_state(state);
632 			goto again;
633 		}
634 		start = state->end + 1;
635 
636 		if (start > end)
637 			break;
638 
639 		if (need_resched()) {
640 			read_unlock_irq(&tree->lock);
641 			cond_resched();
642 			read_lock_irq(&tree->lock);
643 		}
644 	}
645 out:
646 	read_unlock_irq(&tree->lock);
647 	return 0;
648 }
649 EXPORT_SYMBOL(wait_extent_bit);
650 
651 /*
652  * set some bits on a range in the tree.  This may require allocations
653  * or sleeping, so the gfp mask is used to indicate what is allowed.
654  *
655  * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
656  * range already has the desired bits set.  The start of the existing
657  * range is returned in failed_start in this case.
658  *
659  * [start, end] is inclusive
660  * This takes the tree lock.
661  */
662 int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
663 		   int exclusive, u64 *failed_start, gfp_t mask)
664 {
665 	struct extent_state *state;
666 	struct extent_state *prealloc = NULL;
667 	struct rb_node *node;
668 	unsigned long flags;
669 	int err = 0;
670 	int set;
671 	u64 last_start;
672 	u64 last_end;
673 again:
674 	if (!prealloc && (mask & __GFP_WAIT)) {
675 		prealloc = alloc_extent_state(mask);
676 		if (!prealloc)
677 			return -ENOMEM;
678 	}
679 
680 	write_lock_irqsave(&tree->lock, flags);
681 	/*
682 	 * this search will find all the extents that end after
683 	 * our range starts.
684 	 */
685 	node = tree_search(&tree->state, start);
686 	if (!node) {
687 		err = insert_state(tree, prealloc, start, end, bits);
688 		prealloc = NULL;
689 		BUG_ON(err == -EEXIST);
690 		goto out;
691 	}
692 
693 	state = rb_entry(node, struct extent_state, rb_node);
694 	last_start = state->start;
695 	last_end = state->end;
696 
697 	/*
698 	 * | ---- desired range ---- |
699 	 * | state |
700 	 *
701 	 * Just lock what we found and keep going
702 	 */
703 	if (state->start == start && state->end <= end) {
704 		set = state->state & bits;
705 		if (set && exclusive) {
706 			*failed_start = state->start;
707 			err = -EEXIST;
708 			goto out;
709 		}
710 		state->state |= bits;
711 		start = state->end + 1;
712 		merge_state(tree, state);
713 		goto search_again;
714 	}
715 
716 	/*
717 	 *     | ---- desired range ---- |
718 	 * | state |
719 	 *   or
720 	 * | ------------- state -------------- |
721 	 *
722 	 * We need to split the extent we found, and may flip bits on
723 	 * second half.
724 	 *
725 	 * If the extent we found extends past our
726 	 * range, we just split and search again.  It'll get split
727 	 * again the next time though.
728 	 *
729 	 * If the extent we found is inside our range, we set the
730 	 * desired bit on it.
731 	 */
732 	if (state->start < start) {
733 		set = state->state & bits;
734 		if (exclusive && set) {
735 			*failed_start = start;
736 			err = -EEXIST;
737 			goto out;
738 		}
739 		err = split_state(tree, state, prealloc, start);
740 		BUG_ON(err == -EEXIST);
741 		prealloc = NULL;
742 		if (err)
743 			goto out;
744 		if (state->end <= end) {
745 			state->state |= bits;
746 			start = state->end + 1;
747 			merge_state(tree, state);
748 		} else {
749 			start = state->start;
750 		}
751 		goto search_again;
752 	}
753 	/*
754 	 * | ---- desired range ---- |
755 	 *     | state | or               | state |
756 	 *
757 	 * There's a hole, we need to insert something in it and
758 	 * ignore the extent we found.
759 	 */
760 	if (state->start > start) {
761 		u64 this_end;
762 		if (end < last_start)
763 			this_end = end;
764 		else
765 			this_end = last_start - 1;
766 		err = insert_state(tree, prealloc, start, this_end,
767 				   bits);
768 		prealloc = NULL;
769 		BUG_ON(err == -EEXIST);
770 		if (err)
771 			goto out;
772 		start = this_end + 1;
773 		goto search_again;
774 	}
775 	/*
776 	 * | ---- desired range ---- |
777 	 *                        | state |
778 	 * We need to split the extent, and set the bit
779 	 * on the first half
780 	 */
781 	if (state->start <= end && state->end > end) {
782 		set = state->state & bits;
783 		if (exclusive && set) {
784 			*failed_start = start;
785 			err = -EEXIST;
786 			goto out;
787 		}
788 		err = split_state(tree, state, prealloc, end + 1);
789 		BUG_ON(err == -EEXIST);
790 
791 		prealloc->state |= bits;
792 		merge_state(tree, prealloc);
793 		prealloc = NULL;
794 		goto out;
795 	}
796 
797 	goto search_again;
798 
799 out:
800 	write_unlock_irqrestore(&tree->lock, flags);
801 	if (prealloc)
802 		free_extent_state(prealloc);
803 
804 	return err;
805 
806 search_again:
807 	if (start > end)
808 		goto out;
809 	write_unlock_irqrestore(&tree->lock, flags);
810 	if (mask & __GFP_WAIT)
811 		cond_resched();
812 	goto again;
813 }
814 EXPORT_SYMBOL(set_extent_bit);
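
/*
 * A rough sketch of the exclusive mode: with 'exclusive' == 1 the call acts
 * as a trylock on the range, and on -EEXIST *failed_start reports where the
 * conflict begins so the caller can wait and retry, exactly as lock_extent()
 * below does:
 *
 *      u64 failed_start;
 *      int err;
 *
 *      err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
 *                           &failed_start, GFP_NOFS);
 *      if (err == -EEXIST)
 *              wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
 */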
815 
816 /* wrappers around set/clear extent bit */
817 int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
818 		     gfp_t mask)
819 {
820 	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
821 			      mask);
822 }
823 EXPORT_SYMBOL(set_extent_dirty);
824 
825 int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
826 		    int bits, gfp_t mask)
827 {
828 	return set_extent_bit(tree, start, end, bits, 0, NULL,
829 			      mask);
830 }
831 EXPORT_SYMBOL(set_extent_bits);
832 
833 int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end,
834 		      int bits, gfp_t mask)
835 {
836 	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
837 }
838 EXPORT_SYMBOL(clear_extent_bits);
839 
840 int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
841 		     gfp_t mask)
842 {
843 	return set_extent_bit(tree, start, end,
844 			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
845 			      mask);
846 }
847 EXPORT_SYMBOL(set_extent_delalloc);
848 
849 int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
850 		       gfp_t mask)
851 {
852 	return clear_extent_bit(tree, start, end,
853 				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
854 }
855 EXPORT_SYMBOL(clear_extent_dirty);
856 
857 int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
858 		     gfp_t mask)
859 {
860 	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
861 			      mask);
862 }
863 EXPORT_SYMBOL(set_extent_new);
864 
865 int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
866 		       gfp_t mask)
867 {
868 	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
869 }
870 EXPORT_SYMBOL(clear_extent_new);
871 
872 int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
873 			gfp_t mask)
874 {
875 	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
876 			      mask);
877 }
878 EXPORT_SYMBOL(set_extent_uptodate);
879 
880 int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
881 			  gfp_t mask)
882 {
883 	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
884 }
885 EXPORT_SYMBOL(clear_extent_uptodate);
886 
887 int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
888 			 gfp_t mask)
889 {
890 	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
891 			      0, NULL, mask);
892 }
893 EXPORT_SYMBOL(set_extent_writeback);
894 
895 int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
896 			   gfp_t mask)
897 {
898 	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
899 }
900 EXPORT_SYMBOL(clear_extent_writeback);
901 
902 int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
903 {
904 	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
905 }
906 EXPORT_SYMBOL(wait_on_extent_writeback);
907 
908 /*
909  * locks a range in ascending order, waiting for any locked regions
910  * it hits on the way.  [start,end] are inclusive, and this will sleep.
911  */
912 int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
913 {
914 	int err;
915 	u64 failed_start;
916 	while (1) {
917 		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
918 				     &failed_start, mask);
919 		if (err == -EEXIST && (mask & __GFP_WAIT)) {
920 			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
921 			start = failed_start;
922 		} else {
923 			break;
924 		}
925 		WARN_ON(start > end);
926 	}
927 	return err;
928 }
929 EXPORT_SYMBOL(lock_extent);
930 
931 int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
932 		  gfp_t mask)
933 {
934 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
935 }
936 EXPORT_SYMBOL(unlock_extent);
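
/*
 * A rough sketch of the usual pairing: lock_extent() and unlock_extent()
 * bracket work on a byte range much the way lock_page() and unlock_page()
 * bracket work on a single page.  'start' and 'end' are placeholders:
 *
 *      lock_extent(tree, start, end, GFP_NOFS);
 *      (operate on [start, end]: submit IO, update delalloc state, ...)
 *      unlock_extent(tree, start, end, GFP_NOFS);
 */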
937 
938 /*
939  * helper function to set pages and extents in the tree dirty
940  */
941 int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
942 {
943 	unsigned long index = start >> PAGE_CACHE_SHIFT;
944 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
945 	struct page *page;
946 
947 	while (index <= end_index) {
948 		page = find_get_page(tree->mapping, index);
949 		BUG_ON(!page);
950 		__set_page_dirty_nobuffers(page);
951 		page_cache_release(page);
952 		index++;
953 	}
954 	set_extent_dirty(tree, start, end, GFP_NOFS);
955 	return 0;
956 }
957 EXPORT_SYMBOL(set_range_dirty);
958 
959 /*
960  * helper function to set both pages and extents in the tree writeback
961  */
962 int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
963 {
964 	unsigned long index = start >> PAGE_CACHE_SHIFT;
965 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
966 	struct page *page;
967 
968 	while (index <= end_index) {
969 		page = find_get_page(tree->mapping, index);
970 		BUG_ON(!page);
971 		set_page_writeback(page);
972 		page_cache_release(page);
973 		index++;
974 	}
975 	set_extent_writeback(tree, start, end, GFP_NOFS);
976 	return 0;
977 }
978 EXPORT_SYMBOL(set_range_writeback);
979 
980 int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
981 			  u64 *start_ret, u64 *end_ret, int bits)
982 {
983 	struct rb_node *node;
984 	struct extent_state *state;
985 	int ret = 1;
986 
987 	read_lock_irq(&tree->lock);
988 	/*
989 	 * this search will find all the extents that end after
990 	 * our range starts.
991 	 */
992 	node = tree_search(&tree->state, start);
993 	if (!node || IS_ERR(node)) {
994 		goto out;
995 	}
996 
997 	while(1) {
998 		state = rb_entry(node, struct extent_state, rb_node);
999 		if (state->end >= start && (state->state & bits)) {
1000 			*start_ret = state->start;
1001 			*end_ret = state->end;
1002 			ret = 0;
1003 			break;
1004 		}
1005 		node = rb_next(node);
1006 		if (!node)
1007 			break;
1008 	}
1009 out:
1010 	read_unlock_irq(&tree->lock);
1011 	return ret;
1012 }
1013 EXPORT_SYMBOL(find_first_extent_bit);
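
/*
 * A rough sketch of scanning every range with a given bit set: the function
 * returns 0 and fills the two output arguments when something is found, and
 * non-zero once nothing at or beyond 'cur' matches.  EXTENT_DIRTY is just an
 * example bit:
 *
 *      u64 found_start;
 *      u64 found_end;
 *      u64 cur = 0;
 *
 *      while (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *                                    EXTENT_DIRTY)) {
 *              (process [found_start, found_end])
 *              cur = found_end + 1;
 *      }
 */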
1014 
1015 u64 find_lock_delalloc_range(struct extent_map_tree *tree,
1016 			     u64 start, u64 lock_start, u64 *end, u64 max_bytes)
1017 {
1018 	struct rb_node *node;
1019 	struct extent_state *state;
1020 	u64 cur_start = start;
1021 	u64 found = 0;
1022 	u64 total_bytes = 0;
1023 
1024 	write_lock_irq(&tree->lock);
1025 	/*
1026 	 * this search will find all the extents that end after
1027 	 * our range starts.
1028 	 */
1029 search_again:
1030 	node = tree_search(&tree->state, cur_start);
1031 	if (!node || IS_ERR(node)) {
1032 		goto out;
1033 	}
1034 
1035 	while(1) {
1036 		state = rb_entry(node, struct extent_state, rb_node);
1037 		if (state->start != cur_start) {
1038 			goto out;
1039 		}
1040 		if (!(state->state & EXTENT_DELALLOC)) {
1041 			goto out;
1042 		}
1043 		if (state->start >= lock_start) {
1044 			if (state->state & EXTENT_LOCKED) {
1045 				DEFINE_WAIT(wait);
1046 				atomic_inc(&state->refs);
1047 				prepare_to_wait(&state->wq, &wait,
1048 						TASK_UNINTERRUPTIBLE);
1049 				write_unlock_irq(&tree->lock);
1050 				schedule();
1051 				write_lock_irq(&tree->lock);
1052 				finish_wait(&state->wq, &wait);
1053 				free_extent_state(state);
1054 				goto search_again;
1055 			}
1056 			state->state |= EXTENT_LOCKED;
1057 		}
1058 		found++;
1059 		*end = state->end;
1060 		cur_start = state->end + 1;
1061 		node = rb_next(node);
1062 		if (!node)
1063 			break;
1064 		total_bytes += state->end - state->start + 1;
1065 		if (total_bytes >= max_bytes)
1066 			break;
1067 	}
1068 out:
1069 	write_unlock_irq(&tree->lock);
1070 	return found;
1071 }
1072 
1073 /*
1074  * helper function to lock both pages and extents in the tree.
1075  * pages must be locked first.
1076  */
1077 int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
1078 {
1079 	unsigned long index = start >> PAGE_CACHE_SHIFT;
1080 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1081 	struct page *page;
1082 	int err;
1083 
1084 	while (index <= end_index) {
1085 		page = grab_cache_page(tree->mapping, index);
1086 		if (!page) {
1087 			err = -ENOMEM;
1088 			goto failed;
1089 		}
1090 		if (IS_ERR(page)) {
1091 			err = PTR_ERR(page);
1092 			goto failed;
1093 		}
1094 		index++;
1095 	}
1096 	lock_extent(tree, start, end, GFP_NOFS);
1097 	return 0;
1098 
1099 failed:
1100 	/*
1101 	 * we failed above in getting the page at 'index', so we undo here
1102 	 * up to but not including the page at 'index'
1103 	 */
1104 	end_index = index;
1105 	index = start >> PAGE_CACHE_SHIFT;
1106 	while (index < end_index) {
1107 		page = find_get_page(tree->mapping, index);
1108 		unlock_page(page);
1109 		page_cache_release(page);
1110 		index++;
1111 	}
1112 	return err;
1113 }
1114 EXPORT_SYMBOL(lock_range);
1115 
1116 /*
1117  * helper function to unlock both pages and extents in the tree.
1118  */
1119 int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
1120 {
1121 	unsigned long index = start >> PAGE_CACHE_SHIFT;
1122 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1123 	struct page *page;
1124 
1125 	while (index <= end_index) {
1126 		page = find_get_page(tree->mapping, index);
1127 		unlock_page(page);
1128 		page_cache_release(page);
1129 		index++;
1130 	}
1131 	unlock_extent(tree, start, end, GFP_NOFS);
1132 	return 0;
1133 }
1134 EXPORT_SYMBOL(unlock_range);
1135 
1136 int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
1137 {
1138 	struct rb_node *node;
1139 	struct extent_state *state;
1140 	int ret = 0;
1141 
1142 	write_lock_irq(&tree->lock);
1143 	/*
1144 	 * this search will find all the extents that end after
1145 	 * our range starts.
1146 	 */
1147 	node = tree_search(&tree->state, start);
1148 	if (!node || IS_ERR(node)) {
1149 		ret = -ENOENT;
1150 		goto out;
1151 	}
1152 	state = rb_entry(node, struct extent_state, rb_node);
1153 	if (state->start != start) {
1154 		ret = -ENOENT;
1155 		goto out;
1156 	}
1157 	state->private = private;
1158 out:
1159 	write_unlock_irq(&tree->lock);
1160 	return ret;
1161 }
1162 
1163 int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
1164 {
1165 	struct rb_node *node;
1166 	struct extent_state *state;
1167 	int ret = 0;
1168 
1169 	read_lock_irq(&tree->lock);
1170 	/*
1171 	 * this search will find all the extents that end after
1172 	 * our range starts.
1173 	 */
1174 	node = tree_search(&tree->state, start);
1175 	if (!node || IS_ERR(node)) {
1176 		ret = -ENOENT;
1177 		goto out;
1178 	}
1179 	state = rb_entry(node, struct extent_state, rb_node);
1180 	if (state->start != start) {
1181 		ret = -ENOENT;
1182 		goto out;
1183 	}
1184 	*private = state->private;
1185 out:
1186 	read_unlock_irq(&tree->lock);
1187 	return ret;
1188 }
1189 
1190 /*
1191  * searches a range in the state tree for a given mask.
1192  * If 'filled' == 1, this returns 1 only if every extent in the tree
1193  * has the bits set.  Otherwise, 1 is returned if any bit in the
1194  * range is found set.
1195  */
1196 int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
1197 		   int bits, int filled)
1198 {
1199 	struct extent_state *state = NULL;
1200 	struct rb_node *node;
1201 	int bitset = 0;
1202 
1203 	read_lock_irq(&tree->lock);
1204 	node = tree_search(&tree->state, start);
1205 	while (node && start <= end) {
1206 		state = rb_entry(node, struct extent_state, rb_node);
1207 		if (state->start > end)
1208 			break;
1209 
1210 		if (filled && state->start > start) {
1211 			bitset = 0;
1212 			break;
1213 		}
1214 		if (state->state & bits) {
1215 			bitset = 1;
1216 			if (!filled)
1217 				break;
1218 		} else if (filled) {
1219 			bitset = 0;
1220 			break;
1221 		}
1222 		start = state->end + 1;
1223 		if (start > end)
1224 			break;
1225 		node = rb_next(node);
1226 	}
1227 	read_unlock_irq(&tree->lock);
1228 	return bitset;
1229 }
1230 EXPORT_SYMBOL(test_range_bit);
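
/*
 * A rough sketch of the two modes, both used by the page helpers just below:
 *
 *      filled == 1: true only if every byte of the range has the bit, as in
 *      check_page_uptodate():
 *              test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
 *
 *      filled == 0: true if any byte of the range has the bit, as in
 *      check_page_locked():
 *              test_range_bit(tree, start, end, EXTENT_LOCKED, 0);
 */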
1231 
1232 /*
1233  * helper function to set a given page up to date if all the
1234  * extents in the tree for that page are up to date
1235  */
1236 static int check_page_uptodate(struct extent_map_tree *tree,
1237 			       struct page *page)
1238 {
1239 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1240 	u64 end = start + PAGE_CACHE_SIZE - 1;
1241 	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1242 		SetPageUptodate(page);
1243 	return 0;
1244 }
1245 
1246 /*
1247  * helper function to unlock a page if all the extents in the tree
1248  * for that page are unlocked
1249  */
1250 static int check_page_locked(struct extent_map_tree *tree,
1251 			     struct page *page)
1252 {
1253 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1254 	u64 end = start + PAGE_CACHE_SIZE - 1;
1255 	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1256 		unlock_page(page);
1257 	return 0;
1258 }
1259 
1260 /*
1261  * helper function to end page writeback if all the extents
1262  * in the tree for that page are done with writeback
1263  */
1264 static int check_page_writeback(struct extent_map_tree *tree,
1265 			     struct page *page)
1266 {
1267 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1268 	u64 end = start + PAGE_CACHE_SIZE - 1;
1269 	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1270 		end_page_writeback(page);
1271 	return 0;
1272 }
1273 
1274 /* lots and lots of room for performance fixes in the end_bio funcs */
1275 
1276 /*
1277  * after a writepage IO is done, we need to:
1278  * clear the uptodate bits on error
1279  * clear the writeback bits in the extent tree for this IO
1280  * end_page_writeback if the page has no more pending IO
1281  *
1282  * Scheduling is not allowed, so the extent state tree is expected
1283  * to have one and only one object corresponding to this IO.
1284  */
1285 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1286 static void end_bio_extent_writepage(struct bio *bio, int err)
1287 #else
1288 static int end_bio_extent_writepage(struct bio *bio,
1289 				   unsigned int bytes_done, int err)
1290 #endif
1291 {
1292 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1293 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1294 	struct extent_map_tree *tree = bio->bi_private;
1295 	u64 start;
1296 	u64 end;
1297 	int whole_page;
1298 
1299 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1300 	if (bio->bi_size)
1301 		return 1;
1302 #endif
1303 
1304 	do {
1305 		struct page *page = bvec->bv_page;
1306 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1307 			 bvec->bv_offset;
1308 		end = start + bvec->bv_len - 1;
1309 
1310 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1311 			whole_page = 1;
1312 		else
1313 			whole_page = 0;
1314 
1315 		if (--bvec >= bio->bi_io_vec)
1316 			prefetchw(&bvec->bv_page->flags);
1317 
1318 		if (!uptodate) {
1319 			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1320 			ClearPageUptodate(page);
1321 			SetPageError(page);
1322 		}
1323 		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1324 
1325 		if (whole_page)
1326 			end_page_writeback(page);
1327 		else
1328 			check_page_writeback(tree, page);
1329 		if (tree->ops && tree->ops->writepage_end_io_hook)
1330 			tree->ops->writepage_end_io_hook(page, start, end);
1331 	} while (bvec >= bio->bi_io_vec);
1332 
1333 	bio_put(bio);
1334 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1335 	return 0;
1336 #endif
1337 }
1338 
1339 /*
1340  * after a readpage IO is done, we need to:
1341  * clear the uptodate bits on error
1342  * set the uptodate bits if things worked
1343  * set the page up to date if all extents in the tree are uptodate
1344  * clear the lock bit in the extent tree
1345  * unlock the page if there are no other extents locked for it
1346  *
1347  * Scheduling is not allowed, so the extent state tree is expected
1348  * to have one and only one object corresponding to this IO.
1349  */
1350 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1351 static void end_bio_extent_readpage(struct bio *bio, int err)
1352 #else
1353 static int end_bio_extent_readpage(struct bio *bio,
1354 				   unsigned int bytes_done, int err)
1355 #endif
1356 {
1357 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1358 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1359 	struct extent_map_tree *tree = bio->bi_private;
1360 	u64 start;
1361 	u64 end;
1362 	int whole_page;
1363 	int ret;
1364 
1365 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1366 	if (bio->bi_size)
1367 		return 1;
1368 #endif
1369 
1370 	do {
1371 		struct page *page = bvec->bv_page;
1372 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1373 			bvec->bv_offset;
1374 		end = start + bvec->bv_len - 1;
1375 
1376 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1377 			whole_page = 1;
1378 		else
1379 			whole_page = 0;
1380 
1381 		if (--bvec >= bio->bi_io_vec)
1382 			prefetchw(&bvec->bv_page->flags);
1383 
1384 		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1385 			ret = tree->ops->readpage_end_io_hook(page, start, end);
1386 			if (ret)
1387 				uptodate = 0;
1388 		}
1389 		if (uptodate) {
1390 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1391 			if (whole_page)
1392 				SetPageUptodate(page);
1393 			else
1394 				check_page_uptodate(tree, page);
1395 		} else {
1396 			ClearPageUptodate(page);
1397 			SetPageError(page);
1398 		}
1399 
1400 		unlock_extent(tree, start, end, GFP_ATOMIC);
1401 
1402 		if (whole_page)
1403 			unlock_page(page);
1404 		else
1405 			check_page_locked(tree, page);
1406 	} while (bvec >= bio->bi_io_vec);
1407 
1408 	bio_put(bio);
1409 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1410 	return 0;
1411 #endif
1412 }
1413 
1414 /*
1415  * IO done from prepare_write is pretty simple, we just unlock
1416  * the structs in the extent tree when done, and set the uptodate bits
1417  * as appropriate.
1418  */
1419 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1420 static void end_bio_extent_preparewrite(struct bio *bio, int err)
1421 #else
1422 static int end_bio_extent_preparewrite(struct bio *bio,
1423 				       unsigned int bytes_done, int err)
1424 #endif
1425 {
1426 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1427 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1428 	struct extent_map_tree *tree = bio->bi_private;
1429 	u64 start;
1430 	u64 end;
1431 
1432 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1433 	if (bio->bi_size)
1434 		return 1;
1435 #endif
1436 
1437 	do {
1438 		struct page *page = bvec->bv_page;
1439 		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1440 			bvec->bv_offset;
1441 		end = start + bvec->bv_len - 1;
1442 
1443 		if (--bvec >= bio->bi_io_vec)
1444 			prefetchw(&bvec->bv_page->flags);
1445 
1446 		if (uptodate) {
1447 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1448 		} else {
1449 			ClearPageUptodate(page);
1450 			SetPageError(page);
1451 		}
1452 
1453 		unlock_extent(tree, start, end, GFP_ATOMIC);
1454 
1455 	} while (bvec >= bio->bi_io_vec);
1456 
1457 	bio_put(bio);
1458 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1459 	return 0;
1460 #endif
1461 }
1462 
1463 static int submit_extent_page(int rw, struct extent_map_tree *tree,
1464 			      struct page *page, sector_t sector,
1465 			      size_t size, unsigned long offset,
1466 			      struct block_device *bdev,
1467 			      bio_end_io_t end_io_func)
1468 {
1469 	struct bio *bio;
1470 	int ret = 0;
1471 
1472 	bio = bio_alloc(GFP_NOIO, 1);
1473 
1474 	bio->bi_sector = sector;
1475 	bio->bi_bdev = bdev;
1476 	bio->bi_io_vec[0].bv_page = page;
1477 	bio->bi_io_vec[0].bv_len = size;
1478 	bio->bi_io_vec[0].bv_offset = offset;
1479 
1480 	bio->bi_vcnt = 1;
1481 	bio->bi_idx = 0;
1482 	bio->bi_size = size;
1483 
1484 	bio->bi_end_io = end_io_func;
1485 	bio->bi_private = tree;
1486 
1487 	bio_get(bio);
1488 	submit_bio(rw, bio);
1489 
1490 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
1491 		ret = -EOPNOTSUPP;
1492 
1493 	bio_put(bio);
1494 	return ret;
1495 }
1496 
1497 void set_page_extent_mapped(struct page *page)
1498 {
1499 	if (!PagePrivate(page)) {
1500 		SetPagePrivate(page);
1501 		WARN_ON(!page->mapping->a_ops->invalidatepage);
1502 		set_page_private(page, EXTENT_PAGE_PRIVATE);
1503 		page_cache_get(page);
1504 	}
1505 }
1506 
1507 /*
1508  * basic readpage implementation.  Locked extent state structs are inserted
1509  * into the tree and are removed when the IO is done (by the end_io
1510  * handlers)
1511  */
1512 int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1513 			  get_extent_t *get_extent)
1514 {
1515 	struct inode *inode = page->mapping->host;
1516 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1517 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1518 	u64 end;
1519 	u64 cur = start;
1520 	u64 extent_offset;
1521 	u64 last_byte = i_size_read(inode);
1522 	u64 block_start;
1523 	u64 cur_end;
1524 	sector_t sector;
1525 	struct extent_map *em;
1526 	struct block_device *bdev;
1527 	int ret;
1528 	int nr = 0;
1529 	size_t page_offset = 0;
1530 	size_t iosize;
1531 	size_t blocksize = inode->i_sb->s_blocksize;
1532 
1533 	set_page_extent_mapped(page);
1534 
1535 	end = page_end;
1536 	lock_extent(tree, start, end, GFP_NOFS);
1537 
1538 	while (cur <= end) {
1539 		if (cur >= last_byte) {
1540 			iosize = PAGE_CACHE_SIZE - page_offset;
1541 			zero_user_page(page, page_offset, iosize, KM_USER0);
1542 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1543 					    GFP_NOFS);
1544 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1545 			break;
1546 		}
1547 		em = get_extent(inode, page, page_offset, cur, end, 0);
1548 		if (IS_ERR(em) || !em) {
1549 			SetPageError(page);
1550 			unlock_extent(tree, cur, end, GFP_NOFS);
1551 			break;
1552 		}
1553 
1554 		extent_offset = cur - em->start;
1555 		BUG_ON(em->end < cur);
1556 		BUG_ON(end < cur);
1557 
1558 		iosize = min(em->end - cur, end - cur) + 1;
1559 		cur_end = min(em->end, end);
1560 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1561 		sector = (em->block_start + extent_offset) >> 9;
1562 		bdev = em->bdev;
1563 		block_start = em->block_start;
1564 		free_extent_map(em);
1565 		em = NULL;
1566 
1567 		/* we've found a hole, just zero and go on */
1568 		if (block_start == EXTENT_MAP_HOLE) {
1569 			zero_user_page(page, page_offset, iosize, KM_USER0);
1570 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1571 					    GFP_NOFS);
1572 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1573 			cur = cur + iosize;
1574 			page_offset += iosize;
1575 			continue;
1576 		}
1577 		/* the get_extent function already copied into the page */
1578 		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1579 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1580 			cur = cur + iosize;
1581 			page_offset += iosize;
1582 			continue;
1583 		}
1584 
1585 		ret = 0;
1586 		if (tree->ops && tree->ops->readpage_io_hook) {
1587 			ret = tree->ops->readpage_io_hook(page, cur,
1588 							  cur + iosize - 1);
1589 		}
1590 		if (!ret) {
1591 			ret = submit_extent_page(READ, tree, page,
1592 						 sector, iosize, page_offset,
1593 						 bdev, end_bio_extent_readpage);
1594 		}
1595 		if (ret)
1596 			SetPageError(page);
1597 		cur = cur + iosize;
1598 		page_offset += iosize;
1599 		nr++;
1600 	}
1601 	if (!nr) {
1602 		if (!PageError(page))
1603 			SetPageUptodate(page);
1604 		unlock_page(page);
1605 	}
1606 	return 0;
1607 }
1608 EXPORT_SYMBOL(extent_read_full_page);
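
/*
 * A rough sketch of how a filesystem wires this in as its address_space
 * readpage operation; tree_for() and my_get_extent() are hypothetical
 * stand-ins for however the filesystem finds its tree and maps file offsets
 * to disk blocks:
 *
 *      static int my_readpage(struct file *file, struct page *page)
 *      {
 *              struct extent_map_tree *tree = tree_for(page->mapping->host);
 *
 *              return extent_read_full_page(tree, page, my_get_extent);
 *      }
 */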
1609 
1610 /*
1611  * the writepage semantics are similar to regular writepage.  extent
1612  * records are inserted to lock ranges in the tree, and as dirty areas
1613  * are found, they are marked writeback.  Then the lock bits are removed
1614  * and the end_io handler clears the writeback ranges
1615  */
1616 int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1617 			  get_extent_t *get_extent,
1618 			  struct writeback_control *wbc)
1619 {
1620 	struct inode *inode = page->mapping->host;
1621 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1622 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1623 	u64 end;
1624 	u64 cur = start;
1625 	u64 extent_offset;
1626 	u64 last_byte = i_size_read(inode);
1627 	u64 block_start;
1628 	u64 iosize;
1629 	sector_t sector;
1630 	struct extent_map *em;
1631 	struct block_device *bdev;
1632 	int ret;
1633 	int nr = 0;
1634 	size_t page_offset = 0;
1635 	size_t blocksize;
1636 	loff_t i_size = i_size_read(inode);
1637 	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1638 	u64 nr_delalloc;
1639 	u64 delalloc_end;
1640 
1641 	WARN_ON(!PageLocked(page));
1642 	if (page->index > end_index) {
1643 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1644 		unlock_page(page);
1645 		return 0;
1646 	}
1647 
1648 	if (page->index == end_index) {
1649 		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1650 		zero_user_page(page, offset,
1651 			       PAGE_CACHE_SIZE - offset, KM_USER0);
1652 	}
1653 
1654 	set_page_extent_mapped(page);
1655 
1656 	lock_extent(tree, start, page_end, GFP_NOFS);
1657 	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
1658 					       &delalloc_end,
1659 					       128 * 1024 * 1024);
1660 	if (nr_delalloc) {
1661 		tree->ops->fill_delalloc(inode, start, delalloc_end);
1662 		if (delalloc_end >= page_end + 1) {
1663 			clear_extent_bit(tree, page_end + 1, delalloc_end,
1664 					 EXTENT_LOCKED | EXTENT_DELALLOC,
1665 					 1, 0, GFP_NOFS);
1666 		}
1667 		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
1668 				 0, 0, GFP_NOFS);
1669 		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1670 			printk("found delalloc bits after clear extent_bit\n");
1671 		}
1672 	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1673 		printk("found delalloc bits after find_delalloc_range returns 0\n");
1674 	}
1675 
1676 	end = page_end;
1677 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1678 		printk("found delalloc bits after lock_extent\n");
1679 	}
1680 
1681 	if (last_byte <= start) {
1682 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1683 		goto done;
1684 	}
1685 
1686 	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1687 	blocksize = inode->i_sb->s_blocksize;
1688 
1689 	while (cur <= end) {
1690 		if (cur >= last_byte) {
1691 			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1692 			break;
1693 		}
1694 		em = get_extent(inode, page, page_offset, cur, end, 1);
1695 		if (IS_ERR(em) || !em) {
1696 			SetPageError(page);
1697 			break;
1698 		}
1699 
1700 		extent_offset = cur - em->start;
1701 		BUG_ON(em->end < cur);
1702 		BUG_ON(end < cur);
1703 		iosize = min(em->end - cur, end - cur) + 1;
1704 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1705 		sector = (em->block_start + extent_offset) >> 9;
1706 		bdev = em->bdev;
1707 		block_start = em->block_start;
1708 		free_extent_map(em);
1709 		em = NULL;
1710 
1711 		if (block_start == EXTENT_MAP_HOLE ||
1712 		    block_start == EXTENT_MAP_INLINE) {
1713 			clear_extent_dirty(tree, cur,
1714 					   cur + iosize - 1, GFP_NOFS);
1715 			cur = cur + iosize;
1716 			page_offset += iosize;
1717 			continue;
1718 		}
1719 
1720 		/* leave this out until we have a page_mkwrite call */
1721 		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1722 				   EXTENT_DIRTY, 0)) {
1723 			cur = cur + iosize;
1724 			page_offset += iosize;
1725 			continue;
1726 		}
1727 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1728 		if (tree->ops && tree->ops->writepage_io_hook) {
1729 			ret = tree->ops->writepage_io_hook(page, cur,
1730 						cur + iosize - 1);
1731 		} else {
1732 			ret = 0;
1733 		}
1734 		if (ret)
1735 			SetPageError(page);
1736 		else {
1737 			set_range_writeback(tree, cur, cur + iosize - 1);
1738 			ret = submit_extent_page(WRITE, tree, page, sector,
1739 						 iosize, page_offset, bdev,
1740 						 end_bio_extent_writepage);
1741 			if (ret)
1742 				SetPageError(page);
1743 		}
1744 		cur = cur + iosize;
1745 		page_offset += iosize;
1746 		nr++;
1747 	}
1748 done:
1749 	unlock_extent(tree, start, page_end, GFP_NOFS);
1750 	unlock_page(page);
1751 	return 0;
1752 }
1753 EXPORT_SYMBOL(extent_write_full_page);
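
/*
 * A rough sketch of the matching writepage hook, with tree_for() and
 * my_get_extent() the same hypothetical stand-ins as in the readpage sketch
 * above:
 *
 *      static int my_writepage(struct page *page,
 *                              struct writeback_control *wbc)
 *      {
 *              struct extent_map_tree *tree = tree_for(page->mapping->host);
 *
 *              return extent_write_full_page(tree, page, my_get_extent, wbc);
 *      }
 */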
1754 
1755 /*
1756  * basic invalidatepage code, this waits on any locked or writeback
1757  * ranges corresponding to the page, and then deletes any extent state
1758  * records from the tree
1759  */
1760 int extent_invalidatepage(struct extent_map_tree *tree,
1761 			  struct page *page, unsigned long offset)
1762 {
1763 	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
1764 	u64 end = start + PAGE_CACHE_SIZE - 1;
1765 	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1766 
1767 	start += (offset + blocksize - 1) & ~(blocksize - 1);
1768 	if (start > end)
1769 		return 0;
1770 
1771 	lock_extent(tree, start, end, GFP_NOFS);
1772 	wait_on_extent_writeback(tree, start, end);
1773 	clear_extent_bit(tree, start, end,
1774 			 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
1775 			 1, 1, GFP_NOFS);
1776 	return 0;
1777 }
1778 EXPORT_SYMBOL(extent_invalidatepage);
1779 
1780 /*
1781  * simple commit_write call, set_range_dirty is used to mark both
1782  * the pages and the extent records as dirty
1783  */
1784 int extent_commit_write(struct extent_map_tree *tree,
1785 			struct inode *inode, struct page *page,
1786 			unsigned from, unsigned to)
1787 {
1788 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1789 
1790 	set_page_extent_mapped(page);
1791 	set_page_dirty(page);
1792 
1793 	if (pos > inode->i_size) {
1794 		i_size_write(inode, pos);
1795 		mark_inode_dirty(inode);
1796 	}
1797 	return 0;
1798 }
1799 EXPORT_SYMBOL(extent_commit_write);
1800 
1801 int extent_prepare_write(struct extent_map_tree *tree,
1802 			 struct inode *inode, struct page *page,
1803 			 unsigned from, unsigned to, get_extent_t *get_extent)
1804 {
1805 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1806 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1807 	u64 block_start;
1808 	u64 orig_block_start;
1809 	u64 block_end;
1810 	u64 cur_end;
1811 	struct extent_map *em;
1812 	unsigned blocksize = 1 << inode->i_blkbits;
1813 	size_t page_offset = 0;
1814 	size_t block_off_start;
1815 	size_t block_off_end;
1816 	int err = 0;
1817 	int iocount = 0;
1818 	int ret = 0;
1819 	int isnew;
1820 
1821 	set_page_extent_mapped(page);
1822 
1823 	block_start = (page_start + from) & ~((u64)blocksize - 1);
1824 	block_end = (page_start + to - 1) | (blocksize - 1);
1825 	orig_block_start = block_start;
1826 
1827 	lock_extent(tree, page_start, page_end, GFP_NOFS);
1828 	while(block_start <= block_end) {
1829 		em = get_extent(inode, page, page_offset, block_start,
1830 				block_end, 1);
1831 		if (IS_ERR(em) || !em) {
1832 			goto err;
1833 		}
1834 		cur_end = min(block_end, em->end);
1835 		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1836 		block_off_end = block_off_start + blocksize;
1837 		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1838 
1839 		if (!PageUptodate(page) && isnew &&
1840 		    (block_off_end > to || block_off_start < from)) {
1841 			void *kaddr;
1842 
1843 			kaddr = kmap_atomic(page, KM_USER0);
1844 			if (block_off_end > to)
1845 				memset(kaddr + to, 0, block_off_end - to);
1846 			if (block_off_start < from)
1847 				memset(kaddr + block_off_start, 0,
1848 				       from - block_off_start);
1849 			flush_dcache_page(page);
1850 			kunmap_atomic(kaddr, KM_USER0);
1851 		}
1852 		if (!isnew && !PageUptodate(page) &&
1853 		    (block_off_end > to || block_off_start < from) &&
1854 		    !test_range_bit(tree, block_start, cur_end,
1855 				    EXTENT_UPTODATE, 1)) {
1856 			u64 sector;
1857 			u64 extent_offset = block_start - em->start;
1858 			size_t iosize;
1859 			sector = (em->block_start + extent_offset) >> 9;
1860 			iosize = (cur_end - block_start + blocksize - 1) &
1861 				~((u64)blocksize - 1);
1862 			/*
1863 			 * we've already got the extent locked, but we
1864 			 * need to split the state such that our end_bio
1865 			 * handler can clear the lock.
1866 			 */
1867 			set_extent_bit(tree, block_start,
1868 				       block_start + iosize - 1,
1869 				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1870 			ret = submit_extent_page(READ, tree, page,
1871 					 sector, iosize, page_offset, em->bdev,
1872 					 end_bio_extent_preparewrite);
1873 			iocount++;
1874 			block_start = block_start + iosize;
1875 		} else {
1876 			set_extent_uptodate(tree, block_start, cur_end,
1877 					    GFP_NOFS);
1878 			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1879 			block_start = cur_end + 1;
1880 		}
1881 		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1882 		free_extent_map(em);
1883 	}
1884 	if (iocount) {
1885 		wait_extent_bit(tree, orig_block_start,
1886 				block_end, EXTENT_LOCKED);
1887 	}
1888 	check_page_uptodate(tree, page);
1889 err:
1890 	/* FIXME, zero out newly allocated blocks on error */
1891 	return err;
1892 }
1893 EXPORT_SYMBOL(extent_prepare_write);
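
/*
 * A rough sketch of the expected pairing with extent_commit_write() above:
 * prepare_write brings the blocks touched by bytes [from, to) uptodate, the
 * caller copies the user data into the page, and commit_write marks the page
 * dirty and pushes i_size forward if needed.  my_get_extent() is the same
 * hypothetical callback as in the earlier sketches:
 *
 *      extent_prepare_write(tree, inode, page, from, to, my_get_extent);
 *      (copy the user's bytes into the page between 'from' and 'to')
 *      extent_commit_write(tree, inode, page, from, to);
 */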
1894 
1895 /*
1896  * a helper for releasepage.  As long as there are no locked extents
1897  * in the range corresponding to the page, both state records and extent
1898  * map records are removed
1899  */
1900 int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1901 {
1902 	struct extent_map *em;
1903 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1904 	u64 end = start + PAGE_CACHE_SIZE - 1;
1905 	u64 orig_start = start;
1906 	int ret = 1;
1907 
1908 	while (start <= end) {
1909 		em = lookup_extent_mapping(tree, start, end);
1910 		if (!em || IS_ERR(em))
1911 			break;
1912 		if (!test_range_bit(tree, em->start, em->end,
1913 				    EXTENT_LOCKED, 0)) {
1914 			remove_extent_mapping(tree, em);
1915 			/* once for the rb tree */
1916 			free_extent_map(em);
1917 		}
1918 		start = em->end + 1;
1919 		/* once for us */
1920 		free_extent_map(em);
1921 	}
1922 	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
1923 		ret = 0;
1924 	else
1925 		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1926 				 1, 1, GFP_NOFS);
1927 	return ret;
1928 }
1929 EXPORT_SYMBOL(try_release_extent_mapping);
1930 
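/*
 * bmap helper: map a file block number to an on-disk sector.  holes and
 * inline extents have no meaningful disk address, so they map to
 * sector zero.
 */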
1931 sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
1932 		get_extent_t *get_extent)
1933 {
1934 	struct inode *inode = mapping->host;
1935 	u64 start = (u64)iblock << inode->i_blkbits;
1936 	u64 end = start + (1 << inode->i_blkbits) - 1;
1937 	sector_t sector = 0;
1938 	struct extent_map *em;
1939 
1940 	em = get_extent(inode, NULL, 0, start, end, 0);
1941 	if (!em || IS_ERR(em))
1942 		return 0;
1943 
1944 	if (em->block_start == EXTENT_MAP_INLINE ||
1945 	    em->block_start == EXTENT_MAP_HOLE)
1946 		goto out;
1947 
1948 	sector = (em->block_start + start - em->start) >> inode->i_blkbits;
1949 out:
1950 	free_extent_map(em);
1951 	return sector;
1952 }
1953 
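/*
 * small per-tree lru of recently used extent buffers.  add_lru pins a
 * reference and inserts at the head of the list; once the list grows
 * past BUFFER_LRU_MAX the coldest entry is dropped.  the caller must
 * hold tree->lru_lock.
 */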
1954 static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
1955 {
1956 	if (list_empty(&eb->lru)) {
1957 		extent_buffer_get(eb);
1958 		list_add(&eb->lru, &tree->buffer_lru);
1959 		tree->lru_size++;
1960 		if (tree->lru_size >= BUFFER_LRU_MAX) {
1961 			struct extent_buffer *rm;
1962 			rm = list_entry(tree->buffer_lru.prev,
1963 					struct extent_buffer, lru);
1964 			tree->lru_size--;
1965 			list_del(&rm->lru);
1966 			free_extent_buffer(rm);
1967 		}
1968 	} else
1969 		list_move(&eb->lru, &tree->buffer_lru);
1970 	return 0;
1971 }
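
/*
 * look in the lru for a buffer with a matching start and len.  grab a
 * reference and return it, or NULL if it isn't cached.  the caller must
 * hold tree->lru_lock.
 */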
1972 static struct extent_buffer *find_lru(struct extent_map_tree *tree,
1973 				      u64 start, unsigned long len)
1974 {
1975 	struct list_head *lru = &tree->buffer_lru;
1976 	struct list_head *cur = lru->next;
1977 	struct extent_buffer *eb;
1978 
1979 	if (list_empty(lru))
1980 		return NULL;
1981 
1982 	do {
1983 		eb = list_entry(cur, struct extent_buffer, lru);
1984 		if (eb->start == start && eb->len == len) {
1985 			extent_buffer_get(eb);
1986 			return eb;
1987 		}
1988 		cur = cur->next;
1989 	} while (cur != lru);
1990 	return NULL;
1991 }
1992 
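/* number of pages needed to cover the byte range [start, start + len) */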
1993 static inline unsigned long num_extent_pages(u64 start, u64 len)
1994 {
1995 	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
1996 		(start >> PAGE_CACHE_SHIFT);
1997 }
1998 
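/*
 * return page i of an extent buffer.  page zero is cached in the buffer
 * itself, the rest are looked up in the page cache radix tree.
 */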
1999 static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2000 					      unsigned long i)
2001 {
2002 	struct page *p;
2003 	struct address_space *mapping;
2004 
2005 	if (i == 0)
2006 		return eb->first_page;
2007 	i += eb->start >> PAGE_CACHE_SHIFT;
2008 	mapping = eb->first_page->mapping;
2009 	read_lock_irq(&mapping->tree_lock);
2010 	p = radix_tree_lookup(&mapping->page_tree, i);
2011 	read_unlock_irq(&mapping->tree_lock);
2012 	return p;
2013 }
2014 
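/*
 * find a buffer in the lru, or allocate a fresh one from the slab and
 * add it to the lru.  returns the buffer with a reference held for the
 * caller (the lru keeps its own), or NULL if the allocation fails.
 */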
2015 static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
2016 						   u64 start,
2017 						   unsigned long len,
2018 						   gfp_t mask)
2019 {
2020 	struct extent_buffer *eb = NULL;
2021 
2022 	spin_lock(&tree->lru_lock);
2023 	eb = find_lru(tree, start, len);
2024 	if (eb) {
2025 		goto lru_add;
2026 	}
2027 	spin_unlock(&tree->lru_lock);
2028 
2029 	/* eb is always NULL here: an lru hit above jumps straight to lru_add */
2030 	eb = kmem_cache_zalloc(extent_buffer_cache, mask);
2031 	if (!eb)
2032 		return NULL;
2033 
2034 	INIT_LIST_HEAD(&eb->lru);
2035 	eb->start = start;
2036 	eb->len = len;
2037 	atomic_set(&eb->refs, 1);
2038 
2039 	spin_lock(&tree->lru_lock);
2040 lru_add:
2041 	add_lru(tree, eb);
2042 	spin_unlock(&tree->lru_lock);
2043 	return eb;
2044 }
2045 
2046 static void __free_extent_buffer(struct extent_buffer *eb)
2047 {
2048 	kmem_cache_free(extent_buffer_cache, eb);
2049 }
2050 
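/*
 * find or allocate the extent buffer for [start, start + len) and pin
 * all of the pages that back it.  page0, when supplied, is used as the
 * first page instead of looking it up in the page cache.  a buffer that
 * was already filled is returned as is; NULL is returned on failure.
 */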
2051 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
2052 					  u64 start, unsigned long len,
2053 					  struct page *page0,
2054 					  gfp_t mask)
2055 {
2056 	unsigned long num_pages = num_extent_pages(start, len);
2057 	unsigned long i;
2058 	unsigned long index = start >> PAGE_CACHE_SHIFT;
2059 	struct extent_buffer *eb;
2060 	struct page *p;
2061 	struct address_space *mapping = tree->mapping;
2062 	int uptodate = 1;
2063 
2064 	eb = __alloc_extent_buffer(tree, start, len, mask);
2065 	if (!eb || IS_ERR(eb))
2066 		return NULL;
2067 
2068 	if (eb->flags & EXTENT_BUFFER_FILLED)
2069 		return eb;
2070 
2071 	if (page0) {
2072 		eb->first_page = page0;
2073 		i = 1;
2074 		index++;
2075 		page_cache_get(page0);
2076 		mark_page_accessed(page0);
2077 		set_page_extent_mapped(page0);
2078 		set_page_private(page0, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2079 				 len << 2);
2080 	} else {
2081 		i = 0;
2082 	}
2083 	for (; i < num_pages; i++, index++) {
2084 		p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2085 		if (!p) {
2086 			WARN_ON(1);
2087 			/* make sure the free only frees the pages we've
2088 			 * grabbed a reference on
2089 			 */
2090 			eb->len = i << PAGE_CACHE_SHIFT;
2091 			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
2092 			goto fail;
2093 		}
2094 		set_page_extent_mapped(p);
2095 		mark_page_accessed(p);
2096 		if (i == 0) {
2097 			eb->first_page = p;
2098 			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2099 					 len << 2);
2100 		} else {
2101 			set_page_private(p, EXTENT_PAGE_PRIVATE);
2102 		}
2103 		if (!PageUptodate(p))
2104 			uptodate = 0;
2105 		unlock_page(p);
2106 	}
2107 	if (uptodate)
2108 		eb->flags |= EXTENT_UPTODATE;
2109 	eb->flags |= EXTENT_BUFFER_FILLED;
2110 	return eb;
2111 fail:
2112 	free_extent_buffer(eb);
2113 	return NULL;
2114 }
2115 EXPORT_SYMBOL(alloc_extent_buffer);
2116 
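/*
 * like alloc_extent_buffer, but only uses pages already present in the
 * page cache (find_lock_page), so it never allocates new pages.
 */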
2117 struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
2118 					 u64 start, unsigned long len,
2119 					  gfp_t mask)
2120 {
2121 	unsigned long num_pages = num_extent_pages(start, len);
2122 	unsigned long i;
	unsigned long index = start >> PAGE_CACHE_SHIFT;
2123 	struct extent_buffer *eb;
2124 	struct page *p;
2125 	struct address_space *mapping = tree->mapping;
2126 	int uptodate = 1;
2127 
2128 	eb = __alloc_extent_buffer(tree, start, len, mask);
2129 	if (!eb || IS_ERR(eb))
2130 		return NULL;
2131 
2132 	if (eb->flags & EXTENT_BUFFER_FILLED)
2133 		return eb;
2134 
2135 	for (i = 0; i < num_pages; i++, index++) {
2136 		p = find_lock_page(mapping, index);
2137 		if (!p) {
2138 			/* make sure the free only frees the pages we've
2139 			 * grabbed a reference on
2140 			 */
2141 			eb->len = i << PAGE_CACHE_SHIFT;
2142 			eb->start &= ~((u64)PAGE_CACHE_SIZE - 1);
2143 			goto fail;
2144 		}
2145 		set_page_extent_mapped(p);
2146 		mark_page_accessed(p);
2147 
2148 		if (i == 0) {
2149 			eb->first_page = p;
2150 			set_page_private(p, EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2151 					 len << 2);
2152 		} else {
2153 			set_page_private(p, EXTENT_PAGE_PRIVATE);
2154 		}
2155 
2156 		if (!PageUptodate(p))
2157 			uptodate = 0;
2158 		unlock_page(p);
2159 	}
2160 	if (uptodate)
2161 		eb->flags |= EXTENT_UPTODATE;
2162 	eb->flags |= EXTENT_BUFFER_FILLED;
2163 	return eb;
2164 fail:
2165 	free_extent_buffer(eb);
2166 	return NULL;
2167 }
2168 EXPORT_SYMBOL(find_extent_buffer);
2169 
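/*
 * drop a reference on an extent buffer.  the last reference releases
 * the page cache references taken at allocation time and frees the
 * buffer itself.
 */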
2170 void free_extent_buffer(struct extent_buffer *eb)
2171 {
2172 	unsigned long i;
2173 	unsigned long num_pages;
2174 
2175 	if (!eb)
2176 		return;
2177 
2178 	if (!atomic_dec_and_test(&eb->refs))
2179 		return;
2180 
2181 	num_pages = num_extent_pages(eb->start, eb->len);
2182 
2183 	for (i = 0; i < num_pages; i++) {
2184 		page_cache_release(extent_buffer_page(eb, i));
2185 	}
2186 	__free_extent_buffer(eb);
2187 }
2188 EXPORT_SYMBOL(free_extent_buffer);
2189 
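/*
 * clear the dirty bits for an extent buffer, both in the extent state
 * tree and on the backing pages, taking care not to clean pages that
 * still hold dirty data belonging to a neighbouring buffer.
 */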
2190 int clear_extent_buffer_dirty(struct extent_map_tree *tree,
2191 			      struct extent_buffer *eb)
2192 {
2193 	int set;
2194 	unsigned long i;
2195 	unsigned long num_pages;
2196 	struct page *page;
2197 
2198 	u64 start = eb->start;
2199 	u64 end = start + eb->len - 1;
2200 
2201 	set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2202 	num_pages = num_extent_pages(eb->start, eb->len);
2203 
2204 	for (i = 0; i < num_pages; i++) {
2205 		page = extent_buffer_page(eb, i);
2206 		lock_page(page);
2207 		/*
2208 		 * if we're on the last page or the first page and the
2209 		 * block isn't aligned on a page boundary, do extra checks
2210 		 * to make sure we don't clean a page that is partially dirty
2211 		 */
2212 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2213 		    ((i == num_pages - 1) &&
2214 		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2215 			start = (u64)page->index << PAGE_CACHE_SHIFT;
2216 			end  = start + PAGE_CACHE_SIZE - 1;
2217 			if (test_range_bit(tree, start, end,
2218 					   EXTENT_DIRTY, 0)) {
2219 				unlock_page(page);
2220 				continue;
2221 			}
2222 		}
2223 		clear_page_dirty_for_io(page);
2224 		unlock_page(page);
2225 	}
2226 	return 0;
2227 }
2228 EXPORT_SYMBOL(clear_extent_buffer_dirty);
2229 
2230 int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
2231 				    struct extent_buffer *eb)
2232 {
2233 	return wait_on_extent_writeback(tree, eb->start,
2234 					eb->start + eb->len - 1);
2235 }
2236 EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2237 
2238 int set_extent_buffer_dirty(struct extent_map_tree *tree,
2239 			     struct extent_buffer *eb)
2240 {
2241 	unsigned long i;
2242 	unsigned long num_pages;
2243 
2244 	num_pages = num_extent_pages(eb->start, eb->len);
2245 	for (i = 0; i < num_pages; i++) {
2246 		struct page *page = extent_buffer_page(eb, i);
2247 		/* writepage may need to do something special for the
2248 		 * first page, so we have to make sure page->private is
2249 		 * properly set.  releasepage may drop page->private
2250 		 * on us if the page isn't already dirty.
2251 		 */
2252 		if (i == 0) {
2253 			lock_page(page);
2254 			set_page_private(page,
2255 					 EXTENT_PAGE_PRIVATE_FIRST_PAGE |
2256 					 eb->len << 2);
2257 		}
2258 		__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
2259 		if (i == 0)
2260 			unlock_page(page);
2261 	}
2262 	return set_extent_dirty(tree, eb->start,
2263 				eb->start + eb->len - 1, GFP_NOFS);
2264 }
2265 EXPORT_SYMBOL(set_extent_buffer_dirty);
2266 
2267 int set_extent_buffer_uptodate(struct extent_map_tree *tree,
2268 				struct extent_buffer *eb)
2269 {
2270 	unsigned long i;
2271 	struct page *page;
2272 	unsigned long num_pages;
2273 
2274 	num_pages = num_extent_pages(eb->start, eb->len);
2275 
2276 	set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2277 			    GFP_NOFS);
2278 	for (i = 0; i < num_pages; i++) {
2279 		page = extent_buffer_page(eb, i);
2280 		if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2281 		    ((i == num_pages - 1) &&
2282 		     ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2283 			check_page_uptodate(tree, page);
2284 			continue;
2285 		}
2286 		SetPageUptodate(page);
2287 	}
2288 	return 0;
2289 }
2290 EXPORT_SYMBOL(set_extent_buffer_uptodate);
2291 
2292 int extent_buffer_uptodate(struct extent_map_tree *tree,
2293 			     struct extent_buffer *eb)
2294 {
2295 	if (eb->flags & EXTENT_UPTODATE)
2296 		return 1;
2297 	return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2298 			   EXTENT_UPTODATE, 1);
2299 }
2300 EXPORT_SYMBOL(extent_buffer_uptodate);
2301 
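/*
 * read any backing pages of an extent buffer that aren't uptodate yet.
 * a nonzero 'start' skips the pages before the one containing it.  with
 * wait == 0 reads are only started (pages that can't be locked without
 * blocking are skipped); with wait != 0 we block until the reads finish
 * and return -EIO if any page is still not uptodate.
 */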
2302 int read_extent_buffer_pages(struct extent_map_tree *tree,
2303 			     struct extent_buffer *eb,
2304 			     u64 start,
2305 			     int wait)
2306 {
2307 	unsigned long i;
2308 	unsigned long start_i;
2309 	struct page *page;
2310 	int err;
2311 	int ret = 0;
2312 	unsigned long num_pages;
2313 
2314 	if (eb->flags & EXTENT_UPTODATE)
2315 		return 0;
2316 
2317 	if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2318 			   EXTENT_UPTODATE, 1)) {
2319 		return 0;
2320 	}
2321 	if (start) {
2322 		WARN_ON(start < eb->start);
2323 		start_i = (start >> PAGE_CACHE_SHIFT) -
2324 			(eb->start >> PAGE_CACHE_SHIFT);
2325 	} else {
2326 		start_i = 0;
2327 	}
2328 
2329 	num_pages = num_extent_pages(eb->start, eb->len);
2330 	for (i = start_i; i < num_pages; i++) {
2331 		page = extent_buffer_page(eb, i);
2332 		if (PageUptodate(page)) {
2333 			continue;
2334 		}
2335 		if (!wait) {
2336 			if (TestSetPageLocked(page)) {
2337 				continue;
2338 			}
2339 		} else {
2340 			lock_page(page);
2341 		}
2342 		if (!PageUptodate(page)) {
2343 			err = page->mapping->a_ops->readpage(NULL, page);
2344 			if (err) {
2345 				ret = err;
2346 			}
2347 		} else {
2348 			unlock_page(page);
2349 		}
2350 	}
2351 
2352 	if (ret || !wait) {
2353 		return ret;
2354 	}
2355 
2356 	for (i = start_i; i < num_pages; i++) {
2357 		page = extent_buffer_page(eb, i);
2358 		wait_on_page_locked(page);
2359 		if (!PageUptodate(page)) {
2360 			ret = -EIO;
2361 		}
2362 	}
2363 	if (!ret)
2364 		eb->flags |= EXTENT_UPTODATE;
2365 	return ret;
2366 }
2367 EXPORT_SYMBOL(read_extent_buffer_pages);
2368 
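/*
 * copy len bytes, starting at byte offset 'start' within the extent
 * buffer, into dstv, walking the backing pages one at a time.
 */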
2369 void read_extent_buffer(struct extent_buffer *eb, void *dstv,
2370 			unsigned long start,
2371 			unsigned long len)
2372 {
2373 	size_t cur;
2374 	size_t offset;
2375 	struct page *page;
2376 	char *kaddr;
2377 	char *dst = (char *)dstv;
2378 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2379 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2380 	unsigned long num_pages = num_extent_pages(eb->start, eb->len);
2381 
2382 	WARN_ON(start > eb->len);
2383 	WARN_ON(start + len > eb->start + eb->len);
2384 
2385 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2386 
2387 	while(len > 0) {
2388 		page = extent_buffer_page(eb, i);
2389 		if (!PageUptodate(page)) {
2390 			printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len);
2391 			WARN_ON(1);
2392 		}
2393 		WARN_ON(!PageUptodate(page));
2394 
2395 		cur = min(len, (PAGE_CACHE_SIZE - offset));
2396 		kaddr = kmap_atomic(page, KM_USER1);
2397 		memcpy(dst, kaddr + offset, cur);
2398 		kunmap_atomic(kaddr, KM_USER1);
2399 
2400 		dst += cur;
2401 		len -= cur;
2402 		offset = 0;
2403 		i++;
2404 	}
2405 }
2406 EXPORT_SYMBOL(read_extent_buffer);
2407 
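/*
 * kmap the single page holding [start, start + min_len) and return
 * pointers into it.  fails with -EINVAL if the range straddles a page
 * boundary.  the mapping must be dropped with unmap_extent_buffer().
 */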
2408 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
2409 			       unsigned long min_len, char **token, char **map,
2410 			       unsigned long *map_start,
2411 			       unsigned long *map_len, int km)
2412 {
2413 	size_t offset = start & (PAGE_CACHE_SIZE - 1);
2414 	char *kaddr;
2415 	struct page *p;
2416 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2417 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2418 	unsigned long end_i = (start_offset + start + min_len - 1) >>
2419 		PAGE_CACHE_SHIFT;
2420 
2421 	if (i != end_i)
2422 		return -EINVAL;
2423 
2424 	if (i == 0) {
2425 		offset = start_offset;
2426 		*map_start = 0;
2427 	} else {
2428 		offset = 0;
2429 		*map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
2430 	}
2431 	if (start + min_len > eb->len) {
2432 		printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n",
		       eb->start, eb->len, start, min_len);
2433 		WARN_ON(1);
2434 	}
2435 
2436 	p = extent_buffer_page(eb, i);
2437 	WARN_ON(!PageUptodate(p));
2438 	kaddr = kmap_atomic(p, km);
2439 	*token = kaddr;
2440 	*map = kaddr + offset;
2441 	*map_len = PAGE_CACHE_SIZE - offset;
2442 	return 0;
2443 }
2444 EXPORT_SYMBOL(map_private_extent_buffer);
2445 
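/*
 * like map_private_extent_buffer, but first drops any mapping cached in
 * the extent buffer and, if one was cached, remembers the new mapping
 * in its place.
 *
 * illustrative usage only (not copied from a real caller):
 *
 *	char *token;
 *	char *kaddr;
 *	unsigned long map_start;
 *	unsigned long map_len;
 *
 *	if (!map_extent_buffer(eb, off, sizeof(u64), &token, &kaddr,
 *			       &map_start, &map_len, KM_USER0)) {
 *		(read the u64 at kaddr + off - map_start here)
 *		unmap_extent_buffer(eb, token, KM_USER0);
 *	}
 */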
2446 int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
2447 		      unsigned long min_len,
2448 		      char **token, char **map,
2449 		      unsigned long *map_start,
2450 		      unsigned long *map_len, int km)
2451 {
2452 	int err;
2453 	int save = 0;
2454 	if (eb->map_token) {
2455 		unmap_extent_buffer(eb, eb->map_token, km);
2456 		eb->map_token = NULL;
2457 		save = 1;
2458 	}
2459 	err = map_private_extent_buffer(eb, start, min_len, token, map,
2460 				       map_start, map_len, km);
2461 	if (!err && save) {
2462 		eb->map_token = *token;
2463 		eb->kaddr = *map;
2464 		eb->map_start = *map_start;
2465 		eb->map_len = *map_len;
2466 	}
2467 	return err;
2468 }
2469 EXPORT_SYMBOL(map_extent_buffer);
2470 
2471 void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
2472 {
2473 	kunmap_atomic(token, km);
2474 }
2475 EXPORT_SYMBOL(unmap_extent_buffer);
2476 
2477 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
2478 			  unsigned long start,
2479 			  unsigned long len)
2480 {
2481 	size_t cur;
2482 	size_t offset;
2483 	struct page *page;
2484 	char *kaddr;
2485 	char *ptr = (char *)ptrv;
2486 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2487 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2488 	int ret = 0;
2489 
2490 	WARN_ON(start > eb->len);
2491 	WARN_ON(start + len > eb->start + eb->len);
2492 
2493 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2494 
2495 	while(len > 0) {
2496 		page = extent_buffer_page(eb, i);
2497 		WARN_ON(!PageUptodate(page));
2498 
2499 		cur = min(len, (PAGE_CACHE_SIZE - offset));
2500 
2501 		kaddr = kmap_atomic(page, KM_USER0);
2502 		ret = memcmp(ptr, kaddr + offset, cur);
2503 		kunmap_atomic(kaddr, KM_USER0);
2504 		if (ret)
2505 			break;
2506 
2507 		ptr += cur;
2508 		len -= cur;
2509 		offset = 0;
2510 		i++;
2511 	}
2512 	return ret;
2513 }
2514 EXPORT_SYMBOL(memcmp_extent_buffer);
2515 
2516 void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
2517 			 unsigned long start, unsigned long len)
2518 {
2519 	size_t cur;
2520 	size_t offset;
2521 	struct page *page;
2522 	char *kaddr;
2523 	char *src = (char *)srcv;
2524 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2525 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2526 
2527 	WARN_ON(start > eb->len);
2528 	WARN_ON(start + len > eb->start + eb->len);
2529 
2530 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2531 
2532 	while(len > 0) {
2533 		page = extent_buffer_page(eb, i);
2534 		WARN_ON(!PageUptodate(page));
2535 
2536 		cur = min(len, PAGE_CACHE_SIZE - offset);
2537 		kaddr = kmap_atomic(page, KM_USER1);
2538 		memcpy(kaddr + offset, src, cur);
2539 		kunmap_atomic(kaddr, KM_USER1);
2540 
2541 		src += cur;
2542 		len -= cur;
2543 		offset = 0;
2544 		i++;
2545 	}
2546 }
2547 EXPORT_SYMBOL(write_extent_buffer);
2548 
2549 void memset_extent_buffer(struct extent_buffer *eb, char c,
2550 			  unsigned long start, unsigned long len)
2551 {
2552 	size_t cur;
2553 	size_t offset;
2554 	struct page *page;
2555 	char *kaddr;
2556 	size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2557 	unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2558 
2559 	WARN_ON(start > eb->len);
2560 	WARN_ON(start + len > eb->start + eb->len);
2561 
2562 	offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
2563 
2564 	while(len > 0) {
2565 		page = extent_buffer_page(eb, i);
2566 		WARN_ON(!PageUptodate(page));
2567 
2568 		cur = min(len, PAGE_CACHE_SIZE - offset);
2569 		kaddr = kmap_atomic(page, KM_USER0);
2570 		memset(kaddr + offset, c, cur);
2571 		kunmap_atomic(kaddr, KM_USER0);
2572 
2573 		len -= cur;
2574 		offset = 0;
2575 		i++;
2576 	}
2577 }
2578 EXPORT_SYMBOL(memset_extent_buffer);
2579 
2580 void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
2581 			unsigned long dst_offset, unsigned long src_offset,
2582 			unsigned long len)
2583 {
2584 	u64 dst_len = dst->len;
2585 	size_t cur;
2586 	size_t offset;
2587 	struct page *page;
2588 	char *kaddr;
2589 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2590 	unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2591 
2592 	WARN_ON(src->len != dst_len);
2593 
2594 	offset = (start_offset + dst_offset) &
2595 		((unsigned long)PAGE_CACHE_SIZE - 1);
2596 
2597 	while(len > 0) {
2598 		page = extent_buffer_page(dst, i);
2599 		WARN_ON(!PageUptodate(page));
2600 
2601 		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
2602 
2603 		kaddr = kmap_atomic(page, KM_USER0);
2604 		read_extent_buffer(src, kaddr + offset, src_offset, cur);
2605 		kunmap_atomic(kaddr, KM_USER0);
2606 
2607 		src_offset += cur;
2608 		len -= cur;
2609 		offset = 0;
2610 		i++;
2611 	}
2612 }
2613 EXPORT_SYMBOL(copy_extent_buffer);
2614 
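/*
 * page-level helpers for memmove_extent_buffer and memcpy_extent_buffer.
 * move_pages tolerates overlapping source and destination (memmove
 * within a single page, a backwards byte copy across pages), while
 * copy_pages is a plain forward memcpy.
 */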
2615 static void move_pages(struct page *dst_page, struct page *src_page,
2616 		       unsigned long dst_off, unsigned long src_off,
2617 		       unsigned long len)
2618 {
2619 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2620 	if (dst_page == src_page) {
2621 		memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
2622 	} else {
2623 		char *src_kaddr = kmap_atomic(src_page, KM_USER1);
2624 		char *p = dst_kaddr + dst_off + len;
2625 		char *s = src_kaddr + src_off + len;
2626 
2627 		while (len--)
2628 			*--p = *--s;
2629 
2630 		kunmap_atomic(src_kaddr, KM_USER1);
2631 	}
2632 	kunmap_atomic(dst_kaddr, KM_USER0);
2633 }
2634 
2635 static void copy_pages(struct page *dst_page, struct page *src_page,
2636 		       unsigned long dst_off, unsigned long src_off,
2637 		       unsigned long len)
2638 {
2639 	char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2640 	char *src_kaddr;
2641 
2642 	if (dst_page != src_page)
2643 		src_kaddr = kmap_atomic(src_page, KM_USER1);
2644 	else
2645 		src_kaddr = dst_kaddr;
2646 
2647 	memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
2648 	kunmap_atomic(dst_kaddr, KM_USER0);
2649 	if (dst_page != src_page)
2650 		kunmap_atomic(src_kaddr, KM_USER1);
2651 }
2652 
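/*
 * forward copy of len bytes within one extent buffer, from src_offset
 * to dst_offset.  overlapping ranges where the destination starts above
 * the source need memmove_extent_buffer instead.
 */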
2653 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2654 			   unsigned long src_offset, unsigned long len)
2655 {
2656 	size_t cur;
2657 	size_t dst_off_in_page;
2658 	size_t src_off_in_page;
2659 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2660 	unsigned long dst_i;
2661 	unsigned long src_i;
2662 
2663 	if (src_offset + len > dst->len) {
2664 		printk("memcpy bogus src_offset %lu copy len %lu dst len %lu\n",
2665 		       src_offset, len, dst->len);
2666 		BUG_ON(1);
2667 	}
2668 	if (dst_offset + len > dst->len) {
2669 		printk("memcpy bogus dst_offset %lu copy len %lu dst len %lu\n",
2670 		       dst_offset, len, dst->len);
2671 		BUG_ON(1);
2672 	}
2673 
2674 	while(len > 0) {
2675 		dst_off_in_page = (start_offset + dst_offset) &
2676 			((unsigned long)PAGE_CACHE_SIZE - 1);
2677 		src_off_in_page = (start_offset + src_offset) &
2678 			((unsigned long)PAGE_CACHE_SIZE - 1);
2679 
2680 		dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2681 		src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
2682 
2683 		cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
2684 					       src_off_in_page));
2685 		cur = min_t(unsigned long, cur,
2686 			(unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
2687 
2688 		copy_pages(extent_buffer_page(dst, dst_i),
2689 			   extent_buffer_page(dst, src_i),
2690 			   dst_off_in_page, src_off_in_page, cur);
2691 
2692 		src_offset += cur;
2693 		dst_offset += cur;
2694 		len -= cur;
2695 	}
2696 }
2697 EXPORT_SYMBOL(memcpy_extent_buffer);
2698 
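/*
 * memmove within one extent buffer.  when the destination starts below
 * the source a forward copy is safe and we hand off to
 * memcpy_extent_buffer; otherwise the range is copied back to front.
 */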
2699 void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2700 			   unsigned long src_offset, unsigned long len)
2701 {
2702 	size_t cur;
2703 	size_t dst_off_in_page;
2704 	size_t src_off_in_page;
2705 	unsigned long dst_end = dst_offset + len - 1;
2706 	unsigned long src_end = src_offset + len - 1;
2707 	size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2708 	unsigned long dst_i;
2709 	unsigned long src_i;
2710 
2711 	if (src_offset + len > dst->len) {
2712 		printk("memmove bogus src_offset %lu move len %lu dst len %lu\n",
2713 		       src_offset, len, dst->len);
2714 		BUG_ON(1);
2715 	}
2716 	if (dst_offset + len > dst->len) {
2717 		printk("memmove bogus dst_offset %lu move len %lu dst len %lu\n",
2718 		       dst_offset, len, dst->len);
2719 		BUG_ON(1);
2720 	}
2721 	if (dst_offset < src_offset) {
2722 		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
2723 		return;
2724 	}
2725 	while(len > 0) {
2726 		dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
2727 		src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
2728 
2729 		dst_off_in_page = (start_offset + dst_end) &
2730 			((unsigned long)PAGE_CACHE_SIZE - 1);
2731 		src_off_in_page = (start_offset + src_end) &
2732 			((unsigned long)PAGE_CACHE_SIZE - 1);
2733 
2734 		cur = min_t(unsigned long, len, src_off_in_page + 1);
2735 		cur = min(cur, dst_off_in_page + 1);
2736 		move_pages(extent_buffer_page(dst, dst_i),
2737 			   extent_buffer_page(dst, src_i),
2738 			   dst_off_in_page - cur + 1,
2739 			   src_off_in_page - cur + 1, cur);
2740 
2741 		dst_end -= cur;
2742 		src_end -= cur;
2743 		len -= cur;
2744 	}
2745 }
2746 EXPORT_SYMBOL(memmove_extent_buffer);
2747