xref: /openbmc/linux/fs/btrfs/extent_map.c (revision a52d9a8033c454cd9b4697cfafb467509fc1693f)
1*a52d9a80SChris Mason #include <linux/bitops.h>
2*a52d9a80SChris Mason #include <linux/slab.h>
3*a52d9a80SChris Mason #include <linux/bio.h>
4*a52d9a80SChris Mason #include <linux/mm.h>
5*a52d9a80SChris Mason #include <linux/gfp.h>
6*a52d9a80SChris Mason #include <linux/pagemap.h>
7*a52d9a80SChris Mason #include <linux/page-flags.h>
8*a52d9a80SChris Mason #include <linux/module.h>
9*a52d9a80SChris Mason #include <linux/spinlock.h>
10*a52d9a80SChris Mason #include <linux/blkdev.h>
11*a52d9a80SChris Mason #include "extent_map.h"
12*a52d9a80SChris Mason 
13*a52d9a80SChris Mason static struct kmem_cache *extent_map_cache;
14*a52d9a80SChris Mason static struct kmem_cache *extent_state_cache;
15*a52d9a80SChris Mason 
16*a52d9a80SChris Mason struct tree_entry {
17*a52d9a80SChris Mason 	u64 start;
18*a52d9a80SChris Mason 	u64 end;
19*a52d9a80SChris Mason 	int in_tree;
20*a52d9a80SChris Mason 	struct rb_node rb_node;
21*a52d9a80SChris Mason };
22*a52d9a80SChris Mason 
23*a52d9a80SChris Mason /* bits for the extent state */
24*a52d9a80SChris Mason #define EXTENT_DIRTY 1
25*a52d9a80SChris Mason #define EXTENT_WRITEBACK (1 << 1)
26*a52d9a80SChris Mason #define EXTENT_UPTODATE (1 << 2)
27*a52d9a80SChris Mason #define EXTENT_LOCKED (1 << 3)
28*a52d9a80SChris Mason #define EXTENT_NEW (1 << 4)
29*a52d9a80SChris Mason #define EXTENT_DELALLOC (1 << 5)
30*a52d9a80SChris Mason 
31*a52d9a80SChris Mason #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
32*a52d9a80SChris Mason 
33*a52d9a80SChris Mason static LIST_HEAD(all_states);
34*a52d9a80SChris Mason spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
35*a52d9a80SChris Mason 
36*a52d9a80SChris Mason void __init extent_map_init(void)
37*a52d9a80SChris Mason {
38*a52d9a80SChris Mason 	extent_map_cache = kmem_cache_create("extent_map",
39*a52d9a80SChris Mason 					    sizeof(struct extent_map), 0,
40*a52d9a80SChris Mason 					    SLAB_RECLAIM_ACCOUNT |
41*a52d9a80SChris Mason 					    SLAB_DESTROY_BY_RCU,
42*a52d9a80SChris Mason 					    NULL);
43*a52d9a80SChris Mason 	extent_state_cache = kmem_cache_create("extent_state",
44*a52d9a80SChris Mason 					    sizeof(struct extent_state), 0,
45*a52d9a80SChris Mason 					    SLAB_RECLAIM_ACCOUNT |
46*a52d9a80SChris Mason 					    SLAB_DESTROY_BY_RCU,
47*a52d9a80SChris Mason 					    NULL);
48*a52d9a80SChris Mason }
49*a52d9a80SChris Mason 
50*a52d9a80SChris Mason void __exit extent_map_exit(void)
51*a52d9a80SChris Mason {
52*a52d9a80SChris Mason 	while(!list_empty(&all_states)) {
53*a52d9a80SChris Mason 		struct extent_state *state;
54*a52d9a80SChris Mason 		struct list_head *cur = all_states.next;
55*a52d9a80SChris Mason 		state = list_entry(cur, struct extent_state, list);
56*a52d9a80SChris Mason 		printk("found leaked state %Lu %Lu state %d in_tree %d\n",
57*a52d9a80SChris Mason 		       state->start, state->end, state->state, state->in_tree);
58*a52d9a80SChris Mason 		list_del(&state->list);
59*a52d9a80SChris Mason 		kfree(state);
60*a52d9a80SChris Mason 	}
61*a52d9a80SChris Mason 	if (extent_map_cache)
62*a52d9a80SChris Mason 		kmem_cache_destroy(extent_map_cache);
63*a52d9a80SChris Mason 	if (extent_state_cache)
64*a52d9a80SChris Mason 		kmem_cache_destroy(extent_state_cache);
65*a52d9a80SChris Mason }
66*a52d9a80SChris Mason 
67*a52d9a80SChris Mason void extent_map_tree_init(struct extent_map_tree *tree,
68*a52d9a80SChris Mason 			  struct address_space *mapping, gfp_t mask)
69*a52d9a80SChris Mason {
70*a52d9a80SChris Mason 	tree->map.rb_node = NULL;
71*a52d9a80SChris Mason 	tree->state.rb_node = NULL;
72*a52d9a80SChris Mason 	rwlock_init(&tree->lock);
73*a52d9a80SChris Mason 	tree->mapping = mapping;
74*a52d9a80SChris Mason }
75*a52d9a80SChris Mason EXPORT_SYMBOL(extent_map_tree_init);
76*a52d9a80SChris Mason 
77*a52d9a80SChris Mason struct extent_map *alloc_extent_map(gfp_t mask)
78*a52d9a80SChris Mason {
79*a52d9a80SChris Mason 	struct extent_map *em;
80*a52d9a80SChris Mason 	em = kmem_cache_alloc(extent_map_cache, mask);
81*a52d9a80SChris Mason 	if (!em || IS_ERR(em))
82*a52d9a80SChris Mason 		return em;
83*a52d9a80SChris Mason 	em->in_tree = 0;
84*a52d9a80SChris Mason 	atomic_set(&em->refs, 1);
85*a52d9a80SChris Mason 	return em;
86*a52d9a80SChris Mason }
87*a52d9a80SChris Mason EXPORT_SYMBOL(alloc_extent_map);
88*a52d9a80SChris Mason 
89*a52d9a80SChris Mason void free_extent_map(struct extent_map *em)
90*a52d9a80SChris Mason {
91*a52d9a80SChris Mason 	if (atomic_dec_and_test(&em->refs)) {
92*a52d9a80SChris Mason 		WARN_ON(em->in_tree);
93*a52d9a80SChris Mason 		kmem_cache_free(extent_map_cache, em);
94*a52d9a80SChris Mason 	}
95*a52d9a80SChris Mason }
96*a52d9a80SChris Mason EXPORT_SYMBOL(free_extent_map);
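
/*
 * A minimal usage sketch of the refcount lifecycle above (the helper name
 * is hypothetical and not part of this file): alloc_extent_map() returns
 * the object holding one reference, extra holders take their own with
 * atomic_inc(), and free_extent_map() only frees the struct once the
 * count reaches zero.
 */
static void example_extent_map_lifecycle(void)
{
	struct extent_map *em;

	em = alloc_extent_map(GFP_NOFS);
	if (!em || IS_ERR(em))
		return;
	atomic_inc(&em->refs);	/* a second holder keeps the map alive */
	free_extent_map(em);	/* drops the second holder's reference */
	free_extent_map(em);	/* drops the last reference and frees */
}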
97*a52d9a80SChris Mason 
98*a52d9a80SChris Mason 
99*a52d9a80SChris Mason struct extent_state *alloc_extent_state(gfp_t mask)
100*a52d9a80SChris Mason {
101*a52d9a80SChris Mason 	struct extent_state *state;
102*a52d9a80SChris Mason 	state = kmem_cache_alloc(extent_state_cache, mask);
103*a52d9a80SChris Mason 	if (!state || IS_ERR(state))
104*a52d9a80SChris Mason 		return state;
105*a52d9a80SChris Mason 	state->state = 0;
106*a52d9a80SChris Mason 	state->in_tree = 0;
107*a52d9a80SChris Mason 	atomic_set(&state->refs, 1);
108*a52d9a80SChris Mason 	init_waitqueue_head(&state->wq);
109*a52d9a80SChris Mason 	spin_lock_irq(&state_lock);
110*a52d9a80SChris Mason 	list_add(&state->list, &all_states);
111*a52d9a80SChris Mason 	spin_unlock_irq(&state_lock);
112*a52d9a80SChris Mason 	return state;
113*a52d9a80SChris Mason }
114*a52d9a80SChris Mason EXPORT_SYMBOL(alloc_extent_state);
115*a52d9a80SChris Mason 
116*a52d9a80SChris Mason void free_extent_state(struct extent_state *state)
117*a52d9a80SChris Mason {
118*a52d9a80SChris Mason 	if (atomic_dec_and_test(&state->refs)) {
119*a52d9a80SChris Mason 		WARN_ON(state->in_tree);
120*a52d9a80SChris Mason 		spin_lock_irq(&state_lock);
121*a52d9a80SChris Mason 		list_del_init(&state->list);
122*a52d9a80SChris Mason 		spin_unlock_irq(&state_lock);
123*a52d9a80SChris Mason 		kmem_cache_free(extent_state_cache, state);
124*a52d9a80SChris Mason 	}
125*a52d9a80SChris Mason }
126*a52d9a80SChris Mason EXPORT_SYMBOL(free_extent_state);
127*a52d9a80SChris Mason 
128*a52d9a80SChris Mason static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
129*a52d9a80SChris Mason 				   struct rb_node *node)
130*a52d9a80SChris Mason {
131*a52d9a80SChris Mason 	struct rb_node ** p = &root->rb_node;
132*a52d9a80SChris Mason 	struct rb_node * parent = NULL;
133*a52d9a80SChris Mason 	struct tree_entry *entry;
134*a52d9a80SChris Mason 
135*a52d9a80SChris Mason 	while(*p) {
136*a52d9a80SChris Mason 		parent = *p;
137*a52d9a80SChris Mason 		entry = rb_entry(parent, struct tree_entry, rb_node);
138*a52d9a80SChris Mason 
139*a52d9a80SChris Mason 		if (offset < entry->start)
140*a52d9a80SChris Mason 			p = &(*p)->rb_left;
141*a52d9a80SChris Mason 		else if (offset > entry->end)
142*a52d9a80SChris Mason 			p = &(*p)->rb_right;
143*a52d9a80SChris Mason 		else
144*a52d9a80SChris Mason 			return parent;
145*a52d9a80SChris Mason 	}
146*a52d9a80SChris Mason 
147*a52d9a80SChris Mason 	entry = rb_entry(node, struct tree_entry, rb_node);
148*a52d9a80SChris Mason 	entry->in_tree = 1;
149*a52d9a80SChris Mason 	rb_link_node(node, parent, p);
150*a52d9a80SChris Mason 	rb_insert_color(node, root);
151*a52d9a80SChris Mason 	return NULL;
152*a52d9a80SChris Mason }
153*a52d9a80SChris Mason 
154*a52d9a80SChris Mason static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
155*a52d9a80SChris Mason 				   struct rb_node **prev_ret)
156*a52d9a80SChris Mason {
157*a52d9a80SChris Mason 	struct rb_node * n = root->rb_node;
158*a52d9a80SChris Mason 	struct rb_node *prev = NULL;
159*a52d9a80SChris Mason 	struct tree_entry *entry;
160*a52d9a80SChris Mason 	struct tree_entry *prev_entry = NULL;
161*a52d9a80SChris Mason 
162*a52d9a80SChris Mason 	while(n) {
163*a52d9a80SChris Mason 		entry = rb_entry(n, struct tree_entry, rb_node);
164*a52d9a80SChris Mason 		prev = n;
165*a52d9a80SChris Mason 		prev_entry = entry;
166*a52d9a80SChris Mason 
167*a52d9a80SChris Mason 		if (offset < entry->start)
168*a52d9a80SChris Mason 			n = n->rb_left;
169*a52d9a80SChris Mason 		else if (offset > entry->end)
170*a52d9a80SChris Mason 			n = n->rb_right;
171*a52d9a80SChris Mason 		else
172*a52d9a80SChris Mason 			return n;
173*a52d9a80SChris Mason 	}
174*a52d9a80SChris Mason 	if (!prev_ret)
175*a52d9a80SChris Mason 		return NULL;
176*a52d9a80SChris Mason 	while(prev && offset > prev_entry->end) {
177*a52d9a80SChris Mason 		prev = rb_next(prev);
178*a52d9a80SChris Mason 		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
179*a52d9a80SChris Mason 	}
180*a52d9a80SChris Mason 	*prev_ret = prev;
181*a52d9a80SChris Mason 	return NULL;
182*a52d9a80SChris Mason }
183*a52d9a80SChris Mason 
184*a52d9a80SChris Mason static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
185*a52d9a80SChris Mason {
186*a52d9a80SChris Mason 	struct rb_node *prev;
187*a52d9a80SChris Mason 	struct rb_node *ret;
188*a52d9a80SChris Mason 	ret = __tree_search(root, offset, &prev);
189*a52d9a80SChris Mason 	if (!ret)
190*a52d9a80SChris Mason 		return prev;
191*a52d9a80SChris Mason 	return ret;
192*a52d9a80SChris Mason }
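
/*
 * Worked example of the search semantics above (illustration only): with
 * entries [0, 4095] and [8192, 12287] in the tree, tree_search(root, 100)
 * returns the [0, 4095] node, tree_search(root, 4096) returns the
 * [8192, 12287] node (the first entry ending at or after the offset), and
 * tree_search(root, 13000) returns NULL because no entry ends that late.
 */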
193*a52d9a80SChris Mason 
194*a52d9a80SChris Mason static int tree_delete(struct rb_root *root, u64 offset)
195*a52d9a80SChris Mason {
196*a52d9a80SChris Mason 	struct rb_node *node;
197*a52d9a80SChris Mason 	struct tree_entry *entry;
198*a52d9a80SChris Mason 
199*a52d9a80SChris Mason 	node = __tree_search(root, offset, NULL);
200*a52d9a80SChris Mason 	if (!node)
201*a52d9a80SChris Mason 		return -ENOENT;
202*a52d9a80SChris Mason 	entry = rb_entry(node, struct tree_entry, rb_node);
203*a52d9a80SChris Mason 	entry->in_tree = 0;
204*a52d9a80SChris Mason 	rb_erase(node, root);
205*a52d9a80SChris Mason 	return 0;
206*a52d9a80SChris Mason }
207*a52d9a80SChris Mason 
208*a52d9a80SChris Mason /*
209*a52d9a80SChris Mason  * add_extent_mapping tries a simple backward merge with existing
210*a52d9a80SChris Mason  * mappings.  The extent_map struct passed in will be inserted into
211*a52d9a80SChris Mason  * the tree directly (no copies made, just a reference taken).
212*a52d9a80SChris Mason  */
213*a52d9a80SChris Mason int add_extent_mapping(struct extent_map_tree *tree,
214*a52d9a80SChris Mason 		       struct extent_map *em)
215*a52d9a80SChris Mason {
216*a52d9a80SChris Mason 	int ret = 0;
217*a52d9a80SChris Mason 	struct extent_map *prev = NULL;
218*a52d9a80SChris Mason 	struct rb_node *rb;
219*a52d9a80SChris Mason 
220*a52d9a80SChris Mason 	write_lock_irq(&tree->lock);
221*a52d9a80SChris Mason 	rb = tree_insert(&tree->map, em->end, &em->rb_node);
222*a52d9a80SChris Mason 	if (rb) {
223*a52d9a80SChris Mason 		prev = rb_entry(rb, struct extent_map, rb_node);
224*a52d9a80SChris Mason 		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
225*a52d9a80SChris Mason 		ret = -EEXIST;
226*a52d9a80SChris Mason 		goto out;
227*a52d9a80SChris Mason 	}
228*a52d9a80SChris Mason 	atomic_inc(&em->refs);
229*a52d9a80SChris Mason 	if (em->start != 0) {
230*a52d9a80SChris Mason 		rb = rb_prev(&em->rb_node);
231*a52d9a80SChris Mason 		if (rb)
232*a52d9a80SChris Mason 			prev = rb_entry(rb, struct extent_map, rb_node);
233*a52d9a80SChris Mason 		if (prev && prev->end + 1 == em->start &&
234*a52d9a80SChris Mason 		    ((em->block_start == 0 && prev->block_start == 0) ||
235*a52d9a80SChris Mason 			     (em->block_start == prev->block_end + 1))) {
236*a52d9a80SChris Mason 			em->start = prev->start;
237*a52d9a80SChris Mason 			em->block_start = prev->block_start;
238*a52d9a80SChris Mason 			rb_erase(&prev->rb_node, &tree->map);
239*a52d9a80SChris Mason 			prev->in_tree = 0;
240*a52d9a80SChris Mason 			free_extent_map(prev);
241*a52d9a80SChris Mason 		}
242*a52d9a80SChris Mason 	 }
243*a52d9a80SChris Mason out:
244*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
245*a52d9a80SChris Mason 	return ret;
246*a52d9a80SChris Mason }
247*a52d9a80SChris Mason EXPORT_SYMBOL(add_extent_mapping);
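
/*
 * A hedged usage sketch for add_extent_mapping() (hypothetical helper,
 * not part of this revision): the caller fills in a freshly allocated
 * extent_map, inserts it, and drops its own reference since the tree
 * took one of its own on success.  block_end is assumed here to track
 * block_start plus the logical length; the authoritative field layout is
 * in extent_map.h.
 */
static int example_add_mapping(struct extent_map_tree *tree,
			       u64 start, u64 end, u64 block_start)
{
	struct extent_map *em;
	int ret;

	em = alloc_extent_map(GFP_NOFS);
	if (!em || IS_ERR(em))
		return -ENOMEM;
	em->start = start;
	em->end = end;
	em->block_start = block_start;
	em->block_end = block_start + (end - start);
	ret = add_extent_mapping(tree, em);
	free_extent_map(em);
	return ret;
}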
248*a52d9a80SChris Mason 
249*a52d9a80SChris Mason /*
250*a52d9a80SChris Mason  * lookup_extent_mapping returns the first extent_map struct in the
251*a52d9a80SChris Mason  * tree that intersects the [start, end] (inclusive) range.  There may
252*a52d9a80SChris Mason  * be additional objects in the tree that intersect, so check the object
253*a52d9a80SChris Mason  * returned carefully to make sure you don't need additional lookups.
254*a52d9a80SChris Mason  */
255*a52d9a80SChris Mason struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
256*a52d9a80SChris Mason 					 u64 start, u64 end)
257*a52d9a80SChris Mason {
258*a52d9a80SChris Mason 	struct extent_map *em;
259*a52d9a80SChris Mason 	struct rb_node *rb_node;
260*a52d9a80SChris Mason 
261*a52d9a80SChris Mason 	read_lock_irq(&tree->lock);
262*a52d9a80SChris Mason 	rb_node = tree_search(&tree->map, start);
263*a52d9a80SChris Mason 	if (!rb_node) {
264*a52d9a80SChris Mason 		em = NULL;
265*a52d9a80SChris Mason 		goto out;
266*a52d9a80SChris Mason 	}
267*a52d9a80SChris Mason 	if (IS_ERR(rb_node)) {
268*a52d9a80SChris Mason 		em = ERR_PTR(PTR_ERR(rb_node));
269*a52d9a80SChris Mason 		goto out;
270*a52d9a80SChris Mason 	}
271*a52d9a80SChris Mason 	em = rb_entry(rb_node, struct extent_map, rb_node);
272*a52d9a80SChris Mason 	if (em->end < start || em->start > end) {
273*a52d9a80SChris Mason 		em = NULL;
274*a52d9a80SChris Mason 		goto out;
275*a52d9a80SChris Mason 	}
276*a52d9a80SChris Mason 	atomic_inc(&em->refs);
277*a52d9a80SChris Mason out:
278*a52d9a80SChris Mason 	read_unlock_irq(&tree->lock);
279*a52d9a80SChris Mason 	return em;
280*a52d9a80SChris Mason }
281*a52d9a80SChris Mason EXPORT_SYMBOL(lookup_extent_mapping);
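
/*
 * Sketch of a lookup_extent_mapping() caller (hypothetical helper): the
 * returned map is only guaranteed to intersect [start, end], so the
 * caller checks the coverage it actually needs and drops the reference
 * the lookup took.
 */
static int example_range_is_mapped(struct extent_map_tree *tree,
				   u64 start, u64 end)
{
	struct extent_map *em;
	int mapped = 0;

	em = lookup_extent_mapping(tree, start, end);
	if (!em || IS_ERR(em))
		return 0;
	if (em->start <= start && em->end >= end)
		mapped = 1;
	free_extent_map(em);
	return mapped;
}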
282*a52d9a80SChris Mason 
283*a52d9a80SChris Mason /*
284*a52d9a80SChris Mason  * removes an extent_map struct from the tree.  No reference counts are
285*a52d9a80SChris Mason  * dropped, and no checks are done to see if the range is in use
286*a52d9a80SChris Mason  */
287*a52d9a80SChris Mason int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
288*a52d9a80SChris Mason {
289*a52d9a80SChris Mason 	int ret;
290*a52d9a80SChris Mason 
291*a52d9a80SChris Mason 	write_lock_irq(&tree->lock);
292*a52d9a80SChris Mason 	ret = tree_delete(&tree->map, em->end);
293*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
294*a52d9a80SChris Mason 	return ret;
295*a52d9a80SChris Mason }
296*a52d9a80SChris Mason EXPORT_SYMBOL(remove_extent_mapping);
297*a52d9a80SChris Mason 
298*a52d9a80SChris Mason /*
299*a52d9a80SChris Mason  * utility function to look for merge candidates inside a given range.
300*a52d9a80SChris Mason  * Any extents with matching state are merged together into a single
301*a52d9a80SChris Mason  * extent in the tree.  Extents with EXTENT_IOBITS in their state field
302*a52d9a80SChris Mason  * are not merged because the end_io handlers need to be able to do
303*a52d9a80SChris Mason  * operations on them without sleeping (or doing allocations/splits).
304*a52d9a80SChris Mason  *
305*a52d9a80SChris Mason  * This should be called with the tree lock held.
306*a52d9a80SChris Mason  */
307*a52d9a80SChris Mason static int merge_state(struct extent_map_tree *tree,
308*a52d9a80SChris Mason 		       struct extent_state *state)
309*a52d9a80SChris Mason {
310*a52d9a80SChris Mason 	struct extent_state *other;
311*a52d9a80SChris Mason 	struct rb_node *other_node;
312*a52d9a80SChris Mason 
313*a52d9a80SChris Mason 	if (state->state & EXTENT_IOBITS)
314*a52d9a80SChris Mason 		return 0;
315*a52d9a80SChris Mason 
316*a52d9a80SChris Mason 	other_node = rb_prev(&state->rb_node);
317*a52d9a80SChris Mason 	if (other_node) {
318*a52d9a80SChris Mason 		other = rb_entry(other_node, struct extent_state, rb_node);
319*a52d9a80SChris Mason 		if (other->end == state->start - 1 &&
320*a52d9a80SChris Mason 		    other->state == state->state) {
321*a52d9a80SChris Mason 			state->start = other->start;
322*a52d9a80SChris Mason 			other->in_tree = 0;
323*a52d9a80SChris Mason 			rb_erase(&other->rb_node, &tree->state);
324*a52d9a80SChris Mason 			free_extent_state(other);
325*a52d9a80SChris Mason 		}
326*a52d9a80SChris Mason 	}
327*a52d9a80SChris Mason 	other_node = rb_next(&state->rb_node);
328*a52d9a80SChris Mason 	if (other_node) {
329*a52d9a80SChris Mason 		other = rb_entry(other_node, struct extent_state, rb_node);
330*a52d9a80SChris Mason 		if (other->start == state->end + 1 &&
331*a52d9a80SChris Mason 		    other->state == state->state) {
332*a52d9a80SChris Mason 			other->start = state->start;
333*a52d9a80SChris Mason 			state->in_tree = 0;
334*a52d9a80SChris Mason 			rb_erase(&state->rb_node, &tree->state);
335*a52d9a80SChris Mason 			free_extent_state(state);
336*a52d9a80SChris Mason 		}
337*a52d9a80SChris Mason 	}
338*a52d9a80SChris Mason 	return 0;
339*a52d9a80SChris Mason }
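
/*
 * Concrete example of the merge above (illustration only): with adjacent
 * states [0, 4095] and [4096, 8191] both holding exactly EXTENT_DIRTY and
 * no lock or writeback bits, merge_state() on either one collapses the
 * pair into a single [0, 8191] dirty state and frees the other struct.
 */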
340*a52d9a80SChris Mason 
341*a52d9a80SChris Mason /*
342*a52d9a80SChris Mason  * insert an extent_state struct into the tree.  'bits' are set on the
343*a52d9a80SChris Mason  * struct before it is inserted.
344*a52d9a80SChris Mason  *
345*a52d9a80SChris Mason  * This may return -EEXIST if the extent is already there, in which case the
346*a52d9a80SChris Mason  * state struct is freed.
347*a52d9a80SChris Mason  *
348*a52d9a80SChris Mason  * The tree lock is not taken internally.  This is a utility function and
349*a52d9a80SChris Mason  * probably isn't what you want to call (see set/clear_extent_bit).
350*a52d9a80SChris Mason  */
351*a52d9a80SChris Mason static int insert_state(struct extent_map_tree *tree,
352*a52d9a80SChris Mason 			struct extent_state *state, u64 start, u64 end,
353*a52d9a80SChris Mason 			int bits)
354*a52d9a80SChris Mason {
355*a52d9a80SChris Mason 	struct rb_node *node;
356*a52d9a80SChris Mason 
357*a52d9a80SChris Mason 	if (end < start) {
358*a52d9a80SChris Mason 		printk("end < start %Lu %Lu\n", end, start);
359*a52d9a80SChris Mason 		WARN_ON(1);
360*a52d9a80SChris Mason 	}
361*a52d9a80SChris Mason 	state->state |= bits;
362*a52d9a80SChris Mason 	state->start = start;
363*a52d9a80SChris Mason 	state->end = end;
364*a52d9a80SChris Mason 	if ((end & 4095) == 0) {
365*a52d9a80SChris Mason 		printk("insert state %Lu %Lu strange end\n", start, end);
366*a52d9a80SChris Mason 		WARN_ON(1);
367*a52d9a80SChris Mason 	}
368*a52d9a80SChris Mason 	node = tree_insert(&tree->state, end, &state->rb_node);
369*a52d9a80SChris Mason 	if (node) {
370*a52d9a80SChris Mason 		struct extent_state *found;
371*a52d9a80SChris Mason 		found = rb_entry(node, struct extent_state, rb_node);
372*a52d9a80SChris Mason printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
373*a52d9a80SChris Mason 		free_extent_state(state);
374*a52d9a80SChris Mason 		return -EEXIST;
375*a52d9a80SChris Mason 	}
376*a52d9a80SChris Mason 	merge_state(tree, state);
377*a52d9a80SChris Mason 	return 0;
378*a52d9a80SChris Mason }
379*a52d9a80SChris Mason 
380*a52d9a80SChris Mason /*
381*a52d9a80SChris Mason  * split a given extent state struct in two, inserting the preallocated
382*a52d9a80SChris Mason  * struct 'prealloc' as the newly created second half.  'split' indicates an
383*a52d9a80SChris Mason  * offset inside 'orig' where it should be split.
384*a52d9a80SChris Mason  *
385*a52d9a80SChris Mason  * Before calling,
386*a52d9a80SChris Mason  * the tree has 'orig' at [orig->start, orig->end].  After calling, there
387*a52d9a80SChris Mason  * are two extent state structs in the tree:
388*a52d9a80SChris Mason  * prealloc: [orig->start, split - 1]
389*a52d9a80SChris Mason  * orig: [ split, orig->end ]
390*a52d9a80SChris Mason  *
391*a52d9a80SChris Mason  * The tree locks are not taken by this function. They need to be held
392*a52d9a80SChris Mason  * by the caller.
393*a52d9a80SChris Mason  */
394*a52d9a80SChris Mason static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
395*a52d9a80SChris Mason 		       struct extent_state *prealloc, u64 split)
396*a52d9a80SChris Mason {
397*a52d9a80SChris Mason 	struct rb_node *node;
398*a52d9a80SChris Mason 	prealloc->start = orig->start;
399*a52d9a80SChris Mason 	prealloc->end = split - 1;
400*a52d9a80SChris Mason 	prealloc->state = orig->state;
401*a52d9a80SChris Mason 	orig->start = split;
402*a52d9a80SChris Mason 	if ((prealloc->end & 4095) == 0) {
403*a52d9a80SChris Mason 		printk("insert state %Lu %Lu strange end\n", prealloc->start,
404*a52d9a80SChris Mason 		       prealloc->end);
405*a52d9a80SChris Mason 		WARN_ON(1);
406*a52d9a80SChris Mason 	}
407*a52d9a80SChris Mason 	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
408*a52d9a80SChris Mason 	if (node) {
409*a52d9a80SChris Mason 		struct extent_state *found;
410*a52d9a80SChris Mason 		found = rb_entry(node, struct extent_state, rb_node);
411*a52d9a80SChris Mason printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
412*a52d9a80SChris Mason 		free_extent_state(prealloc);
413*a52d9a80SChris Mason 		return -EEXIST;
414*a52d9a80SChris Mason 	}
415*a52d9a80SChris Mason 	return 0;
416*a52d9a80SChris Mason }
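
/*
 * Concrete example of the split above (illustration only): with
 * orig = [0, 8191] and split = 4096, the tree afterwards holds
 * prealloc = [0, 4095] and orig = [4096, 8191], both carrying orig's
 * original state bits.
 */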
417*a52d9a80SChris Mason 
418*a52d9a80SChris Mason /*
419*a52d9a80SChris Mason  * utility function to clear some bits in an extent state struct.
420*a52d9a80SChris Mason  * it will optionally wake up anyone waiting on this state (wake == 1), or
421*a52d9a80SChris Mason  * forcibly remove the state from the tree (delete == 1).
422*a52d9a80SChris Mason  *
423*a52d9a80SChris Mason  * If no bits are set on the state struct after clearing things, the
424*a52d9a80SChris Mason  * struct is freed and removed from the tree
425*a52d9a80SChris Mason  */
426*a52d9a80SChris Mason static int clear_state_bit(struct extent_map_tree *tree,
427*a52d9a80SChris Mason 			    struct extent_state *state, int bits, int wake,
428*a52d9a80SChris Mason 			    int delete)
429*a52d9a80SChris Mason {
430*a52d9a80SChris Mason 	int ret = state->state & bits;
431*a52d9a80SChris Mason 	state->state &= ~bits;
432*a52d9a80SChris Mason 	if (wake)
433*a52d9a80SChris Mason 		wake_up(&state->wq);
434*a52d9a80SChris Mason 	if (delete || state->state == 0) {
435*a52d9a80SChris Mason 		if (state->in_tree) {
436*a52d9a80SChris Mason 			rb_erase(&state->rb_node, &tree->state);
437*a52d9a80SChris Mason 			state->in_tree = 0;
438*a52d9a80SChris Mason 			free_extent_state(state);
439*a52d9a80SChris Mason 		} else {
440*a52d9a80SChris Mason 			WARN_ON(1);
441*a52d9a80SChris Mason 		}
442*a52d9a80SChris Mason 	} else {
443*a52d9a80SChris Mason 		merge_state(tree, state);
444*a52d9a80SChris Mason 	}
445*a52d9a80SChris Mason 	return ret;
446*a52d9a80SChris Mason }
447*a52d9a80SChris Mason 
448*a52d9a80SChris Mason /*
449*a52d9a80SChris Mason  * clear some bits on a range in the tree.  This may require splitting
450*a52d9a80SChris Mason  * or inserting elements in the tree, so the gfp mask is used to
451*a52d9a80SChris Mason  * indicate which allocations or sleeping are allowed.
452*a52d9a80SChris Mason  *
453*a52d9a80SChris Mason  * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
454*a52d9a80SChris Mason  * the given range from the tree regardless of state (ie for truncate).
455*a52d9a80SChris Mason  *
456*a52d9a80SChris Mason  * the range [start, end] is inclusive.
457*a52d9a80SChris Mason  *
458*a52d9a80SChris Mason  * This takes the tree lock, and returns < 0 on error, > 0 if any of the
459*a52d9a80SChris Mason  * bits were already set, or zero if none of the bits were already set.
460*a52d9a80SChris Mason  */
461*a52d9a80SChris Mason int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
462*a52d9a80SChris Mason 		     int bits, int wake, int delete, gfp_t mask)
463*a52d9a80SChris Mason {
464*a52d9a80SChris Mason 	struct extent_state *state;
465*a52d9a80SChris Mason 	struct extent_state *prealloc = NULL;
466*a52d9a80SChris Mason 	struct rb_node *node;
467*a52d9a80SChris Mason 	int err;
468*a52d9a80SChris Mason 	int set = 0;
469*a52d9a80SChris Mason 
470*a52d9a80SChris Mason again:
471*a52d9a80SChris Mason 	if (!prealloc && (mask & __GFP_WAIT)) {
472*a52d9a80SChris Mason 		prealloc = alloc_extent_state(mask);
473*a52d9a80SChris Mason 		if (!prealloc)
474*a52d9a80SChris Mason 			return -ENOMEM;
475*a52d9a80SChris Mason 	}
476*a52d9a80SChris Mason 
477*a52d9a80SChris Mason 	write_lock_irq(&tree->lock);
478*a52d9a80SChris Mason 	/*
479*a52d9a80SChris Mason 	 * this search will find the extents that end after
480*a52d9a80SChris Mason 	 * our range starts
481*a52d9a80SChris Mason 	 */
482*a52d9a80SChris Mason 	node = tree_search(&tree->state, start);
483*a52d9a80SChris Mason 	if (!node)
484*a52d9a80SChris Mason 		goto out;
485*a52d9a80SChris Mason 	state = rb_entry(node, struct extent_state, rb_node);
486*a52d9a80SChris Mason 	if (state->start > end)
487*a52d9a80SChris Mason 		goto out;
488*a52d9a80SChris Mason 	WARN_ON(state->end < start);
489*a52d9a80SChris Mason 
490*a52d9a80SChris Mason 	/*
491*a52d9a80SChris Mason 	 *     | ---- desired range ---- |
492*a52d9a80SChris Mason 	 *  | state | or
493*a52d9a80SChris Mason 	 *  | ------------- state -------------- |
494*a52d9a80SChris Mason 	 *
495*a52d9a80SChris Mason 	 * We need to split the extent we found, and may flip
496*a52d9a80SChris Mason 	 * bits on second half.
497*a52d9a80SChris Mason 	 *
498*a52d9a80SChris Mason 	 * If the extent we found extends past our range, we
499*a52d9a80SChris Mason 	 * just split and search again.  It'll get split again
500*a52d9a80SChris Mason 	 * the next time though.
501*a52d9a80SChris Mason 	 *
502*a52d9a80SChris Mason 	 * If the extent we found is inside our range, we clear
503*a52d9a80SChris Mason 	 * the desired bit on it.
504*a52d9a80SChris Mason 	 */
505*a52d9a80SChris Mason 
506*a52d9a80SChris Mason 	if (state->start < start) {
507*a52d9a80SChris Mason 		err = split_state(tree, state, prealloc, start);
508*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
509*a52d9a80SChris Mason 		prealloc = NULL;
510*a52d9a80SChris Mason 		if (err)
511*a52d9a80SChris Mason 			goto out;
512*a52d9a80SChris Mason 		if (state->end <= end) {
513*a52d9a80SChris Mason 			start = state->end + 1;
514*a52d9a80SChris Mason 			set |= clear_state_bit(tree, state, bits,
515*a52d9a80SChris Mason 					wake, delete);
516*a52d9a80SChris Mason 		} else {
517*a52d9a80SChris Mason 			start = state->start;
518*a52d9a80SChris Mason 		}
519*a52d9a80SChris Mason 		goto search_again;
520*a52d9a80SChris Mason 	}
521*a52d9a80SChris Mason 	/*
522*a52d9a80SChris Mason 	 * | ---- desired range ---- |
523*a52d9a80SChris Mason 	 *                        | state |
524*a52d9a80SChris Mason 	 * We need to split the extent, and clear the bit
525*a52d9a80SChris Mason 	 * on the first half
526*a52d9a80SChris Mason 	 */
527*a52d9a80SChris Mason 	if (state->start <= end && state->end > end) {
528*a52d9a80SChris Mason 		err = split_state(tree, state, prealloc, end + 1);
529*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
530*a52d9a80SChris Mason 
531*a52d9a80SChris Mason 		if (wake)
532*a52d9a80SChris Mason 			wake_up(&state->wq);
533*a52d9a80SChris Mason 		set |= clear_state_bit(tree, prealloc, bits,
534*a52d9a80SChris Mason 				       wake, delete);
535*a52d9a80SChris Mason 		prealloc = NULL;
536*a52d9a80SChris Mason 		goto out;
537*a52d9a80SChris Mason 	}
538*a52d9a80SChris Mason 
539*a52d9a80SChris Mason 	start = state->end + 1;
540*a52d9a80SChris Mason 	set |= clear_state_bit(tree, state, bits, wake, delete);
541*a52d9a80SChris Mason 	goto search_again;
542*a52d9a80SChris Mason 
543*a52d9a80SChris Mason out:
544*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
545*a52d9a80SChris Mason 	if (prealloc)
546*a52d9a80SChris Mason 		free_extent_state(prealloc);
547*a52d9a80SChris Mason 
548*a52d9a80SChris Mason 	return set;
549*a52d9a80SChris Mason 
550*a52d9a80SChris Mason search_again:
551*a52d9a80SChris Mason 	if (start >= end)
552*a52d9a80SChris Mason 		goto out;
553*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
554*a52d9a80SChris Mason 	if (mask & __GFP_WAIT)
555*a52d9a80SChris Mason 		cond_resched();
556*a52d9a80SChris Mason 	goto again;
557*a52d9a80SChris Mason }
558*a52d9a80SChris Mason EXPORT_SYMBOL(clear_extent_bit);
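
/*
 * A minimal sketch (hypothetical helper) of the truncate-style call
 * described above: delete == 1 removes the states covering the range no
 * matter which bits they carry, and wake == 1 kicks anyone sleeping on a
 * locked extent in that range.
 */
static int example_truncate_range(struct extent_map_tree *tree,
				  u64 start, u64 end)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 1, 1,
				GFP_NOFS);
}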
559*a52d9a80SChris Mason 
560*a52d9a80SChris Mason static int wait_on_state(struct extent_map_tree *tree,
561*a52d9a80SChris Mason 			 struct extent_state *state)
562*a52d9a80SChris Mason {
563*a52d9a80SChris Mason 	DEFINE_WAIT(wait);
564*a52d9a80SChris Mason 	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
565*a52d9a80SChris Mason 	read_unlock_irq(&tree->lock);
566*a52d9a80SChris Mason 	schedule();
567*a52d9a80SChris Mason 	read_lock_irq(&tree->lock);
568*a52d9a80SChris Mason 	finish_wait(&state->wq, &wait);
569*a52d9a80SChris Mason 	return 0;
570*a52d9a80SChris Mason }
571*a52d9a80SChris Mason 
572*a52d9a80SChris Mason /*
573*a52d9a80SChris Mason  * waits for one or more bits to clear on a range in the state tree.
574*a52d9a80SChris Mason  * The range [start, end] is inclusive.
575*a52d9a80SChris Mason  * The tree lock is taken by this function
576*a52d9a80SChris Mason  */
577*a52d9a80SChris Mason int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
578*a52d9a80SChris Mason {
579*a52d9a80SChris Mason 	struct extent_state *state;
580*a52d9a80SChris Mason 	struct rb_node *node;
581*a52d9a80SChris Mason 
582*a52d9a80SChris Mason 	read_lock_irq(&tree->lock);
583*a52d9a80SChris Mason again:
584*a52d9a80SChris Mason 	while (1) {
585*a52d9a80SChris Mason 		/*
586*a52d9a80SChris Mason 		 * this search will find all the extents that end after
587*a52d9a80SChris Mason 		 * our range starts
588*a52d9a80SChris Mason 		 */
589*a52d9a80SChris Mason 		node = tree_search(&tree->state, start);
590*a52d9a80SChris Mason 		if (!node)
591*a52d9a80SChris Mason 			break;
592*a52d9a80SChris Mason 
593*a52d9a80SChris Mason 		state = rb_entry(node, struct extent_state, rb_node);
594*a52d9a80SChris Mason 
595*a52d9a80SChris Mason 		if (state->start > end)
596*a52d9a80SChris Mason 			goto out;
597*a52d9a80SChris Mason 
598*a52d9a80SChris Mason 		if (state->state & bits) {
599*a52d9a80SChris Mason 			start = state->start;
600*a52d9a80SChris Mason 			atomic_inc(&state->refs);
601*a52d9a80SChris Mason 			wait_on_state(tree, state);
602*a52d9a80SChris Mason 			free_extent_state(state);
603*a52d9a80SChris Mason 			goto again;
604*a52d9a80SChris Mason 		}
605*a52d9a80SChris Mason 		start = state->end + 1;
606*a52d9a80SChris Mason 
607*a52d9a80SChris Mason 		if (start > end)
608*a52d9a80SChris Mason 			break;
609*a52d9a80SChris Mason 
610*a52d9a80SChris Mason 		if (need_resched()) {
611*a52d9a80SChris Mason 			read_unlock_irq(&tree->lock);
612*a52d9a80SChris Mason 			cond_resched();
613*a52d9a80SChris Mason 			read_lock_irq(&tree->lock);
614*a52d9a80SChris Mason 		}
615*a52d9a80SChris Mason 	}
616*a52d9a80SChris Mason out:
617*a52d9a80SChris Mason 	read_unlock_irq(&tree->lock);
618*a52d9a80SChris Mason 	return 0;
619*a52d9a80SChris Mason }
620*a52d9a80SChris Mason EXPORT_SYMBOL(wait_extent_bit);
621*a52d9a80SChris Mason 
622*a52d9a80SChris Mason /*
623*a52d9a80SChris Mason  * set some bits on a range in the tree.  This may require allocations
624*a52d9a80SChris Mason  * or sleeping, so the gfp mask is used to indicate what is allowed.
625*a52d9a80SChris Mason  *
626*a52d9a80SChris Mason  * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
627*a52d9a80SChris Mason  * range already has the desired bits set.  The start of the existing
628*a52d9a80SChris Mason  * range is returned in failed_start in this case.
629*a52d9a80SChris Mason  *
630*a52d9a80SChris Mason  * [start, end] is inclusive
631*a52d9a80SChris Mason  * This takes the tree lock.
632*a52d9a80SChris Mason  */
633*a52d9a80SChris Mason int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
634*a52d9a80SChris Mason 		   int exclusive, u64 *failed_start, gfp_t mask)
635*a52d9a80SChris Mason {
636*a52d9a80SChris Mason 	struct extent_state *state;
637*a52d9a80SChris Mason 	struct extent_state *prealloc = NULL;
638*a52d9a80SChris Mason 	struct rb_node *node;
639*a52d9a80SChris Mason 	int err = 0;
640*a52d9a80SChris Mason 	int set;
641*a52d9a80SChris Mason 	u64 last_start;
642*a52d9a80SChris Mason 	u64 last_end;
643*a52d9a80SChris Mason again:
644*a52d9a80SChris Mason 	if (!prealloc && (mask & __GFP_WAIT)) {
645*a52d9a80SChris Mason 		prealloc = alloc_extent_state(mask);
646*a52d9a80SChris Mason 		if (!prealloc)
647*a52d9a80SChris Mason 			return -ENOMEM;
648*a52d9a80SChris Mason 	}
649*a52d9a80SChris Mason 
650*a52d9a80SChris Mason 	write_lock_irq(&tree->lock);
651*a52d9a80SChris Mason 	/*
652*a52d9a80SChris Mason 	 * this search will find all the extents that end after
653*a52d9a80SChris Mason 	 * our range starts.
654*a52d9a80SChris Mason 	 */
655*a52d9a80SChris Mason 	node = tree_search(&tree->state, start);
656*a52d9a80SChris Mason 	if (!node) {
657*a52d9a80SChris Mason 		err = insert_state(tree, prealloc, start, end, bits);
658*a52d9a80SChris Mason 		prealloc = NULL;
659*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
660*a52d9a80SChris Mason 		goto out;
661*a52d9a80SChris Mason 	}
662*a52d9a80SChris Mason 
663*a52d9a80SChris Mason 	state = rb_entry(node, struct extent_state, rb_node);
664*a52d9a80SChris Mason 	last_start = state->start;
665*a52d9a80SChris Mason 	last_end = state->end;
666*a52d9a80SChris Mason 
667*a52d9a80SChris Mason 	/*
668*a52d9a80SChris Mason 	 * | ---- desired range ---- |
669*a52d9a80SChris Mason 	 * | state |
670*a52d9a80SChris Mason 	 *
671*a52d9a80SChris Mason 	 * Just lock what we found and keep going
672*a52d9a80SChris Mason 	 */
673*a52d9a80SChris Mason 	if (state->start == start && state->end <= end) {
674*a52d9a80SChris Mason 		set = state->state & bits;
675*a52d9a80SChris Mason 		if (set && exclusive) {
676*a52d9a80SChris Mason 			*failed_start = state->start;
677*a52d9a80SChris Mason 			err = -EEXIST;
678*a52d9a80SChris Mason 			goto out;
679*a52d9a80SChris Mason 		}
680*a52d9a80SChris Mason 		state->state |= bits;
681*a52d9a80SChris Mason 		start = state->end + 1;
682*a52d9a80SChris Mason 		merge_state(tree, state);
683*a52d9a80SChris Mason 		goto search_again;
684*a52d9a80SChris Mason 	}
685*a52d9a80SChris Mason 
686*a52d9a80SChris Mason 	/*
687*a52d9a80SChris Mason 	 *     | ---- desired range ---- |
688*a52d9a80SChris Mason 	 * | state |
689*a52d9a80SChris Mason 	 *   or
690*a52d9a80SChris Mason 	 * | ------------- state -------------- |
691*a52d9a80SChris Mason 	 *
692*a52d9a80SChris Mason 	 * We need to split the extent we found, and may flip bits on
693*a52d9a80SChris Mason 	 * second half.
694*a52d9a80SChris Mason 	 *
695*a52d9a80SChris Mason 	 * If the extent we found extends past our
696*a52d9a80SChris Mason 	 * range, we just split and search again.  It'll get split
697*a52d9a80SChris Mason 	 * again the next time though.
698*a52d9a80SChris Mason 	 *
699*a52d9a80SChris Mason 	 * If the extent we found is inside our range, we set the
700*a52d9a80SChris Mason 	 * desired bit on it.
701*a52d9a80SChris Mason 	 */
702*a52d9a80SChris Mason 	if (state->start < start) {
703*a52d9a80SChris Mason 		set = state->state & bits;
704*a52d9a80SChris Mason 		if (exclusive && set) {
705*a52d9a80SChris Mason 			*failed_start = start;
706*a52d9a80SChris Mason 			err = -EEXIST;
707*a52d9a80SChris Mason 			goto out;
708*a52d9a80SChris Mason 		}
709*a52d9a80SChris Mason 		err = split_state(tree, state, prealloc, start);
710*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
711*a52d9a80SChris Mason 		prealloc = NULL;
712*a52d9a80SChris Mason 		if (err)
713*a52d9a80SChris Mason 			goto out;
714*a52d9a80SChris Mason 		if (state->end <= end) {
715*a52d9a80SChris Mason 			state->state |= bits;
716*a52d9a80SChris Mason 			start = state->end + 1;
717*a52d9a80SChris Mason 			merge_state(tree, state);
718*a52d9a80SChris Mason 		} else {
719*a52d9a80SChris Mason 			start = state->start;
720*a52d9a80SChris Mason 		}
721*a52d9a80SChris Mason 		goto search_again;
722*a52d9a80SChris Mason 	}
723*a52d9a80SChris Mason 	/*
724*a52d9a80SChris Mason 	 * | ---- desired range ---- |
725*a52d9a80SChris Mason 	 *                        | state |
726*a52d9a80SChris Mason 	 * We need to split the extent, and set the bit
727*a52d9a80SChris Mason 	 * on the first half
728*a52d9a80SChris Mason 	 */
729*a52d9a80SChris Mason 	if (state->start <= end && state->end > end) {
730*a52d9a80SChris Mason 		set = state->state & bits;
731*a52d9a80SChris Mason 		if (exclusive && set) {
732*a52d9a80SChris Mason 			*failed_start = start;
733*a52d9a80SChris Mason 			err = -EEXIST;
734*a52d9a80SChris Mason 			goto out;
735*a52d9a80SChris Mason 		}
736*a52d9a80SChris Mason 		err = split_state(tree, state, prealloc, end + 1);
737*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
738*a52d9a80SChris Mason 
739*a52d9a80SChris Mason 		prealloc->state |= bits;
740*a52d9a80SChris Mason 		merge_state(tree, prealloc);
741*a52d9a80SChris Mason 		prealloc = NULL;
742*a52d9a80SChris Mason 		goto out;
743*a52d9a80SChris Mason 	}
744*a52d9a80SChris Mason 
745*a52d9a80SChris Mason 	/*
746*a52d9a80SChris Mason 	 * | ---- desired range ---- |
747*a52d9a80SChris Mason 	 *     | state | or               | state |
748*a52d9a80SChris Mason 	 *
749*a52d9a80SChris Mason 	 * There's a hole, we need to insert something in it and
750*a52d9a80SChris Mason 	 * ignore the extent we found.
751*a52d9a80SChris Mason 	 */
752*a52d9a80SChris Mason 	if (state->start > start) {
753*a52d9a80SChris Mason 		u64 this_end;
754*a52d9a80SChris Mason 		if (end < last_start)
755*a52d9a80SChris Mason 			this_end = end;
756*a52d9a80SChris Mason 		else
757*a52d9a80SChris Mason 			this_end = last_start -1;
758*a52d9a80SChris Mason 		err = insert_state(tree, prealloc, start, this_end,
759*a52d9a80SChris Mason 				   bits);
760*a52d9a80SChris Mason 		prealloc = NULL;
761*a52d9a80SChris Mason 		BUG_ON(err == -EEXIST);
762*a52d9a80SChris Mason 		if (err)
763*a52d9a80SChris Mason 			goto out;
764*a52d9a80SChris Mason 		start = this_end + 1;
765*a52d9a80SChris Mason 		goto search_again;
766*a52d9a80SChris Mason 	}
767*a52d9a80SChris Mason 	goto search_again;
768*a52d9a80SChris Mason 
769*a52d9a80SChris Mason out:
770*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
771*a52d9a80SChris Mason 	if (prealloc)
772*a52d9a80SChris Mason 		free_extent_state(prealloc);
773*a52d9a80SChris Mason 
774*a52d9a80SChris Mason 	return err;
775*a52d9a80SChris Mason 
776*a52d9a80SChris Mason search_again:
777*a52d9a80SChris Mason 	if (start > end)
778*a52d9a80SChris Mason 		goto out;
779*a52d9a80SChris Mason 	write_unlock_irq(&tree->lock);
780*a52d9a80SChris Mason 	if (mask & __GFP_WAIT)
781*a52d9a80SChris Mason 		cond_resched();
782*a52d9a80SChris Mason 	goto again;
783*a52d9a80SChris Mason }
784*a52d9a80SChris Mason EXPORT_SYMBOL(set_extent_bit);
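
/*
 * A minimal sketch (hypothetical helper) of the exclusive mode described
 * above: with exclusive == 1 the call fails with -EEXIST when any part of
 * the range already has the bit set, and failed_start reports where the
 * conflicting state begins.  lock_extent() below adds the retry loop a
 * real caller wants.
 */
static int example_try_lock_extent(struct extent_map_tree *tree,
				   u64 start, u64 end)
{
	u64 failed_start;
	int err;

	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
			     &failed_start, GFP_NOFS);
	if (err == -EEXIST)
		printk("extent already locked at %Lu\n", failed_start);
	return err;
}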
785*a52d9a80SChris Mason 
786*a52d9a80SChris Mason /* wrappers around set/clear extent bit */
787*a52d9a80SChris Mason int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
788*a52d9a80SChris Mason 		     gfp_t mask)
789*a52d9a80SChris Mason {
790*a52d9a80SChris Mason 	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
791*a52d9a80SChris Mason 			      mask);
792*a52d9a80SChris Mason }
793*a52d9a80SChris Mason EXPORT_SYMBOL(set_extent_dirty);
794*a52d9a80SChris Mason 
795*a52d9a80SChris Mason int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
796*a52d9a80SChris Mason 		       gfp_t mask)
797*a52d9a80SChris Mason {
798*a52d9a80SChris Mason 	return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask);
799*a52d9a80SChris Mason }
800*a52d9a80SChris Mason EXPORT_SYMBOL(clear_extent_dirty);
801*a52d9a80SChris Mason 
802*a52d9a80SChris Mason int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
803*a52d9a80SChris Mason 		     gfp_t mask)
804*a52d9a80SChris Mason {
805*a52d9a80SChris Mason 	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
806*a52d9a80SChris Mason 			      mask);
807*a52d9a80SChris Mason }
808*a52d9a80SChris Mason EXPORT_SYMBOL(set_extent_new);
809*a52d9a80SChris Mason 
810*a52d9a80SChris Mason int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
811*a52d9a80SChris Mason 		       gfp_t mask)
812*a52d9a80SChris Mason {
813*a52d9a80SChris Mason 	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
814*a52d9a80SChris Mason }
815*a52d9a80SChris Mason EXPORT_SYMBOL(clear_extent_new);
816*a52d9a80SChris Mason 
817*a52d9a80SChris Mason int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
818*a52d9a80SChris Mason 			gfp_t mask)
819*a52d9a80SChris Mason {
820*a52d9a80SChris Mason 	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
821*a52d9a80SChris Mason 			      mask);
822*a52d9a80SChris Mason }
823*a52d9a80SChris Mason EXPORT_SYMBOL(set_extent_uptodate);
824*a52d9a80SChris Mason 
825*a52d9a80SChris Mason int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
826*a52d9a80SChris Mason 			  gfp_t mask)
827*a52d9a80SChris Mason {
828*a52d9a80SChris Mason 	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
829*a52d9a80SChris Mason }
830*a52d9a80SChris Mason EXPORT_SYMBOL(clear_extent_uptodate);
831*a52d9a80SChris Mason 
832*a52d9a80SChris Mason int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
833*a52d9a80SChris Mason 			 gfp_t mask)
834*a52d9a80SChris Mason {
835*a52d9a80SChris Mason 	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
836*a52d9a80SChris Mason 			      0, NULL, mask);
837*a52d9a80SChris Mason }
838*a52d9a80SChris Mason EXPORT_SYMBOL(set_extent_writeback);
839*a52d9a80SChris Mason 
840*a52d9a80SChris Mason int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
841*a52d9a80SChris Mason 			   gfp_t mask)
842*a52d9a80SChris Mason {
843*a52d9a80SChris Mason 	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
844*a52d9a80SChris Mason }
845*a52d9a80SChris Mason EXPORT_SYMBOL(clear_extent_writeback);
846*a52d9a80SChris Mason 
847*a52d9a80SChris Mason int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
848*a52d9a80SChris Mason {
849*a52d9a80SChris Mason 	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
850*a52d9a80SChris Mason }
851*a52d9a80SChris Mason EXPORT_SYMBOL(wait_on_extent_writeback);
852*a52d9a80SChris Mason 
853*a52d9a80SChris Mason /*
854*a52d9a80SChris Mason  * locks a range in ascending order, waiting for any locked regions
855*a52d9a80SChris Mason  * it hits on the way.  [start,end] are inclusive, and this will sleep.
856*a52d9a80SChris Mason  */
857*a52d9a80SChris Mason int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
858*a52d9a80SChris Mason {
859*a52d9a80SChris Mason 	int err;
860*a52d9a80SChris Mason 	u64 failed_start;
861*a52d9a80SChris Mason 	while (1) {
862*a52d9a80SChris Mason 		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
863*a52d9a80SChris Mason 				     &failed_start, mask);
864*a52d9a80SChris Mason 		if (err == -EEXIST && (mask & __GFP_WAIT)) {
865*a52d9a80SChris Mason 			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
866*a52d9a80SChris Mason 			start = failed_start;
867*a52d9a80SChris Mason 		} else {
868*a52d9a80SChris Mason 			break;
869*a52d9a80SChris Mason 		}
870*a52d9a80SChris Mason 		WARN_ON(start > end);
871*a52d9a80SChris Mason 	}
872*a52d9a80SChris Mason 	return err;
873*a52d9a80SChris Mason }
874*a52d9a80SChris Mason EXPORT_SYMBOL(lock_extent);
875*a52d9a80SChris Mason 
876*a52d9a80SChris Mason int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
877*a52d9a80SChris Mason 		  gfp_t mask)
878*a52d9a80SChris Mason {
879*a52d9a80SChris Mason 	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
880*a52d9a80SChris Mason }
881*a52d9a80SChris Mason EXPORT_SYMBOL(unlock_extent);
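
/*
 * A hedged sketch of the lock_extent()/unlock_extent() pairing
 * (hypothetical helper): lock the byte range, do work that must not race
 * with other users of the range, then unlock it.
 */
static int example_locked_region(struct extent_map_tree *tree,
				 u64 start, u64 end)
{
	int ret;

	ret = lock_extent(tree, start, end, GFP_NOFS);
	if (ret)
		return ret;
	/* ... operate on [start, end] while it is locked ... */
	return unlock_extent(tree, start, end, GFP_NOFS);
}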
882*a52d9a80SChris Mason 
883*a52d9a80SChris Mason /*
884*a52d9a80SChris Mason  * helper function to set pages and extents in the tree dirty
885*a52d9a80SChris Mason  */
886*a52d9a80SChris Mason int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
887*a52d9a80SChris Mason {
888*a52d9a80SChris Mason 	unsigned long index = start >> PAGE_CACHE_SHIFT;
889*a52d9a80SChris Mason 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
890*a52d9a80SChris Mason 	struct page *page;
891*a52d9a80SChris Mason 
892*a52d9a80SChris Mason 	while (index <= end_index) {
893*a52d9a80SChris Mason 		page = find_get_page(tree->mapping, index);
894*a52d9a80SChris Mason 		BUG_ON(!page);
895*a52d9a80SChris Mason 		__set_page_dirty_nobuffers(page);
896*a52d9a80SChris Mason 		page_cache_release(page);
897*a52d9a80SChris Mason 		index++;
898*a52d9a80SChris Mason 	}
899*a52d9a80SChris Mason 	set_extent_dirty(tree, start, end, GFP_NOFS);
900*a52d9a80SChris Mason 	return 0;
901*a52d9a80SChris Mason }
902*a52d9a80SChris Mason EXPORT_SYMBOL(set_range_dirty);
903*a52d9a80SChris Mason 
904*a52d9a80SChris Mason /*
905*a52d9a80SChris Mason  * helper function to set both pages and extents in the tree writeback
906*a52d9a80SChris Mason  */
907*a52d9a80SChris Mason int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
908*a52d9a80SChris Mason {
909*a52d9a80SChris Mason 	unsigned long index = start >> PAGE_CACHE_SHIFT;
910*a52d9a80SChris Mason 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
911*a52d9a80SChris Mason 	struct page *page;
912*a52d9a80SChris Mason 
913*a52d9a80SChris Mason 	while (index <= end_index) {
914*a52d9a80SChris Mason 		page = find_get_page(tree->mapping, index);
915*a52d9a80SChris Mason 		BUG_ON(!page);
916*a52d9a80SChris Mason 		set_page_writeback(page);
917*a52d9a80SChris Mason 		page_cache_release(page);
918*a52d9a80SChris Mason 		index++;
919*a52d9a80SChris Mason 	}
920*a52d9a80SChris Mason 	set_extent_writeback(tree, start, end, GFP_NOFS);
921*a52d9a80SChris Mason 	return 0;
922*a52d9a80SChris Mason }
923*a52d9a80SChris Mason EXPORT_SYMBOL(set_range_writeback);
924*a52d9a80SChris Mason 
925*a52d9a80SChris Mason /*
926*a52d9a80SChris Mason  * helper function to lock both pages and extents in the tree.
927*a52d9a80SChris Mason  * pages must be locked first.
928*a52d9a80SChris Mason  */
929*a52d9a80SChris Mason int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
930*a52d9a80SChris Mason {
931*a52d9a80SChris Mason 	unsigned long index = start >> PAGE_CACHE_SHIFT;
932*a52d9a80SChris Mason 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
933*a52d9a80SChris Mason 	struct page *page;
934*a52d9a80SChris Mason 	int err;
935*a52d9a80SChris Mason 
936*a52d9a80SChris Mason 	while (index <= end_index) {
937*a52d9a80SChris Mason 		page = grab_cache_page(tree->mapping, index);
938*a52d9a80SChris Mason 		if (!page) {
939*a52d9a80SChris Mason 			err = -ENOMEM;
940*a52d9a80SChris Mason 			goto failed;
941*a52d9a80SChris Mason 		}
942*a52d9a80SChris Mason 		if (IS_ERR(page)) {
943*a52d9a80SChris Mason 			err = PTR_ERR(page);
944*a52d9a80SChris Mason 			goto failed;
945*a52d9a80SChris Mason 		}
946*a52d9a80SChris Mason 		index++;
947*a52d9a80SChris Mason 	}
948*a52d9a80SChris Mason 	lock_extent(tree, start, end, GFP_NOFS);
949*a52d9a80SChris Mason 	return 0;
950*a52d9a80SChris Mason 
951*a52d9a80SChris Mason failed:
952*a52d9a80SChris Mason 	/*
953*a52d9a80SChris Mason 	 * we failed above in getting the page at 'index', so we undo here
954*a52d9a80SChris Mason 	 * up to but not including the page at 'index'
955*a52d9a80SChris Mason 	 */
956*a52d9a80SChris Mason 	end_index = index;
957*a52d9a80SChris Mason 	index = start >> PAGE_CACHE_SHIFT;
958*a52d9a80SChris Mason 	while (index < end_index) {
959*a52d9a80SChris Mason 		page = find_get_page(tree->mapping, index);
960*a52d9a80SChris Mason 		unlock_page(page);
961*a52d9a80SChris Mason 		page_cache_release(page);
962*a52d9a80SChris Mason 		index++;
963*a52d9a80SChris Mason 	}
964*a52d9a80SChris Mason 	return err;
965*a52d9a80SChris Mason }
966*a52d9a80SChris Mason EXPORT_SYMBOL(lock_range);
967*a52d9a80SChris Mason 
968*a52d9a80SChris Mason /*
969*a52d9a80SChris Mason  * helper function to unlock both pages and extents in the tree.
970*a52d9a80SChris Mason  */
971*a52d9a80SChris Mason int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
972*a52d9a80SChris Mason {
973*a52d9a80SChris Mason 	unsigned long index = start >> PAGE_CACHE_SHIFT;
974*a52d9a80SChris Mason 	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
975*a52d9a80SChris Mason 	struct page *page;
976*a52d9a80SChris Mason 
977*a52d9a80SChris Mason 	while (index <= end_index) {
978*a52d9a80SChris Mason 		page = find_get_page(tree->mapping, index);
979*a52d9a80SChris Mason 		unlock_page(page);
980*a52d9a80SChris Mason 		page_cache_release(page);
981*a52d9a80SChris Mason 		index++;
982*a52d9a80SChris Mason 	}
983*a52d9a80SChris Mason 	unlock_extent(tree, start, end, GFP_NOFS);
984*a52d9a80SChris Mason 	return 0;
985*a52d9a80SChris Mason }
986*a52d9a80SChris Mason EXPORT_SYMBOL(unlock_range);
987*a52d9a80SChris Mason 
988*a52d9a80SChris Mason /*
989*a52d9a80SChris Mason  * searches a range in the state tree for a given mask.
990*a52d9a80SChris Mason  * If 'filled' == 1, this returns 1 only if every extent in the given
991*a52d9a80SChris Mason  * range has the bits set.  Otherwise, 1 is returned if any bit in the
992*a52d9a80SChris Mason  * range is found set.
993*a52d9a80SChris Mason  */
994*a52d9a80SChris Mason static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
995*a52d9a80SChris Mason 			  int bits, int filled)
996*a52d9a80SChris Mason {
997*a52d9a80SChris Mason 	struct extent_state *state = NULL;
998*a52d9a80SChris Mason 	struct rb_node *node;
999*a52d9a80SChris Mason 	int bitset = 0;
1000*a52d9a80SChris Mason 
1001*a52d9a80SChris Mason 	read_lock_irq(&tree->lock);
1002*a52d9a80SChris Mason 	node = tree_search(&tree->state, start);
1003*a52d9a80SChris Mason 	while (node && start <= end) {
1004*a52d9a80SChris Mason 		state = rb_entry(node, struct extent_state, rb_node);
1005*a52d9a80SChris Mason 		if (state->start > end)
1006*a52d9a80SChris Mason 			break;
1007*a52d9a80SChris Mason 
1008*a52d9a80SChris Mason 		if (filled && state->start > start) {
1009*a52d9a80SChris Mason 			bitset = 0;
1010*a52d9a80SChris Mason 			break;
1011*a52d9a80SChris Mason 		}
1012*a52d9a80SChris Mason 		if (state->state & bits) {
1013*a52d9a80SChris Mason 			bitset = 1;
1014*a52d9a80SChris Mason 			if (!filled)
1015*a52d9a80SChris Mason 				break;
1016*a52d9a80SChris Mason 		} else if (filled) {
1017*a52d9a80SChris Mason 			bitset = 0;
1018*a52d9a80SChris Mason 			break;
1019*a52d9a80SChris Mason 		}
1020*a52d9a80SChris Mason 		start = state->end + 1;
1021*a52d9a80SChris Mason 		if (start > end)
1022*a52d9a80SChris Mason 			break;
1023*a52d9a80SChris Mason 		node = rb_next(node);
1024*a52d9a80SChris Mason 	}
1025*a52d9a80SChris Mason 	read_unlock_irq(&tree->lock);
1026*a52d9a80SChris Mason 	return bitset;
1027*a52d9a80SChris Mason }
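
/*
 * Worked example of the 'filled' flag above (illustration only): with
 * state [0, 4095] carrying EXTENT_DIRTY and state [4096, 8191] carrying
 * only EXTENT_UPTODATE, test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 0)
 * returns 1, while test_range_bit(tree, 0, 8191, EXTENT_DIRTY, 1)
 * returns 0 because the second state lacks the bit.
 */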
1028*a52d9a80SChris Mason 
1029*a52d9a80SChris Mason /*
1030*a52d9a80SChris Mason  * helper function to set a given page up to date if all the
1031*a52d9a80SChris Mason  * extents in the tree for that page are up to date
1032*a52d9a80SChris Mason  */
1033*a52d9a80SChris Mason static int check_page_uptodate(struct extent_map_tree *tree,
1034*a52d9a80SChris Mason 			       struct page *page)
1035*a52d9a80SChris Mason {
1036*a52d9a80SChris Mason 	u64 start = page->index << PAGE_CACHE_SHIFT;
1037*a52d9a80SChris Mason 	u64 end = start + PAGE_CACHE_SIZE - 1;
1038*a52d9a80SChris Mason 	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1039*a52d9a80SChris Mason 		SetPageUptodate(page);
1040*a52d9a80SChris Mason 	return 0;
1041*a52d9a80SChris Mason }
1042*a52d9a80SChris Mason 
1043*a52d9a80SChris Mason /*
1044*a52d9a80SChris Mason  * helper function to unlock a page if all the extents in the tree
1045*a52d9a80SChris Mason  * for that page are unlocked
1046*a52d9a80SChris Mason  */
1047*a52d9a80SChris Mason static int check_page_locked(struct extent_map_tree *tree,
1048*a52d9a80SChris Mason 			     struct page *page)
1049*a52d9a80SChris Mason {
1050*a52d9a80SChris Mason 	u64 start = page->index << PAGE_CACHE_SHIFT;
1051*a52d9a80SChris Mason 	u64 end = start + PAGE_CACHE_SIZE - 1;
1052*a52d9a80SChris Mason 	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1053*a52d9a80SChris Mason 		unlock_page(page);
1054*a52d9a80SChris Mason 	return 0;
1055*a52d9a80SChris Mason }
1056*a52d9a80SChris Mason 
1057*a52d9a80SChris Mason /*
1058*a52d9a80SChris Mason  * helper function to end page writeback if all the extents
1059*a52d9a80SChris Mason  * in the tree for that page are done with writeback
1060*a52d9a80SChris Mason  */
1061*a52d9a80SChris Mason static int check_page_writeback(struct extent_map_tree *tree,
1062*a52d9a80SChris Mason 			     struct page *page)
1063*a52d9a80SChris Mason {
1064*a52d9a80SChris Mason 	u64 start = page->index << PAGE_CACHE_SHIFT;
1065*a52d9a80SChris Mason 	u64 end = start + PAGE_CACHE_SIZE - 1;
1066*a52d9a80SChris Mason 	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1067*a52d9a80SChris Mason 		end_page_writeback(page);
1068*a52d9a80SChris Mason 	return 0;
1069*a52d9a80SChris Mason }
1070*a52d9a80SChris Mason 
1071*a52d9a80SChris Mason /* lots and lots of room for performance fixes in the end_bio funcs */
1072*a52d9a80SChris Mason 
1073*a52d9a80SChris Mason /*
1074*a52d9a80SChris Mason  * after a writepage IO is done, we need to:
1075*a52d9a80SChris Mason  * clear the uptodate bits on error
1076*a52d9a80SChris Mason  * clear the writeback bits in the extent tree for this IO
1077*a52d9a80SChris Mason  * end_page_writeback if the page has no more pending IO
1078*a52d9a80SChris Mason  *
1079*a52d9a80SChris Mason  * Scheduling is not allowed, so the extent state tree is expected
1080*a52d9a80SChris Mason  * to have one and only one object corresponding to this IO.
1081*a52d9a80SChris Mason  */
1082*a52d9a80SChris Mason static int end_bio_extent_writepage(struct bio *bio,
1083*a52d9a80SChris Mason 				   unsigned int bytes_done, int err)
1084*a52d9a80SChris Mason {
1085*a52d9a80SChris Mason 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1086*a52d9a80SChris Mason 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1087*a52d9a80SChris Mason 	struct extent_map_tree *tree = bio->bi_private;
1088*a52d9a80SChris Mason 	u64 start;
1089*a52d9a80SChris Mason 	u64 end;
1090*a52d9a80SChris Mason 	int whole_page;
1091*a52d9a80SChris Mason 
1092*a52d9a80SChris Mason 	if (bio->bi_size)
1093*a52d9a80SChris Mason 		return 1;
1094*a52d9a80SChris Mason 
1095*a52d9a80SChris Mason 	do {
1096*a52d9a80SChris Mason 		struct page *page = bvec->bv_page;
1097*a52d9a80SChris Mason 		start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1098*a52d9a80SChris Mason 		end = start + bvec->bv_len - 1;
1099*a52d9a80SChris Mason 
1100*a52d9a80SChris Mason 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1101*a52d9a80SChris Mason 			whole_page = 1;
1102*a52d9a80SChris Mason 		else
1103*a52d9a80SChris Mason 			whole_page = 0;
1104*a52d9a80SChris Mason 
1105*a52d9a80SChris Mason 		if (--bvec >= bio->bi_io_vec)
1106*a52d9a80SChris Mason 			prefetchw(&bvec->bv_page->flags);
1107*a52d9a80SChris Mason 
1108*a52d9a80SChris Mason 		if (!uptodate) {
1109*a52d9a80SChris Mason 			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1110*a52d9a80SChris Mason 			ClearPageUptodate(page);
1111*a52d9a80SChris Mason 			SetPageError(page);
1112*a52d9a80SChris Mason 		}
1113*a52d9a80SChris Mason 		clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1114*a52d9a80SChris Mason 
1115*a52d9a80SChris Mason 		if (whole_page)
1116*a52d9a80SChris Mason 			end_page_writeback(page);
1117*a52d9a80SChris Mason 		else
1118*a52d9a80SChris Mason 			check_page_writeback(tree, page);
1119*a52d9a80SChris Mason 	} while (bvec >= bio->bi_io_vec);
1120*a52d9a80SChris Mason 
1121*a52d9a80SChris Mason 	bio_put(bio);
1122*a52d9a80SChris Mason 	return 0;
1123*a52d9a80SChris Mason }
1124*a52d9a80SChris Mason 
1125*a52d9a80SChris Mason /*
1126*a52d9a80SChris Mason  * after a readpage IO is done, we need to:
1127*a52d9a80SChris Mason  * clear the uptodate bits on error
1128*a52d9a80SChris Mason  * set the uptodate bits if things worked
1129*a52d9a80SChris Mason  * set the page up to date if all extents in the tree are uptodate
1130*a52d9a80SChris Mason  * clear the lock bit in the extent tree
1131*a52d9a80SChris Mason  * unlock the page if there are no other extents locked for it
1132*a52d9a80SChris Mason  *
1133*a52d9a80SChris Mason  * Scheduling is not allowed, so the extent state tree is expected
1134*a52d9a80SChris Mason  * to have one and only one object corresponding to this IO.
1135*a52d9a80SChris Mason  */
1136*a52d9a80SChris Mason static int end_bio_extent_readpage(struct bio *bio,
1137*a52d9a80SChris Mason 				   unsigned int bytes_done, int err)
1138*a52d9a80SChris Mason {
1139*a52d9a80SChris Mason 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1140*a52d9a80SChris Mason 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1141*a52d9a80SChris Mason 	struct extent_map_tree *tree = bio->bi_private;
1142*a52d9a80SChris Mason 	u64 start;
1143*a52d9a80SChris Mason 	u64 end;
1144*a52d9a80SChris Mason 	int whole_page;
1145*a52d9a80SChris Mason 
1146*a52d9a80SChris Mason 	if (bio->bi_size)
1147*a52d9a80SChris Mason 		return 1;
1148*a52d9a80SChris Mason 
1149*a52d9a80SChris Mason 	do {
1150*a52d9a80SChris Mason 		struct page *page = bvec->bv_page;
1151*a52d9a80SChris Mason 		start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1152*a52d9a80SChris Mason 		end = start + bvec->bv_len - 1;
1153*a52d9a80SChris Mason 
1154*a52d9a80SChris Mason 		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1155*a52d9a80SChris Mason 			whole_page = 1;
1156*a52d9a80SChris Mason 		else
1157*a52d9a80SChris Mason 			whole_page = 0;
1158*a52d9a80SChris Mason 
1159*a52d9a80SChris Mason 		if (--bvec >= bio->bi_io_vec)
1160*a52d9a80SChris Mason 			prefetchw(&bvec->bv_page->flags);
1161*a52d9a80SChris Mason 
1162*a52d9a80SChris Mason 		if (uptodate) {
1163*a52d9a80SChris Mason 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1164*a52d9a80SChris Mason 			if (whole_page)
1165*a52d9a80SChris Mason 				SetPageUptodate(page);
1166*a52d9a80SChris Mason 			else
1167*a52d9a80SChris Mason 				check_page_uptodate(tree, page);
1168*a52d9a80SChris Mason 		} else {
1169*a52d9a80SChris Mason 			ClearPageUptodate(page);
1170*a52d9a80SChris Mason 			SetPageError(page);
1171*a52d9a80SChris Mason 		}
1172*a52d9a80SChris Mason 
1173*a52d9a80SChris Mason 		unlock_extent(tree, start, end, GFP_ATOMIC);
1174*a52d9a80SChris Mason 
1175*a52d9a80SChris Mason 		if (whole_page)
1176*a52d9a80SChris Mason 			unlock_page(page);
1177*a52d9a80SChris Mason 		else
1178*a52d9a80SChris Mason 			check_page_locked(tree, page);
1179*a52d9a80SChris Mason 	} while (bvec >= bio->bi_io_vec);
1180*a52d9a80SChris Mason 
1181*a52d9a80SChris Mason 	bio_put(bio);
1182*a52d9a80SChris Mason 	return 0;
1183*a52d9a80SChris Mason }
1184*a52d9a80SChris Mason 
1185*a52d9a80SChris Mason /*
1186*a52d9a80SChris Mason  * IO done from prepare_write is pretty simple, we just unlock
1187*a52d9a80SChris Mason  * the structs in the extent tree when done, and set the uptodate bits
1188*a52d9a80SChris Mason  * as appropriate.
1189*a52d9a80SChris Mason  */
1190*a52d9a80SChris Mason static int end_bio_extent_preparewrite(struct bio *bio,
1191*a52d9a80SChris Mason 				       unsigned int bytes_done, int err)
1192*a52d9a80SChris Mason {
1193*a52d9a80SChris Mason 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1194*a52d9a80SChris Mason 	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1195*a52d9a80SChris Mason 	struct extent_map_tree *tree = bio->bi_private;
1196*a52d9a80SChris Mason 	u64 start;
1197*a52d9a80SChris Mason 	u64 end;
1198*a52d9a80SChris Mason 
1199*a52d9a80SChris Mason 	if (bio->bi_size)
1200*a52d9a80SChris Mason 		return 1;
1201*a52d9a80SChris Mason 
1202*a52d9a80SChris Mason 	do {
1203*a52d9a80SChris Mason 		struct page *page = bvec->bv_page;
1204*a52d9a80SChris Mason 		start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1205*a52d9a80SChris Mason 		end = start + bvec->bv_len - 1;
1206*a52d9a80SChris Mason 
1207*a52d9a80SChris Mason 		if (--bvec >= bio->bi_io_vec)
1208*a52d9a80SChris Mason 			prefetchw(&bvec->bv_page->flags);
1209*a52d9a80SChris Mason 
1210*a52d9a80SChris Mason 		if (uptodate) {
1211*a52d9a80SChris Mason 			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1212*a52d9a80SChris Mason 		} else {
1213*a52d9a80SChris Mason 			ClearPageUptodate(page);
1214*a52d9a80SChris Mason 			SetPageError(page);
1215*a52d9a80SChris Mason 		}
1216*a52d9a80SChris Mason 
1217*a52d9a80SChris Mason 		unlock_extent(tree, start, end, GFP_ATOMIC);
1218*a52d9a80SChris Mason 
1219*a52d9a80SChris Mason 	} while (bvec >= bio->bi_io_vec);
1220*a52d9a80SChris Mason 
1221*a52d9a80SChris Mason 	bio_put(bio);
1222*a52d9a80SChris Mason 	return 0;
1223*a52d9a80SChris Mason }
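
/*
 * Unlike the readpage and writepage completions above, this handler never
 * touches the page lock: prepare_write runs with the page already locked by
 * the caller, and extent_prepare_write() below waits for these reads itself
 * via wait_extent_bit(..., EXTENT_LOCKED), so only the extent range needs to
 * be unlocked here.
 */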
1224*a52d9a80SChris Mason 
1225*a52d9a80SChris Mason static int submit_extent_page(int rw, struct extent_map_tree *tree,
1226*a52d9a80SChris Mason 			      struct page *page, sector_t sector,
1227*a52d9a80SChris Mason 			      size_t size, unsigned long offset,
1228*a52d9a80SChris Mason 			      struct block_device *bdev,
1229*a52d9a80SChris Mason 			      bio_end_io_t end_io_func)
1230*a52d9a80SChris Mason {
1231*a52d9a80SChris Mason 	struct bio *bio;
1232*a52d9a80SChris Mason 	int ret = 0;
1233*a52d9a80SChris Mason 
1234*a52d9a80SChris Mason 	bio = bio_alloc(GFP_NOIO, 1);
1235*a52d9a80SChris Mason 
1236*a52d9a80SChris Mason 	bio->bi_sector = sector;
1237*a52d9a80SChris Mason 	bio->bi_bdev = bdev;
1238*a52d9a80SChris Mason 	bio->bi_io_vec[0].bv_page = page;
1239*a52d9a80SChris Mason 	bio->bi_io_vec[0].bv_len = size;
1240*a52d9a80SChris Mason 	bio->bi_io_vec[0].bv_offset = offset;
1241*a52d9a80SChris Mason 
1242*a52d9a80SChris Mason 	bio->bi_vcnt = 1;
1243*a52d9a80SChris Mason 	bio->bi_idx = 0;
1244*a52d9a80SChris Mason 	bio->bi_size = size;
1245*a52d9a80SChris Mason 
1246*a52d9a80SChris Mason 	bio->bi_end_io = end_io_func;
1247*a52d9a80SChris Mason 	bio->bi_private = tree;
1248*a52d9a80SChris Mason 
1249*a52d9a80SChris Mason 	bio_get(bio);
1250*a52d9a80SChris Mason 	submit_bio(rw, bio);
1251*a52d9a80SChris Mason 
1252*a52d9a80SChris Mason 	if (bio_flagged(bio, BIO_EOPNOTSUPP))
1253*a52d9a80SChris Mason 		ret = -EOPNOTSUPP;
1254*a52d9a80SChris Mason 
1255*a52d9a80SChris Mason 	bio_put(bio);
1256*a52d9a80SChris Mason 	return ret;
1257*a52d9a80SChris Mason }
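
/*
 * Note that the bio is built by hand with a single embedded bio_vec instead
 * of going through bio_add_page(), so each call issues exactly one bio for
 * one contiguous range of one page.  The bio_get()/bio_put() pair around
 * submit_bio() keeps the bio alive long enough to test BIO_EOPNOTSUPP even
 * if the IO completes (and the end_io handler drops its reference)
 * immediately.
 */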
1258*a52d9a80SChris Mason 
1259*a52d9a80SChris Mason /*
1260*a52d9a80SChris Mason  * basic readpage implementation.  Locked extent state structs are inserted
1261*a52d9a80SChris Mason  * into the tree; they are removed again when the IO is done (by the end_io
1262*a52d9a80SChris Mason  * handlers)
1263*a52d9a80SChris Mason  */
1264*a52d9a80SChris Mason int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1265*a52d9a80SChris Mason 			  get_extent_t *get_extent)
1266*a52d9a80SChris Mason {
1267*a52d9a80SChris Mason 	struct inode *inode = page->mapping->host;
1268*a52d9a80SChris Mason 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1269*a52d9a80SChris Mason 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1270*a52d9a80SChris Mason 	u64 end;
1271*a52d9a80SChris Mason 	u64 cur = start;
1272*a52d9a80SChris Mason 	u64 extent_offset;
1273*a52d9a80SChris Mason 	u64 last_byte = i_size_read(inode);
1274*a52d9a80SChris Mason 	u64 block_start;
1275*a52d9a80SChris Mason 	u64 cur_end;
1276*a52d9a80SChris Mason 	sector_t sector;
1277*a52d9a80SChris Mason 	struct extent_map *em;
1278*a52d9a80SChris Mason 	struct block_device *bdev;
1279*a52d9a80SChris Mason 	int ret;
1280*a52d9a80SChris Mason 	int nr = 0;
1281*a52d9a80SChris Mason 	size_t page_offset = 0;
1282*a52d9a80SChris Mason 	size_t iosize;
1283*a52d9a80SChris Mason 	size_t blocksize = inode->i_sb->s_blocksize;
1284*a52d9a80SChris Mason 
1285*a52d9a80SChris Mason 	if (!PagePrivate(page)) {
1286*a52d9a80SChris Mason 		SetPagePrivate(page);
1287*a52d9a80SChris Mason 		set_page_private(page, 1);
1288*a52d9a80SChris Mason 		page_cache_get(page);
1289*a52d9a80SChris Mason 	}
1290*a52d9a80SChris Mason 
1291*a52d9a80SChris Mason 	end = page_end;
1292*a52d9a80SChris Mason 	lock_extent(tree, start, end, GFP_NOFS);
1293*a52d9a80SChris Mason 
1294*a52d9a80SChris Mason 	while (cur <= end) {
1295*a52d9a80SChris Mason 		if (cur >= last_byte) {
1296*a52d9a80SChris Mason 			iosize = PAGE_CACHE_SIZE - page_offset;
1297*a52d9a80SChris Mason 			zero_user_page(page, page_offset, iosize, KM_USER0);
1298*a52d9a80SChris Mason 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1299*a52d9a80SChris Mason 					    GFP_NOFS);
1300*a52d9a80SChris Mason 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1301*a52d9a80SChris Mason 			break;
1302*a52d9a80SChris Mason 		}
1303*a52d9a80SChris Mason 		em = get_extent(inode, page, page_offset, cur, end, 0);
1304*a52d9a80SChris Mason 		if (IS_ERR(em) || !em) {
1305*a52d9a80SChris Mason 			SetPageError(page);
1306*a52d9a80SChris Mason 			unlock_extent(tree, cur, end, GFP_NOFS);
1307*a52d9a80SChris Mason 			break;
1308*a52d9a80SChris Mason 		}
1309*a52d9a80SChris Mason 
1310*a52d9a80SChris Mason 		extent_offset = cur - em->start;
1311*a52d9a80SChris Mason 		BUG_ON(em->end < cur);
1312*a52d9a80SChris Mason 		BUG_ON(end < cur);
1313*a52d9a80SChris Mason 
1314*a52d9a80SChris Mason 		iosize = min(em->end - cur, end - cur) + 1;
1315*a52d9a80SChris Mason 		cur_end = min(em->end, end);
1316*a52d9a80SChris Mason 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1317*a52d9a80SChris Mason 		sector = (em->block_start + extent_offset) >> 9;
1318*a52d9a80SChris Mason 		bdev = em->bdev;
1319*a52d9a80SChris Mason 		block_start = em->block_start;
1320*a52d9a80SChris Mason 		free_extent_map(em);
1321*a52d9a80SChris Mason 		em = NULL;
1322*a52d9a80SChris Mason 
1323*a52d9a80SChris Mason 		/* we've found a hole, just zero and go on */
1324*a52d9a80SChris Mason 		if (block_start == 0) {
1325*a52d9a80SChris Mason 			zero_user_page(page, page_offset, iosize, KM_USER0);
1326*a52d9a80SChris Mason 			set_extent_uptodate(tree, cur, cur + iosize - 1,
1327*a52d9a80SChris Mason 					    GFP_NOFS);
1328*a52d9a80SChris Mason 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1329*a52d9a80SChris Mason 			cur = cur + iosize;
1330*a52d9a80SChris Mason 			page_offset += iosize;
1331*a52d9a80SChris Mason 			continue;
1332*a52d9a80SChris Mason 		}
1333*a52d9a80SChris Mason 		/* the get_extent function already copied the data into the page */
1334*a52d9a80SChris Mason 		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1335*a52d9a80SChris Mason 			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1336*a52d9a80SChris Mason 			cur = cur + iosize;
1337*a52d9a80SChris Mason 			page_offset += iosize;
1338*a52d9a80SChris Mason 			continue;
1339*a52d9a80SChris Mason 		}
1340*a52d9a80SChris Mason 
1341*a52d9a80SChris Mason 		ret = submit_extent_page(READ, tree, page,
1342*a52d9a80SChris Mason 					 sector, iosize, page_offset, bdev,
1343*a52d9a80SChris Mason 					 end_bio_extent_readpage);
1344*a52d9a80SChris Mason 		if (ret)
1345*a52d9a80SChris Mason 			SetPageError(page);
1346*a52d9a80SChris Mason 		cur = cur + iosize;
1347*a52d9a80SChris Mason 		page_offset += iosize;
1348*a52d9a80SChris Mason 		nr++;
1349*a52d9a80SChris Mason 	}
1350*a52d9a80SChris Mason 	if (!nr) {
1351*a52d9a80SChris Mason 		if (!PageError(page))
1352*a52d9a80SChris Mason 			SetPageUptodate(page);
1353*a52d9a80SChris Mason 		unlock_page(page);
1354*a52d9a80SChris Mason 	}
1355*a52d9a80SChris Mason 	return 0;
1356*a52d9a80SChris Mason }
1357*a52d9a80SChris Mason EXPORT_SYMBOL(extent_read_full_page);
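
/*
 * A sketch of how a filesystem might hook this up as its ->readpage
 * address_space operation; my_readpage, my_inode_tree() and my_get_extent
 * are hypothetical names, used only for illustration.
 */
#if 0
static int my_readpage(struct file *file, struct page *page)
{
	struct extent_map_tree *tree = my_inode_tree(page->mapping->host);

	return extent_read_full_page(tree, page, my_get_extent);
}
#endif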
1358*a52d9a80SChris Mason 
1359*a52d9a80SChris Mason /*
1360*a52d9a80SChris Mason  * basic writepage implementation, with semantics similar to a regular
1361*a52d9a80SChris Mason  * writepage.  extent records are inserted to lock ranges in the tree, and
1362*a52d9a80SChris Mason  * as dirty areas are found, they are marked writeback.  Then the lock bits
1363*a52d9a80SChris Mason  * are removed and the end_io handler clears the writeback ranges
1364*a52d9a80SChris Mason  */
1365*a52d9a80SChris Mason int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1366*a52d9a80SChris Mason 			  get_extent_t *get_extent,
1367*a52d9a80SChris Mason 			  struct writeback_control *wbc)
1368*a52d9a80SChris Mason {
1369*a52d9a80SChris Mason 	struct inode *inode = page->mapping->host;
1370*a52d9a80SChris Mason 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1371*a52d9a80SChris Mason 	u64 page_end = start + PAGE_CACHE_SIZE - 1;
1372*a52d9a80SChris Mason 	u64 end;
1373*a52d9a80SChris Mason 	u64 cur = start;
1374*a52d9a80SChris Mason 	u64 extent_offset;
1375*a52d9a80SChris Mason 	u64 last_byte = i_size_read(inode);
1376*a52d9a80SChris Mason 	u64 block_start;
1377*a52d9a80SChris Mason 	sector_t sector;
1378*a52d9a80SChris Mason 	struct extent_map *em;
1379*a52d9a80SChris Mason 	struct block_device *bdev;
1380*a52d9a80SChris Mason 	int ret;
1381*a52d9a80SChris Mason 	int nr = 0;
1382*a52d9a80SChris Mason 	size_t page_offset = 0;
1383*a52d9a80SChris Mason 	size_t iosize;
1384*a52d9a80SChris Mason 	size_t blocksize;
1385*a52d9a80SChris Mason 	loff_t i_size = i_size_read(inode);
1386*a52d9a80SChris Mason 	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1387*a52d9a80SChris Mason 
1388*a52d9a80SChris Mason 	if (page->index > end_index) {
1389*a52d9a80SChris Mason 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1390*a52d9a80SChris Mason 		unlock_page(page);
1391*a52d9a80SChris Mason 		return 0;
1392*a52d9a80SChris Mason 	}
1393*a52d9a80SChris Mason 
1394*a52d9a80SChris Mason 	if (page->index == end_index) {
1395*a52d9a80SChris Mason 		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1396*a52d9a80SChris Mason 		zero_user_page(page, offset,
1397*a52d9a80SChris Mason 			       PAGE_CACHE_SIZE - offset, KM_USER0);
1398*a52d9a80SChris Mason 	}
1399*a52d9a80SChris Mason 
1400*a52d9a80SChris Mason 	if (!PagePrivate(page)) {
1401*a52d9a80SChris Mason 		SetPagePrivate(page);
1402*a52d9a80SChris Mason 		set_page_private(page, 1);
1403*a52d9a80SChris Mason 		page_cache_get(page);
1404*a52d9a80SChris Mason 	}
1405*a52d9a80SChris Mason 
1406*a52d9a80SChris Mason 	end = page_end;
1407*a52d9a80SChris Mason 	lock_extent(tree, start, page_end, GFP_NOFS);
1408*a52d9a80SChris Mason 
1409*a52d9a80SChris Mason 	if (last_byte <= start) {
1410*a52d9a80SChris Mason 		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1411*a52d9a80SChris Mason 		goto done;
1412*a52d9a80SChris Mason 	}
1413*a52d9a80SChris Mason 
1414*a52d9a80SChris Mason 	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1415*a52d9a80SChris Mason 	blocksize = inode->i_sb->s_blocksize;
1416*a52d9a80SChris Mason 
1417*a52d9a80SChris Mason 	while (cur <= end) {
1418*a52d9a80SChris Mason 		if (cur >= last_byte) {
1419*a52d9a80SChris Mason 			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1420*a52d9a80SChris Mason 			break;
1421*a52d9a80SChris Mason 		}
1422*a52d9a80SChris Mason 		em = get_extent(inode, page, page_offset, cur, end, 1);
1423*a52d9a80SChris Mason 		if (IS_ERR(em) || !em) {
1424*a52d9a80SChris Mason 			SetPageError(page);
1425*a52d9a80SChris Mason 			break;
1426*a52d9a80SChris Mason 		}
1427*a52d9a80SChris Mason 
1428*a52d9a80SChris Mason 		extent_offset = cur - em->start;
1429*a52d9a80SChris Mason 		BUG_ON(em->end < cur);
1430*a52d9a80SChris Mason 		BUG_ON(end < cur);
1431*a52d9a80SChris Mason 		iosize = min(em->end - cur, end - cur) + 1;
1432*a52d9a80SChris Mason 		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1433*a52d9a80SChris Mason 		sector = (em->block_start + extent_offset) >> 9;
1434*a52d9a80SChris Mason 		bdev = em->bdev;
1435*a52d9a80SChris Mason 		block_start = em->block_start;
1436*a52d9a80SChris Mason 		free_extent_map(em);
1437*a52d9a80SChris Mason 		em = NULL;
1438*a52d9a80SChris Mason 
1439*a52d9a80SChris Mason 		if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
1440*a52d9a80SChris Mason 			clear_extent_dirty(tree, cur,
1441*a52d9a80SChris Mason 					   cur + iosize - 1, GFP_NOFS);
1442*a52d9a80SChris Mason 			cur = cur + iosize;
1443*a52d9a80SChris Mason 			page_offset += iosize;
1444*a52d9a80SChris Mason 			continue;
1445*a52d9a80SChris Mason 		}
1446*a52d9a80SChris Mason 
1447*a52d9a80SChris Mason 		/* leave this out until we have a page_mkwrite call */
1448*a52d9a80SChris Mason 		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1449*a52d9a80SChris Mason 				   EXTENT_DIRTY, 0)) {
1450*a52d9a80SChris Mason 			cur = cur + iosize;
1451*a52d9a80SChris Mason 			page_offset += iosize;
1452*a52d9a80SChris Mason 			continue;
1453*a52d9a80SChris Mason 		}
1454*a52d9a80SChris Mason 		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1455*a52d9a80SChris Mason 		set_range_writeback(tree, cur, cur + iosize - 1);
1456*a52d9a80SChris Mason 		ret = submit_extent_page(WRITE, tree, page,
1457*a52d9a80SChris Mason 					 sector, iosize, page_offset, bdev,
1458*a52d9a80SChris Mason 					 end_bio_extent_writepage);
1459*a52d9a80SChris Mason 		if (ret)
1460*a52d9a80SChris Mason 			SetPageError(page);
1461*a52d9a80SChris Mason 		cur = cur + iosize;
1462*a52d9a80SChris Mason 		page_offset += iosize;
1463*a52d9a80SChris Mason 		nr++;
1464*a52d9a80SChris Mason 	}
1465*a52d9a80SChris Mason done:
1466*a52d9a80SChris Mason 	WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
1467*a52d9a80SChris Mason 	unlock_extent(tree, start, page_end, GFP_NOFS);
1468*a52d9a80SChris Mason 	unlock_page(page);
1469*a52d9a80SChris Mason 	return 0;
1470*a52d9a80SChris Mason }
1471*a52d9a80SChris Mason EXPORT_SYMBOL(extent_write_full_page);
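
/*
 * A worked example of the EOF handling above, assuming 4K pages and
 * i_size == 10000:
 *
 *	end_index = 10000 >> 12 = 2
 *	page->index > 2  -> the page is entirely past EOF, nothing is written
 *	page->index == 2 -> offset = 10000 & 4095 = 1808, so bytes 1808..4095
 *			    of the last page are zeroed before writeback
 *
 * The iosize round-up in the main loop behaves like the one in
 * extent_read_full_page(): with a 4K blocksize, a 5000 byte range rounds up
 * to 8192 bytes so that only whole blocks are submitted.
 */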
1472*a52d9a80SChris Mason 
1473*a52d9a80SChris Mason /*
1474*a52d9a80SChris Mason  * basic invalidatepage code, this waits on any locked or writeback
1475*a52d9a80SChris Mason  * ranges corresponding to the page, and then deletes any extent state
1476*a52d9a80SChris Mason  * records from the tree
1477*a52d9a80SChris Mason  */
1478*a52d9a80SChris Mason int extent_invalidatepage(struct extent_map_tree *tree,
1479*a52d9a80SChris Mason 			  struct page *page, unsigned long offset)
1480*a52d9a80SChris Mason {
1481*a52d9a80SChris Mason 	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
1482*a52d9a80SChris Mason 	u64 end = start + PAGE_CACHE_SIZE - 1;
1483*a52d9a80SChris Mason 	size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1484*a52d9a80SChris Mason 
1485*a52d9a80SChris Mason 	start += (offset + blocksize - 1) & ~(blocksize - 1);
1486*a52d9a80SChris Mason 	if (start > end)
1487*a52d9a80SChris Mason 		return 0;
1488*a52d9a80SChris Mason 
1489*a52d9a80SChris Mason 	lock_extent(tree, start, end, GFP_NOFS);
1490*a52d9a80SChris Mason 	wait_on_extent_writeback(tree, start, end);
1491*a52d9a80SChris Mason 	clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY,
1492*a52d9a80SChris Mason 			 1, 1, GFP_NOFS);
1493*a52d9a80SChris Mason 	return 0;
1494*a52d9a80SChris Mason }
1495*a52d9a80SChris Mason EXPORT_SYMBOL(extent_invalidatepage);
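
/*
 * A worked example of the offset round-up above, assuming a 4K blocksize on
 * 4K pages: an invalidate at offset 0 rounds to 0 and tears down the extent
 * state for the whole page, while an invalidate at offset 100 rounds up to
 * 4096, pushing start past end, so a partial-block invalidate leaves the
 * extent state untouched.
 */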
1496*a52d9a80SChris Mason 
1497*a52d9a80SChris Mason /*
1498*a52d9a80SChris Mason  * simple commit_write call, set_page_dirty is used to mark both
1499*a52d9a80SChris Mason  * the page and the extent records as dirty
1500*a52d9a80SChris Mason  */
1501*a52d9a80SChris Mason int extent_commit_write(struct extent_map_tree *tree,
1502*a52d9a80SChris Mason 			struct inode *inode, struct page *page,
1503*a52d9a80SChris Mason 			unsigned from, unsigned to)
1504*a52d9a80SChris Mason {
1505*a52d9a80SChris Mason 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1506*a52d9a80SChris Mason 
1507*a52d9a80SChris Mason 	if (!PagePrivate(page)) {
1508*a52d9a80SChris Mason 		SetPagePrivate(page);
1509*a52d9a80SChris Mason 		set_page_private(page, 1);
1510*a52d9a80SChris Mason 		page_cache_get(page);
1511*a52d9a80SChris Mason 	}
1512*a52d9a80SChris Mason 
1513*a52d9a80SChris Mason 	set_page_dirty(page);
1514*a52d9a80SChris Mason 
1515*a52d9a80SChris Mason 	if (pos > inode->i_size) {
1516*a52d9a80SChris Mason 		i_size_write(inode, pos);
1517*a52d9a80SChris Mason 		mark_inode_dirty(inode);
1518*a52d9a80SChris Mason 	}
1519*a52d9a80SChris Mason 	return 0;
1520*a52d9a80SChris Mason }
1521*a52d9a80SChris Mason EXPORT_SYMBOL(extent_commit_write);
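
/*
 * A worked example of the i_size update above, assuming 4K pages: committing
 * bytes 0..299 (to == 300) of the page at index 2 gives
 *
 *	pos = ((loff_t)2 << PAGE_CACHE_SHIFT) + 300 = 8492
 *
 * so an inode whose i_size was 8192 grows to 8492, while a write that stays
 * inside the current file size leaves i_size alone.
 */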
1522*a52d9a80SChris Mason 
1523*a52d9a80SChris Mason int extent_prepare_write(struct extent_map_tree *tree,
1524*a52d9a80SChris Mason 			 struct inode *inode, struct page *page,
1525*a52d9a80SChris Mason 			 unsigned from, unsigned to, get_extent_t *get_extent)
1526*a52d9a80SChris Mason {
1527*a52d9a80SChris Mason 	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1528*a52d9a80SChris Mason 	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1529*a52d9a80SChris Mason 	u64 block_start;
1530*a52d9a80SChris Mason 	u64 orig_block_start;
1531*a52d9a80SChris Mason 	u64 block_end;
1532*a52d9a80SChris Mason 	u64 cur_end;
1533*a52d9a80SChris Mason 	struct extent_map *em;
1534*a52d9a80SChris Mason 	unsigned blocksize = 1 << inode->i_blkbits;
1535*a52d9a80SChris Mason 	size_t page_offset = 0;
1536*a52d9a80SChris Mason 	size_t block_off_start;
1537*a52d9a80SChris Mason 	size_t block_off_end;
1538*a52d9a80SChris Mason 	int err = 0;
1539*a52d9a80SChris Mason 	int iocount = 0;
1540*a52d9a80SChris Mason 	int ret = 0;
1541*a52d9a80SChris Mason 	int isnew;
1542*a52d9a80SChris Mason 
1543*a52d9a80SChris Mason 	if (!PagePrivate(page)) {
1544*a52d9a80SChris Mason 		SetPagePrivate(page);
1545*a52d9a80SChris Mason 		set_page_private(page, 1);
1546*a52d9a80SChris Mason 		page_cache_get(page);
1547*a52d9a80SChris Mason 	}
1548*a52d9a80SChris Mason 	block_start = (page_start + from) & ~((u64)blocksize - 1);
1549*a52d9a80SChris Mason 	block_end = (page_start + to - 1) | (blocksize - 1);
1550*a52d9a80SChris Mason 	orig_block_start = block_start;
1551*a52d9a80SChris Mason 
1552*a52d9a80SChris Mason 	lock_extent(tree, page_start, page_end, GFP_NOFS);
1553*a52d9a80SChris Mason 	while(block_start <= block_end) {
1554*a52d9a80SChris Mason 		em = get_extent(inode, page, page_offset, block_start,
1555*a52d9a80SChris Mason 				block_end, 1);
1556*a52d9a80SChris Mason 		if (IS_ERR(em) || !em) {
			err = -EIO;
1557*a52d9a80SChris Mason 			goto err;
1558*a52d9a80SChris Mason 		}
1559*a52d9a80SChris Mason 		cur_end = min(block_end, em->end);
1560*a52d9a80SChris Mason 		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1561*a52d9a80SChris Mason 		block_off_end = block_off_start + blocksize;
1562*a52d9a80SChris Mason 		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1563*a52d9a80SChris Mason 
1564*a52d9a80SChris Mason 		if (!PageUptodate(page) && isnew &&
1565*a52d9a80SChris Mason 		    (block_off_end > to || block_off_start < from)) {
1566*a52d9a80SChris Mason 			void *kaddr;
1567*a52d9a80SChris Mason 
1568*a52d9a80SChris Mason 			kaddr = kmap_atomic(page, KM_USER0);
1569*a52d9a80SChris Mason 			if (block_off_end > to)
1570*a52d9a80SChris Mason 				memset(kaddr + to, 0, block_off_end - to);
1571*a52d9a80SChris Mason 			if (block_off_start < from)
1572*a52d9a80SChris Mason 				memset(kaddr + block_off_start, 0,
1573*a52d9a80SChris Mason 				       from - block_off_start);
1574*a52d9a80SChris Mason 			flush_dcache_page(page);
1575*a52d9a80SChris Mason 			kunmap_atomic(kaddr, KM_USER0);
1576*a52d9a80SChris Mason 		}
1577*a52d9a80SChris Mason 		if (!isnew && !PageUptodate(page) &&
1578*a52d9a80SChris Mason 		    (block_off_end > to || block_off_start < from) &&
1579*a52d9a80SChris Mason 		    !test_range_bit(tree, block_start, cur_end,
1580*a52d9a80SChris Mason 				    EXTENT_UPTODATE, 1)) {
1581*a52d9a80SChris Mason 			u64 sector;
1582*a52d9a80SChris Mason 			u64 extent_offset = block_start - em->start;
1583*a52d9a80SChris Mason 			size_t iosize;
1584*a52d9a80SChris Mason 			sector = (em->block_start + extent_offset) >> 9;
1585*a52d9a80SChris Mason 			iosize = (cur_end - block_start + blocksize - 1) &
1586*a52d9a80SChris Mason 				~((u64)blocksize - 1);
1587*a52d9a80SChris Mason 			/*
1588*a52d9a80SChris Mason 			 * we've already got the extent locked, but we
1589*a52d9a80SChris Mason 			 * need to split the state such that our end_bio
1590*a52d9a80SChris Mason 			 * handler can clear the lock.
1591*a52d9a80SChris Mason 			 */
1592*a52d9a80SChris Mason 			set_extent_bit(tree, block_start,
1593*a52d9a80SChris Mason 				       block_start + iosize - 1,
1594*a52d9a80SChris Mason 				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1595*a52d9a80SChris Mason 			ret = submit_extent_page(READ, tree, page,
1596*a52d9a80SChris Mason 					 sector, iosize, page_offset, em->bdev,
1597*a52d9a80SChris Mason 					 end_bio_extent_preparewrite);
1598*a52d9a80SChris Mason 			iocount++;
1599*a52d9a80SChris Mason 			block_start = block_start + iosize;
1600*a52d9a80SChris Mason 		} else {
1601*a52d9a80SChris Mason 			set_extent_uptodate(tree, block_start, cur_end,
1602*a52d9a80SChris Mason 					    GFP_NOFS);
1603*a52d9a80SChris Mason 			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1604*a52d9a80SChris Mason 			block_start = cur_end + 1;
1605*a52d9a80SChris Mason 		}
1606*a52d9a80SChris Mason 		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1607*a52d9a80SChris Mason 		free_extent_map(em);
1608*a52d9a80SChris Mason 	}
1609*a52d9a80SChris Mason 	if (iocount) {
1610*a52d9a80SChris Mason 		wait_extent_bit(tree, orig_block_start,
1611*a52d9a80SChris Mason 				block_end, EXTENT_LOCKED);
1612*a52d9a80SChris Mason 	}
1613*a52d9a80SChris Mason 	check_page_uptodate(tree, page);
1614*a52d9a80SChris Mason err:
1615*a52d9a80SChris Mason 	/* FIXME, zero out newly allocated blocks on error */
1616*a52d9a80SChris Mason 	return err;
1617*a52d9a80SChris Mason }
1618*a52d9a80SChris Mason EXPORT_SYMBOL(extent_prepare_write);
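
/*
 * A sketch of how the prepare_write/commit_write pair above might be exposed
 * as address_space operations; my_prepare_write, my_commit_write,
 * my_inode_tree() and my_get_extent are hypothetical names.  The generic
 * buffered write path calls these with the page locked and (from, to) set to
 * the byte range it is about to copy.
 */
#if 0
static int my_prepare_write(struct file *file, struct page *page,
			    unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;

	return extent_prepare_write(my_inode_tree(inode), inode, page,
				    from, to, my_get_extent);
}

static int my_commit_write(struct file *file, struct page *page,
			   unsigned from, unsigned to)
{
	struct inode *inode = page->mapping->host;

	return extent_commit_write(my_inode_tree(inode), inode, page, from, to);
}
#endif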
1619*a52d9a80SChris Mason 
1620*a52d9a80SChris Mason /*
1621*a52d9a80SChris Mason  * a helper for releasepage.  As long as there are no locked extents
1622*a52d9a80SChris Mason  * in the range corresponding to the page, both state records and extent
1623*a52d9a80SChris Mason  * map records are removed
1624*a52d9a80SChris Mason  */
1625*a52d9a80SChris Mason int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1626*a52d9a80SChris Mason {
1627*a52d9a80SChris Mason 	struct extent_map *em;
1628*a52d9a80SChris Mason 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1629*a52d9a80SChris Mason 	u64 end = start + PAGE_CACHE_SIZE - 1;
1630*a52d9a80SChris Mason 	u64 orig_start = start;
1631*a52d9a80SChris Mason 
1632*a52d9a80SChris Mason 	while (start <= end) {
1633*a52d9a80SChris Mason 		em = lookup_extent_mapping(tree, start, end);
1634*a52d9a80SChris Mason 		if (!em || IS_ERR(em))
1635*a52d9a80SChris Mason 			break;
1636*a52d9a80SChris Mason 		if (test_range_bit(tree, em->start, em->end,
1637*a52d9a80SChris Mason 				   EXTENT_LOCKED, 0)) {
1638*a52d9a80SChris Mason 			free_extent_map(em);
1639*a52d9a80SChris Mason 			start = em->end + 1;
1640*a52d9a80SChris Mason 			printk("range still locked %Lu %Lu\n", em->start, em->end);
1641*a52d9a80SChris Mason 			break;
1642*a52d9a80SChris Mason 		}
1643*a52d9a80SChris Mason 		remove_extent_mapping(tree, em);
1644*a52d9a80SChris Mason 		start = em->end + 1;
1645*a52d9a80SChris Mason 		/* once for the rb tree */
1646*a52d9a80SChris Mason 		free_extent_map(em);
1647*a52d9a80SChris Mason 		/* once for us */
1648*a52d9a80SChris Mason 		free_extent_map(em);
1649*a52d9a80SChris Mason 	}
1650*a52d9a80SChris Mason 	WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0));
1651*a52d9a80SChris Mason 	clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1652*a52d9a80SChris Mason 			 1, 1, GFP_NOFS);
1653*a52d9a80SChris Mason 	return 1;
1654*a52d9a80SChris Mason }
1655*a52d9a80SChris Mason EXPORT_SYMBOL(try_release_extent_mapping);
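
/*
 * A sketch of how the remaining exported helpers might be wired into
 * struct address_space_operations; all my_* names and my_inode_tree() are
 * hypothetical.  ->invalidatepage returns void in this kernel, so the
 * helper's return value is simply dropped.
 */
#if 0
static int my_writepage(struct page *page, struct writeback_control *wbc)
{
	struct extent_map_tree *tree = my_inode_tree(page->mapping->host);

	return extent_write_full_page(tree, page, my_get_extent, wbc);
}

static void my_invalidatepage(struct page *page, unsigned long offset)
{
	struct extent_map_tree *tree = my_inode_tree(page->mapping->host);

	extent_invalidatepage(tree, page, offset);
}

static int my_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct extent_map_tree *tree = my_inode_tree(page->mapping->host);

	return try_release_extent_mapping(tree, page);
}
#endif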
1656*a52d9a80SChris Mason 