xref: /openbmc/linux/fs/btrfs/delayed-ref.c (revision 840ef8b7cc584a23c4f9d05352f4dbaf8e56e5ab)
1 /*
2  * Copyright (C) 2009 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/slab.h>
21 #include <linux/sort.h>
22 #include "ctree.h"
23 #include "delayed-ref.h"
24 #include "transaction.h"
25 
26 struct kmem_cache *btrfs_delayed_ref_head_cachep;
27 struct kmem_cache *btrfs_delayed_tree_ref_cachep;
28 struct kmem_cache *btrfs_delayed_data_ref_cachep;
29 struct kmem_cache *btrfs_delayed_extent_op_cachep;
30 /*
31  * delayed back reference update tracking.  For subvolume trees
32  * we queue up extent allocations and backref maintenance for
33  * delayed processing.   This avoids deep call chains where we
34  * add extents in the middle of btrfs_search_slot, and it allows
35  * us to buffer up frequently modified backrefs in an rb tree instead
36  * of hammering updates on the extent allocation tree.
37  */
38 
39 /*
40  * compare two delayed tree backrefs with same bytenr and type
41  */
42 static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
43 			  struct btrfs_delayed_tree_ref *ref1)
44 {
45 	if (ref1->root < ref2->root)
46 		return -1;
47 	if (ref1->root > ref2->root)
48 		return 1;
49 	if (ref1->parent < ref2->parent)
50 		return -1;
51 	if (ref1->parent > ref2->parent)
52 		return 1;
53 	return 0;
54 }
55 
56 /*
57  * compare two delayed data backrefs with same bytenr and type
58  */
59 static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
60 			  struct btrfs_delayed_data_ref *ref1)
61 {
62 	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
63 		if (ref1->root < ref2->root)
64 			return -1;
65 		if (ref1->root > ref2->root)
66 			return 1;
67 		if (ref1->objectid < ref2->objectid)
68 			return -1;
69 		if (ref1->objectid > ref2->objectid)
70 			return 1;
71 		if (ref1->offset < ref2->offset)
72 			return -1;
73 		if (ref1->offset > ref2->offset)
74 			return 1;
75 	} else {
76 		if (ref1->parent < ref2->parent)
77 			return -1;
78 		if (ref1->parent > ref2->parent)
79 			return 1;
80 	}
81 	return 0;
82 }
83 
84 /*
85  * entries in the rb tree are ordered by the byte number of the extent,
86  * type of the delayed backrefs and content of delayed backrefs.
87  */
88 static int comp_entry(struct btrfs_delayed_ref_node *ref2,
89 		      struct btrfs_delayed_ref_node *ref1,
90 		      bool compare_seq)
91 {
92 	if (ref1->bytenr < ref2->bytenr)
93 		return -1;
94 	if (ref1->bytenr > ref2->bytenr)
95 		return 1;
96 	if (ref1->is_head && ref2->is_head)
97 		return 0;
98 	if (ref2->is_head)
99 		return -1;
100 	if (ref1->is_head)
101 		return 1;
102 	if (ref1->type < ref2->type)
103 		return -1;
104 	if (ref1->type > ref2->type)
105 		return 1;
106 	/* merging of sequenced refs is not allowed */
107 	if (compare_seq) {
108 		if (ref1->seq < ref2->seq)
109 			return -1;
110 		if (ref1->seq > ref2->seq)
111 			return 1;
112 	}
113 	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
114 	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
115 		return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
116 				      btrfs_delayed_node_to_tree_ref(ref1));
117 	} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
118 		   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
119 		return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
120 				      btrfs_delayed_node_to_data_ref(ref1));
121 	}
122 	BUG();
123 	return 0;
124 }
125 
126 /*
127  * insert a new ref into the rbtree.  This returns any existing refs
128  * for the same (bytenr,parent) tuple, or NULL if the new node was properly
129  * inserted.
130  */
131 static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
132 						  struct rb_node *node)
133 {
134 	struct rb_node **p = &root->rb_node;
135 	struct rb_node *parent_node = NULL;
136 	struct btrfs_delayed_ref_node *entry;
137 	struct btrfs_delayed_ref_node *ins;
138 	int cmp;
139 
140 	ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
141 	while (*p) {
142 		parent_node = *p;
143 		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
144 				 rb_node);
145 
146 		cmp = comp_entry(entry, ins, 1);
147 		if (cmp < 0)
148 			p = &(*p)->rb_left;
149 		else if (cmp > 0)
150 			p = &(*p)->rb_right;
151 		else
152 			return entry;
153 	}
154 
155 	rb_link_node(node, parent_node, p);
156 	rb_insert_color(node, root);
157 	return NULL;
158 }
159 
160 /*
161  * find an head entry based on bytenr. This returns the delayed ref
162  * head if it was able to find one, or NULL if nothing was in that spot.
163  * If return_bigger is given, the next bigger entry is returned if no exact
164  * match is found.
165  */
166 static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
167 				  u64 bytenr,
168 				  struct btrfs_delayed_ref_node **last,
169 				  int return_bigger)
170 {
171 	struct rb_node *n;
172 	struct btrfs_delayed_ref_node *entry;
173 	int cmp = 0;
174 
175 again:
176 	n = root->rb_node;
177 	entry = NULL;
178 	while (n) {
179 		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
180 		WARN_ON(!entry->in_tree);
181 		if (last)
182 			*last = entry;
183 
184 		if (bytenr < entry->bytenr)
185 			cmp = -1;
186 		else if (bytenr > entry->bytenr)
187 			cmp = 1;
188 		else if (!btrfs_delayed_ref_is_head(entry))
189 			cmp = 1;
190 		else
191 			cmp = 0;
192 
193 		if (cmp < 0)
194 			n = n->rb_left;
195 		else if (cmp > 0)
196 			n = n->rb_right;
197 		else
198 			return entry;
199 	}
200 	if (entry && return_bigger) {
201 		if (cmp > 0) {
202 			n = rb_next(&entry->rb_node);
203 			if (!n)
204 				n = rb_first(root);
205 			entry = rb_entry(n, struct btrfs_delayed_ref_node,
206 					 rb_node);
207 			bytenr = entry->bytenr;
208 			return_bigger = 0;
209 			goto again;
210 		}
211 		return entry;
212 	}
213 	return NULL;
214 }
215 
216 int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
217 			   struct btrfs_delayed_ref_head *head)
218 {
219 	struct btrfs_delayed_ref_root *delayed_refs;
220 
221 	delayed_refs = &trans->transaction->delayed_refs;
222 	assert_spin_locked(&delayed_refs->lock);
223 	if (mutex_trylock(&head->mutex))
224 		return 0;
225 
226 	atomic_inc(&head->node.refs);
227 	spin_unlock(&delayed_refs->lock);
228 
229 	mutex_lock(&head->mutex);
230 	spin_lock(&delayed_refs->lock);
231 	if (!head->node.in_tree) {
232 		mutex_unlock(&head->mutex);
233 		btrfs_put_delayed_ref(&head->node);
234 		return -EAGAIN;
235 	}
236 	btrfs_put_delayed_ref(&head->node);
237 	return 0;
238 }
239 
240 static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
241 				    struct btrfs_delayed_ref_root *delayed_refs,
242 				    struct btrfs_delayed_ref_node *ref)
243 {
244 	rb_erase(&ref->rb_node, &delayed_refs->root);
245 	ref->in_tree = 0;
246 	btrfs_put_delayed_ref(ref);
247 	delayed_refs->num_entries--;
248 	if (trans->delayed_ref_updates)
249 		trans->delayed_ref_updates--;
250 }
251 
252 static int merge_ref(struct btrfs_trans_handle *trans,
253 		     struct btrfs_delayed_ref_root *delayed_refs,
254 		     struct btrfs_delayed_ref_node *ref, u64 seq)
255 {
256 	struct rb_node *node;
257 	int merged = 0;
258 	int mod = 0;
259 	int done = 0;
260 
261 	node = rb_prev(&ref->rb_node);
262 	while (node) {
263 		struct btrfs_delayed_ref_node *next;
264 
265 		next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
266 		node = rb_prev(node);
267 		if (next->bytenr != ref->bytenr)
268 			break;
269 		if (seq && next->seq >= seq)
270 			break;
271 		if (comp_entry(ref, next, 0))
272 			continue;
273 
274 		if (ref->action == next->action) {
275 			mod = next->ref_mod;
276 		} else {
277 			if (ref->ref_mod < next->ref_mod) {
278 				struct btrfs_delayed_ref_node *tmp;
279 
280 				tmp = ref;
281 				ref = next;
282 				next = tmp;
283 				done = 1;
284 			}
285 			mod = -next->ref_mod;
286 		}
287 
288 		merged++;
289 		drop_delayed_ref(trans, delayed_refs, next);
290 		ref->ref_mod += mod;
291 		if (ref->ref_mod == 0) {
292 			drop_delayed_ref(trans, delayed_refs, ref);
293 			break;
294 		} else {
295 			/*
296 			 * You can't have multiples of the same ref on a tree
297 			 * block.
298 			 */
299 			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
300 				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
301 		}
302 
303 		if (done)
304 			break;
305 		node = rb_prev(&ref->rb_node);
306 	}
307 
308 	return merged;
309 }
310 
311 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
312 			      struct btrfs_fs_info *fs_info,
313 			      struct btrfs_delayed_ref_root *delayed_refs,
314 			      struct btrfs_delayed_ref_head *head)
315 {
316 	struct rb_node *node;
317 	u64 seq = 0;
318 
319 	spin_lock(&fs_info->tree_mod_seq_lock);
320 	if (!list_empty(&fs_info->tree_mod_seq_list)) {
321 		struct seq_list *elem;
322 
323 		elem = list_first_entry(&fs_info->tree_mod_seq_list,
324 					struct seq_list, list);
325 		seq = elem->seq;
326 	}
327 	spin_unlock(&fs_info->tree_mod_seq_lock);
328 
329 	node = rb_prev(&head->node.rb_node);
330 	while (node) {
331 		struct btrfs_delayed_ref_node *ref;
332 
333 		ref = rb_entry(node, struct btrfs_delayed_ref_node,
334 			       rb_node);
335 		if (ref->bytenr != head->node.bytenr)
336 			break;
337 
338 		/* We can't merge refs that are outside of our seq count */
339 		if (seq && ref->seq >= seq)
340 			break;
341 		if (merge_ref(trans, delayed_refs, ref, seq))
342 			node = rb_prev(&head->node.rb_node);
343 		else
344 			node = rb_prev(node);
345 	}
346 }
347 
348 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
349 			    struct btrfs_delayed_ref_root *delayed_refs,
350 			    u64 seq)
351 {
352 	struct seq_list *elem;
353 	int ret = 0;
354 
355 	spin_lock(&fs_info->tree_mod_seq_lock);
356 	if (!list_empty(&fs_info->tree_mod_seq_list)) {
357 		elem = list_first_entry(&fs_info->tree_mod_seq_list,
358 					struct seq_list, list);
359 		if (seq >= elem->seq) {
360 			pr_debug("holding back delayed_ref %llu, lowest is "
361 				 "%llu (%p)\n", seq, elem->seq, delayed_refs);
362 			ret = 1;
363 		}
364 	}
365 
366 	spin_unlock(&fs_info->tree_mod_seq_lock);
367 	return ret;
368 }
369 
370 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
371 			   struct list_head *cluster, u64 start)
372 {
373 	int count = 0;
374 	struct btrfs_delayed_ref_root *delayed_refs;
375 	struct rb_node *node;
376 	struct btrfs_delayed_ref_node *ref;
377 	struct btrfs_delayed_ref_head *head;
378 
379 	delayed_refs = &trans->transaction->delayed_refs;
380 	if (start == 0) {
381 		node = rb_first(&delayed_refs->root);
382 	} else {
383 		ref = NULL;
384 		find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
385 		if (ref) {
386 			node = &ref->rb_node;
387 		} else
388 			node = rb_first(&delayed_refs->root);
389 	}
390 again:
391 	while (node && count < 32) {
392 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
393 		if (btrfs_delayed_ref_is_head(ref)) {
394 			head = btrfs_delayed_node_to_head(ref);
395 			if (list_empty(&head->cluster)) {
396 				list_add_tail(&head->cluster, cluster);
397 				delayed_refs->run_delayed_start =
398 					head->node.bytenr;
399 				count++;
400 
401 				WARN_ON(delayed_refs->num_heads_ready == 0);
402 				delayed_refs->num_heads_ready--;
403 			} else if (count) {
404 				/* the goal of the clustering is to find extents
405 				 * that are likely to end up in the same extent
406 				 * leaf on disk.  So, we don't want them spread
407 				 * all over the tree.  Stop now if we've hit
408 				 * a head that was already in use
409 				 */
410 				break;
411 			}
412 		}
413 		node = rb_next(node);
414 	}
415 	if (count) {
416 		return 0;
417 	} else if (start) {
418 		/*
419 		 * we've gone to the end of the rbtree without finding any
420 		 * clusters.  start from the beginning and try again
421 		 */
422 		start = 0;
423 		node = rb_first(&delayed_refs->root);
424 		goto again;
425 	}
426 	return 1;
427 }
428 
429 void btrfs_release_ref_cluster(struct list_head *cluster)
430 {
431 	struct list_head *pos, *q;
432 
433 	list_for_each_safe(pos, q, cluster)
434 		list_del_init(pos);
435 }
436 
437 /*
438  * helper function to update an extent delayed ref in the
439  * rbtree.  existing and update must both have the same
440  * bytenr and parent
441  *
442  * This may free existing if the update cancels out whatever
443  * operation it was doing.
444  */
445 static noinline void
446 update_existing_ref(struct btrfs_trans_handle *trans,
447 		    struct btrfs_delayed_ref_root *delayed_refs,
448 		    struct btrfs_delayed_ref_node *existing,
449 		    struct btrfs_delayed_ref_node *update)
450 {
451 	if (update->action != existing->action) {
452 		/*
453 		 * this is effectively undoing either an add or a
454 		 * drop.  We decrement the ref_mod, and if it goes
455 		 * down to zero we just delete the entry without
456 		 * every changing the extent allocation tree.
457 		 */
458 		existing->ref_mod--;
459 		if (existing->ref_mod == 0)
460 			drop_delayed_ref(trans, delayed_refs, existing);
461 		else
462 			WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
463 				existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
464 	} else {
465 		WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
466 			existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
467 		/*
468 		 * the action on the existing ref matches
469 		 * the action on the ref we're trying to add.
470 		 * Bump the ref_mod by one so the backref that
471 		 * is eventually added/removed has the correct
472 		 * reference count
473 		 */
474 		existing->ref_mod += update->ref_mod;
475 	}
476 }
477 
478 /*
479  * helper function to update the accounting in the head ref
480  * existing and update must have the same bytenr
481  */
482 static noinline void
483 update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
484 			 struct btrfs_delayed_ref_node *update)
485 {
486 	struct btrfs_delayed_ref_head *existing_ref;
487 	struct btrfs_delayed_ref_head *ref;
488 
489 	existing_ref = btrfs_delayed_node_to_head(existing);
490 	ref = btrfs_delayed_node_to_head(update);
491 	BUG_ON(existing_ref->is_data != ref->is_data);
492 
493 	if (ref->must_insert_reserved) {
494 		/* if the extent was freed and then
495 		 * reallocated before the delayed ref
496 		 * entries were processed, we can end up
497 		 * with an existing head ref without
498 		 * the must_insert_reserved flag set.
499 		 * Set it again here
500 		 */
501 		existing_ref->must_insert_reserved = ref->must_insert_reserved;
502 
503 		/*
504 		 * update the num_bytes so we make sure the accounting
505 		 * is done correctly
506 		 */
507 		existing->num_bytes = update->num_bytes;
508 
509 	}
510 
511 	if (ref->extent_op) {
512 		if (!existing_ref->extent_op) {
513 			existing_ref->extent_op = ref->extent_op;
514 		} else {
515 			if (ref->extent_op->update_key) {
516 				memcpy(&existing_ref->extent_op->key,
517 				       &ref->extent_op->key,
518 				       sizeof(ref->extent_op->key));
519 				existing_ref->extent_op->update_key = 1;
520 			}
521 			if (ref->extent_op->update_flags) {
522 				existing_ref->extent_op->flags_to_set |=
523 					ref->extent_op->flags_to_set;
524 				existing_ref->extent_op->update_flags = 1;
525 			}
526 			btrfs_free_delayed_extent_op(ref->extent_op);
527 		}
528 	}
529 	/*
530 	 * update the reference mod on the head to reflect this new operation
531 	 */
532 	existing->ref_mod += update->ref_mod;
533 }
534 
535 /*
536  * helper function to actually insert a head node into the rbtree.
537  * this does all the dirty work in terms of maintaining the correct
538  * overall modification count.
539  */
540 static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
541 					struct btrfs_trans_handle *trans,
542 					struct btrfs_delayed_ref_node *ref,
543 					u64 bytenr, u64 num_bytes,
544 					int action, int is_data)
545 {
546 	struct btrfs_delayed_ref_node *existing;
547 	struct btrfs_delayed_ref_head *head_ref = NULL;
548 	struct btrfs_delayed_ref_root *delayed_refs;
549 	int count_mod = 1;
550 	int must_insert_reserved = 0;
551 
552 	/*
553 	 * the head node stores the sum of all the mods, so dropping a ref
554 	 * should drop the sum in the head node by one.
555 	 */
556 	if (action == BTRFS_UPDATE_DELAYED_HEAD)
557 		count_mod = 0;
558 	else if (action == BTRFS_DROP_DELAYED_REF)
559 		count_mod = -1;
560 
561 	/*
562 	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
563 	 * the reserved accounting when the extent is finally added, or
564 	 * if a later modification deletes the delayed ref without ever
565 	 * inserting the extent into the extent allocation tree.
566 	 * ref->must_insert_reserved is the flag used to record
567 	 * that accounting mods are required.
568 	 *
569 	 * Once we record must_insert_reserved, switch the action to
570 	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
571 	 */
572 	if (action == BTRFS_ADD_DELAYED_EXTENT)
573 		must_insert_reserved = 1;
574 	else
575 		must_insert_reserved = 0;
576 
577 	delayed_refs = &trans->transaction->delayed_refs;
578 
579 	/* first set the basic ref node struct up */
580 	atomic_set(&ref->refs, 1);
581 	ref->bytenr = bytenr;
582 	ref->num_bytes = num_bytes;
583 	ref->ref_mod = count_mod;
584 	ref->type  = 0;
585 	ref->action  = 0;
586 	ref->is_head = 1;
587 	ref->in_tree = 1;
588 	ref->seq = 0;
589 
590 	head_ref = btrfs_delayed_node_to_head(ref);
591 	head_ref->must_insert_reserved = must_insert_reserved;
592 	head_ref->is_data = is_data;
593 
594 	INIT_LIST_HEAD(&head_ref->cluster);
595 	mutex_init(&head_ref->mutex);
596 
597 	trace_btrfs_delayed_ref_head(ref, head_ref, action);
598 
599 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
600 
601 	if (existing) {
602 		update_existing_head_ref(existing, ref);
603 		/*
604 		 * we've updated the existing ref, free the newly
605 		 * allocated ref
606 		 */
607 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
608 	} else {
609 		delayed_refs->num_heads++;
610 		delayed_refs->num_heads_ready++;
611 		delayed_refs->num_entries++;
612 		trans->delayed_ref_updates++;
613 	}
614 }
615 
616 /*
617  * helper to insert a delayed tree ref into the rbtree.
618  */
619 static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
620 					 struct btrfs_trans_handle *trans,
621 					 struct btrfs_delayed_ref_node *ref,
622 					 u64 bytenr, u64 num_bytes, u64 parent,
623 					 u64 ref_root, int level, int action,
624 					 int for_cow)
625 {
626 	struct btrfs_delayed_ref_node *existing;
627 	struct btrfs_delayed_tree_ref *full_ref;
628 	struct btrfs_delayed_ref_root *delayed_refs;
629 	u64 seq = 0;
630 
631 	if (action == BTRFS_ADD_DELAYED_EXTENT)
632 		action = BTRFS_ADD_DELAYED_REF;
633 
634 	delayed_refs = &trans->transaction->delayed_refs;
635 
636 	/* first set the basic ref node struct up */
637 	atomic_set(&ref->refs, 1);
638 	ref->bytenr = bytenr;
639 	ref->num_bytes = num_bytes;
640 	ref->ref_mod = 1;
641 	ref->action = action;
642 	ref->is_head = 0;
643 	ref->in_tree = 1;
644 
645 	if (need_ref_seq(for_cow, ref_root))
646 		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
647 	ref->seq = seq;
648 
649 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
650 	full_ref->parent = parent;
651 	full_ref->root = ref_root;
652 	if (parent)
653 		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
654 	else
655 		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
656 	full_ref->level = level;
657 
658 	trace_btrfs_delayed_tree_ref(ref, full_ref, action);
659 
660 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
661 
662 	if (existing) {
663 		update_existing_ref(trans, delayed_refs, existing, ref);
664 		/*
665 		 * we've updated the existing ref, free the newly
666 		 * allocated ref
667 		 */
668 		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
669 	} else {
670 		delayed_refs->num_entries++;
671 		trans->delayed_ref_updates++;
672 	}
673 }
674 
675 /*
676  * helper to insert a delayed data ref into the rbtree.
677  */
678 static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
679 					 struct btrfs_trans_handle *trans,
680 					 struct btrfs_delayed_ref_node *ref,
681 					 u64 bytenr, u64 num_bytes, u64 parent,
682 					 u64 ref_root, u64 owner, u64 offset,
683 					 int action, int for_cow)
684 {
685 	struct btrfs_delayed_ref_node *existing;
686 	struct btrfs_delayed_data_ref *full_ref;
687 	struct btrfs_delayed_ref_root *delayed_refs;
688 	u64 seq = 0;
689 
690 	if (action == BTRFS_ADD_DELAYED_EXTENT)
691 		action = BTRFS_ADD_DELAYED_REF;
692 
693 	delayed_refs = &trans->transaction->delayed_refs;
694 
695 	/* first set the basic ref node struct up */
696 	atomic_set(&ref->refs, 1);
697 	ref->bytenr = bytenr;
698 	ref->num_bytes = num_bytes;
699 	ref->ref_mod = 1;
700 	ref->action = action;
701 	ref->is_head = 0;
702 	ref->in_tree = 1;
703 
704 	if (need_ref_seq(for_cow, ref_root))
705 		seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
706 	ref->seq = seq;
707 
708 	full_ref = btrfs_delayed_node_to_data_ref(ref);
709 	full_ref->parent = parent;
710 	full_ref->root = ref_root;
711 	if (parent)
712 		ref->type = BTRFS_SHARED_DATA_REF_KEY;
713 	else
714 		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
715 
716 	full_ref->objectid = owner;
717 	full_ref->offset = offset;
718 
719 	trace_btrfs_delayed_data_ref(ref, full_ref, action);
720 
721 	existing = tree_insert(&delayed_refs->root, &ref->rb_node);
722 
723 	if (existing) {
724 		update_existing_ref(trans, delayed_refs, existing, ref);
725 		/*
726 		 * we've updated the existing ref, free the newly
727 		 * allocated ref
728 		 */
729 		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
730 	} else {
731 		delayed_refs->num_entries++;
732 		trans->delayed_ref_updates++;
733 	}
734 }
735 
736 /*
737  * add a delayed tree ref.  This does all of the accounting required
738  * to make sure the delayed ref is eventually processed before this
739  * transaction commits.
740  */
741 int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
742 			       struct btrfs_trans_handle *trans,
743 			       u64 bytenr, u64 num_bytes, u64 parent,
744 			       u64 ref_root,  int level, int action,
745 			       struct btrfs_delayed_extent_op *extent_op,
746 			       int for_cow)
747 {
748 	struct btrfs_delayed_tree_ref *ref;
749 	struct btrfs_delayed_ref_head *head_ref;
750 	struct btrfs_delayed_ref_root *delayed_refs;
751 
752 	BUG_ON(extent_op && extent_op->is_data);
753 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
754 	if (!ref)
755 		return -ENOMEM;
756 
757 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
758 	if (!head_ref) {
759 		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
760 		return -ENOMEM;
761 	}
762 
763 	head_ref->extent_op = extent_op;
764 
765 	delayed_refs = &trans->transaction->delayed_refs;
766 	spin_lock(&delayed_refs->lock);
767 
768 	/*
769 	 * insert both the head node and the new ref without dropping
770 	 * the spin lock
771 	 */
772 	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
773 				   num_bytes, action, 0);
774 
775 	add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
776 				   num_bytes, parent, ref_root, level, action,
777 				   for_cow);
778 	spin_unlock(&delayed_refs->lock);
779 	if (need_ref_seq(for_cow, ref_root))
780 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
781 
782 	return 0;
783 }
784 
785 /*
786  * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
787  */
788 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
789 			       struct btrfs_trans_handle *trans,
790 			       u64 bytenr, u64 num_bytes,
791 			       u64 parent, u64 ref_root,
792 			       u64 owner, u64 offset, int action,
793 			       struct btrfs_delayed_extent_op *extent_op,
794 			       int for_cow)
795 {
796 	struct btrfs_delayed_data_ref *ref;
797 	struct btrfs_delayed_ref_head *head_ref;
798 	struct btrfs_delayed_ref_root *delayed_refs;
799 
800 	BUG_ON(extent_op && !extent_op->is_data);
801 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
802 	if (!ref)
803 		return -ENOMEM;
804 
805 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
806 	if (!head_ref) {
807 		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
808 		return -ENOMEM;
809 	}
810 
811 	head_ref->extent_op = extent_op;
812 
813 	delayed_refs = &trans->transaction->delayed_refs;
814 	spin_lock(&delayed_refs->lock);
815 
816 	/*
817 	 * insert both the head node and the new ref without dropping
818 	 * the spin lock
819 	 */
820 	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
821 				   num_bytes, action, 1);
822 
823 	add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
824 				   num_bytes, parent, ref_root, owner, offset,
825 				   action, for_cow);
826 	spin_unlock(&delayed_refs->lock);
827 	if (need_ref_seq(for_cow, ref_root))
828 		btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
829 
830 	return 0;
831 }
832 
833 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
834 				struct btrfs_trans_handle *trans,
835 				u64 bytenr, u64 num_bytes,
836 				struct btrfs_delayed_extent_op *extent_op)
837 {
838 	struct btrfs_delayed_ref_head *head_ref;
839 	struct btrfs_delayed_ref_root *delayed_refs;
840 
841 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
842 	if (!head_ref)
843 		return -ENOMEM;
844 
845 	head_ref->extent_op = extent_op;
846 
847 	delayed_refs = &trans->transaction->delayed_refs;
848 	spin_lock(&delayed_refs->lock);
849 
850 	add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
851 				   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
852 				   extent_op->is_data);
853 
854 	spin_unlock(&delayed_refs->lock);
855 	return 0;
856 }
857 
858 /*
859  * this does a simple search for the head node for a given extent.
860  * It must be called with the delayed ref spinlock held, and it returns
861  * the head node if any where found, or NULL if not.
862  */
863 struct btrfs_delayed_ref_head *
864 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
865 {
866 	struct btrfs_delayed_ref_node *ref;
867 	struct btrfs_delayed_ref_root *delayed_refs;
868 
869 	delayed_refs = &trans->transaction->delayed_refs;
870 	ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
871 	if (ref)
872 		return btrfs_delayed_node_to_head(ref);
873 	return NULL;
874 }
875 
876 void btrfs_delayed_ref_exit(void)
877 {
878 	if (btrfs_delayed_ref_head_cachep)
879 		kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
880 	if (btrfs_delayed_tree_ref_cachep)
881 		kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
882 	if (btrfs_delayed_data_ref_cachep)
883 		kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
884 	if (btrfs_delayed_extent_op_cachep)
885 		kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
886 }
887 
888 int btrfs_delayed_ref_init(void)
889 {
890 	btrfs_delayed_ref_head_cachep = kmem_cache_create(
891 				"btrfs_delayed_ref_head",
892 				sizeof(struct btrfs_delayed_ref_head), 0,
893 				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
894 	if (!btrfs_delayed_ref_head_cachep)
895 		goto fail;
896 
897 	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
898 				"btrfs_delayed_tree_ref",
899 				sizeof(struct btrfs_delayed_tree_ref), 0,
900 				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
901 	if (!btrfs_delayed_tree_ref_cachep)
902 		goto fail;
903 
904 	btrfs_delayed_data_ref_cachep = kmem_cache_create(
905 				"btrfs_delayed_data_ref",
906 				sizeof(struct btrfs_delayed_data_ref), 0,
907 				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
908 	if (!btrfs_delayed_data_ref_cachep)
909 		goto fail;
910 
911 	btrfs_delayed_extent_op_cachep = kmem_cache_create(
912 				"btrfs_delayed_extent_op",
913 				sizeof(struct btrfs_delayed_extent_op), 0,
914 				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
915 	if (!btrfs_delayed_extent_op_cachep)
916 		goto fail;
917 
918 	return 0;
919 fail:
920 	btrfs_delayed_ref_exit();
921 	return -ENOMEM;
922 }
923