xref: /openbmc/linux/fs/btrfs/qgroup.c (revision b34e08d5)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27 
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 
36 /* TODO XXX FIXME
37  *  - subvol delete -> delete when ref goes to 0? delete limits also?
38  *  - reorganize keys
39  *  - compressed
40  *  - sync
41  *  - copy also limits on subvol creation
42  *  - limit
43  *  - caches fuer ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47 
48 /*
49  * one struct for each qgroup, organized in fs_info->qgroup_tree.
50  */
51 struct btrfs_qgroup {
52 	u64 qgroupid;
53 
54 	/*
55 	 * state
56 	 */
57 	u64 rfer;	/* referenced */
58 	u64 rfer_cmpr;	/* referenced compressed */
59 	u64 excl;	/* exclusive */
60 	u64 excl_cmpr;	/* exclusive compressed */
61 
62 	/*
63 	 * limits
64 	 */
65 	u64 lim_flags;	/* which limits are set */
66 	u64 max_rfer;
67 	u64 max_excl;
68 	u64 rsv_rfer;
69 	u64 rsv_excl;
70 
71 	/*
72 	 * reservation tracking
73 	 */
74 	u64 reserved;
75 
76 	/*
77 	 * lists
78 	 */
79 	struct list_head groups;  /* groups this group is member of */
80 	struct list_head members; /* groups that are members of this group */
81 	struct list_head dirty;   /* dirty groups */
82 	struct rb_node node;	  /* tree of qgroups */
83 
84 	/*
85 	 * temp variables for accounting operations
86 	 */
87 	u64 tag;
88 	u64 refcnt;
89 };
90 
91 /*
92  * glue structure to represent the relations between qgroups.
93  */
94 struct btrfs_qgroup_list {
95 	struct list_head next_group;
96 	struct list_head next_member;
97 	struct btrfs_qgroup *group;
98 	struct btrfs_qgroup *member;
99 };
100 
101 static int
102 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 		   int init_flags);
104 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
105 
106 /* must be called with qgroup_ioctl_lock held */
107 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108 					   u64 qgroupid)
109 {
110 	struct rb_node *n = fs_info->qgroup_tree.rb_node;
111 	struct btrfs_qgroup *qgroup;
112 
113 	while (n) {
114 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
115 		if (qgroup->qgroupid < qgroupid)
116 			n = n->rb_left;
117 		else if (qgroup->qgroupid > qgroupid)
118 			n = n->rb_right;
119 		else
120 			return qgroup;
121 	}
122 	return NULL;
123 }
124 
125 /* must be called with qgroup_lock held */
126 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127 					  u64 qgroupid)
128 {
129 	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130 	struct rb_node *parent = NULL;
131 	struct btrfs_qgroup *qgroup;
132 
133 	while (*p) {
134 		parent = *p;
135 		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136 
137 		if (qgroup->qgroupid < qgroupid)
138 			p = &(*p)->rb_left;
139 		else if (qgroup->qgroupid > qgroupid)
140 			p = &(*p)->rb_right;
141 		else
142 			return qgroup;
143 	}
144 
145 	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146 	if (!qgroup)
147 		return ERR_PTR(-ENOMEM);
148 
149 	qgroup->qgroupid = qgroupid;
150 	INIT_LIST_HEAD(&qgroup->groups);
151 	INIT_LIST_HEAD(&qgroup->members);
152 	INIT_LIST_HEAD(&qgroup->dirty);
153 
154 	rb_link_node(&qgroup->node, parent, p);
155 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156 
157 	return qgroup;
158 }
159 
160 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
161 {
162 	struct btrfs_qgroup_list *list;
163 
164 	list_del(&qgroup->dirty);
165 	while (!list_empty(&qgroup->groups)) {
166 		list = list_first_entry(&qgroup->groups,
167 					struct btrfs_qgroup_list, next_group);
168 		list_del(&list->next_group);
169 		list_del(&list->next_member);
170 		kfree(list);
171 	}
172 
173 	while (!list_empty(&qgroup->members)) {
174 		list = list_first_entry(&qgroup->members,
175 					struct btrfs_qgroup_list, next_member);
176 		list_del(&list->next_group);
177 		list_del(&list->next_member);
178 		kfree(list);
179 	}
180 	kfree(qgroup);
181 }
182 
183 /* must be called with qgroup_lock held */
184 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185 {
186 	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187 
188 	if (!qgroup)
189 		return -ENOENT;
190 
191 	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 	__del_qgroup_rb(qgroup);
193 	return 0;
194 }
195 
196 /* must be called with qgroup_lock held */
197 static int add_relation_rb(struct btrfs_fs_info *fs_info,
198 			   u64 memberid, u64 parentid)
199 {
200 	struct btrfs_qgroup *member;
201 	struct btrfs_qgroup *parent;
202 	struct btrfs_qgroup_list *list;
203 
204 	member = find_qgroup_rb(fs_info, memberid);
205 	parent = find_qgroup_rb(fs_info, parentid);
206 	if (!member || !parent)
207 		return -ENOENT;
208 
209 	list = kzalloc(sizeof(*list), GFP_ATOMIC);
210 	if (!list)
211 		return -ENOMEM;
212 
213 	list->group = parent;
214 	list->member = member;
215 	list_add_tail(&list->next_group, &member->groups);
216 	list_add_tail(&list->next_member, &parent->members);
217 
218 	return 0;
219 }
220 
221 /* must be called with qgroup_lock held */
222 static int del_relation_rb(struct btrfs_fs_info *fs_info,
223 			   u64 memberid, u64 parentid)
224 {
225 	struct btrfs_qgroup *member;
226 	struct btrfs_qgroup *parent;
227 	struct btrfs_qgroup_list *list;
228 
229 	member = find_qgroup_rb(fs_info, memberid);
230 	parent = find_qgroup_rb(fs_info, parentid);
231 	if (!member || !parent)
232 		return -ENOENT;
233 
234 	list_for_each_entry(list, &member->groups, next_group) {
235 		if (list->group == parent) {
236 			list_del(&list->next_group);
237 			list_del(&list->next_member);
238 			kfree(list);
239 			return 0;
240 		}
241 	}
242 	return -ENOENT;
243 }
244 
245 /*
246  * The full config is read in one go, only called from open_ctree()
247  * It doesn't use any locking, as at this point we're still single-threaded
248  */
249 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
250 {
251 	struct btrfs_key key;
252 	struct btrfs_key found_key;
253 	struct btrfs_root *quota_root = fs_info->quota_root;
254 	struct btrfs_path *path = NULL;
255 	struct extent_buffer *l;
256 	int slot;
257 	int ret = 0;
258 	u64 flags = 0;
259 	u64 rescan_progress = 0;
260 
261 	if (!fs_info->quota_enabled)
262 		return 0;
263 
264 	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
265 	if (!fs_info->qgroup_ulist) {
266 		ret = -ENOMEM;
267 		goto out;
268 	}
269 
270 	path = btrfs_alloc_path();
271 	if (!path) {
272 		ret = -ENOMEM;
273 		goto out;
274 	}
275 
276 	/* default this to quota off, in case no status key is found */
277 	fs_info->qgroup_flags = 0;
278 
279 	/*
280 	 * pass 1: read status, all qgroup infos and limits
281 	 */
282 	key.objectid = 0;
283 	key.type = 0;
284 	key.offset = 0;
285 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
286 	if (ret)
287 		goto out;
288 
289 	while (1) {
290 		struct btrfs_qgroup *qgroup;
291 
292 		slot = path->slots[0];
293 		l = path->nodes[0];
294 		btrfs_item_key_to_cpu(l, &found_key, slot);
295 
296 		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
297 			struct btrfs_qgroup_status_item *ptr;
298 
299 			ptr = btrfs_item_ptr(l, slot,
300 					     struct btrfs_qgroup_status_item);
301 
302 			if (btrfs_qgroup_status_version(l, ptr) !=
303 			    BTRFS_QGROUP_STATUS_VERSION) {
304 				btrfs_err(fs_info,
305 				 "old qgroup version, quota disabled");
306 				goto out;
307 			}
308 			if (btrfs_qgroup_status_generation(l, ptr) !=
309 			    fs_info->generation) {
310 				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311 				btrfs_err(fs_info,
312 					"qgroup generation mismatch, "
313 					"marked as inconsistent");
314 			}
315 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316 									  ptr);
317 			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
318 			goto next1;
319 		}
320 
321 		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
322 		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
323 			goto next1;
324 
325 		qgroup = find_qgroup_rb(fs_info, found_key.offset);
326 		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327 		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
328 			btrfs_err(fs_info, "inconsitent qgroup config");
329 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 		}
331 		if (!qgroup) {
332 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
333 			if (IS_ERR(qgroup)) {
334 				ret = PTR_ERR(qgroup);
335 				goto out;
336 			}
337 		}
338 		switch (found_key.type) {
339 		case BTRFS_QGROUP_INFO_KEY: {
340 			struct btrfs_qgroup_info_item *ptr;
341 
342 			ptr = btrfs_item_ptr(l, slot,
343 					     struct btrfs_qgroup_info_item);
344 			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
345 			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
346 			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
347 			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
348 			/* generation currently unused */
349 			break;
350 		}
351 		case BTRFS_QGROUP_LIMIT_KEY: {
352 			struct btrfs_qgroup_limit_item *ptr;
353 
354 			ptr = btrfs_item_ptr(l, slot,
355 					     struct btrfs_qgroup_limit_item);
356 			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
357 			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
358 			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
359 			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
360 			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
361 			break;
362 		}
363 		}
364 next1:
365 		ret = btrfs_next_item(quota_root, path);
366 		if (ret < 0)
367 			goto out;
368 		if (ret)
369 			break;
370 	}
371 	btrfs_release_path(path);
372 
373 	/*
374 	 * pass 2: read all qgroup relations
375 	 */
376 	key.objectid = 0;
377 	key.type = BTRFS_QGROUP_RELATION_KEY;
378 	key.offset = 0;
379 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
380 	if (ret)
381 		goto out;
382 	while (1) {
383 		slot = path->slots[0];
384 		l = path->nodes[0];
385 		btrfs_item_key_to_cpu(l, &found_key, slot);
386 
387 		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
388 			goto next2;
389 
390 		if (found_key.objectid > found_key.offset) {
391 			/* parent <- member, not needed to build config */
392 			/* FIXME should we omit the key completely? */
393 			goto next2;
394 		}
395 
396 		ret = add_relation_rb(fs_info, found_key.objectid,
397 				      found_key.offset);
398 		if (ret == -ENOENT) {
399 			btrfs_warn(fs_info,
400 				"orphan qgroup relation 0x%llx->0x%llx",
401 				found_key.objectid, found_key.offset);
402 			ret = 0;	/* ignore the error */
403 		}
404 		if (ret)
405 			goto out;
406 next2:
407 		ret = btrfs_next_item(quota_root, path);
408 		if (ret < 0)
409 			goto out;
410 		if (ret)
411 			break;
412 	}
413 out:
414 	fs_info->qgroup_flags |= flags;
415 	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
416 		fs_info->quota_enabled = 0;
417 		fs_info->pending_quota_state = 0;
418 	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
419 		   ret >= 0) {
420 		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
421 	}
422 	btrfs_free_path(path);
423 
424 	if (ret < 0) {
425 		ulist_free(fs_info->qgroup_ulist);
426 		fs_info->qgroup_ulist = NULL;
427 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
428 	}
429 
430 	return ret < 0 ? ret : 0;
431 }
432 
433 /*
434  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
435  * first two are in single-threaded paths.And for the third one, we have set
436  * quota_root to be null with qgroup_lock held before, so it is safe to clean
437  * up the in-memory structures without qgroup_lock held.
438  */
439 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
440 {
441 	struct rb_node *n;
442 	struct btrfs_qgroup *qgroup;
443 
444 	while ((n = rb_first(&fs_info->qgroup_tree))) {
445 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
446 		rb_erase(n, &fs_info->qgroup_tree);
447 		__del_qgroup_rb(qgroup);
448 	}
449 	/*
450 	 * we call btrfs_free_qgroup_config() when umounting
451 	 * filesystem and disabling quota, so we set qgroup_ulit
452 	 * to be null here to avoid double free.
453 	 */
454 	ulist_free(fs_info->qgroup_ulist);
455 	fs_info->qgroup_ulist = NULL;
456 }
457 
458 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
459 				    struct btrfs_root *quota_root,
460 				    u64 src, u64 dst)
461 {
462 	int ret;
463 	struct btrfs_path *path;
464 	struct btrfs_key key;
465 
466 	path = btrfs_alloc_path();
467 	if (!path)
468 		return -ENOMEM;
469 
470 	key.objectid = src;
471 	key.type = BTRFS_QGROUP_RELATION_KEY;
472 	key.offset = dst;
473 
474 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
475 
476 	btrfs_mark_buffer_dirty(path->nodes[0]);
477 
478 	btrfs_free_path(path);
479 	return ret;
480 }
481 
482 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
483 				    struct btrfs_root *quota_root,
484 				    u64 src, u64 dst)
485 {
486 	int ret;
487 	struct btrfs_path *path;
488 	struct btrfs_key key;
489 
490 	path = btrfs_alloc_path();
491 	if (!path)
492 		return -ENOMEM;
493 
494 	key.objectid = src;
495 	key.type = BTRFS_QGROUP_RELATION_KEY;
496 	key.offset = dst;
497 
498 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
499 	if (ret < 0)
500 		goto out;
501 
502 	if (ret > 0) {
503 		ret = -ENOENT;
504 		goto out;
505 	}
506 
507 	ret = btrfs_del_item(trans, quota_root, path);
508 out:
509 	btrfs_free_path(path);
510 	return ret;
511 }
512 
513 static int add_qgroup_item(struct btrfs_trans_handle *trans,
514 			   struct btrfs_root *quota_root, u64 qgroupid)
515 {
516 	int ret;
517 	struct btrfs_path *path;
518 	struct btrfs_qgroup_info_item *qgroup_info;
519 	struct btrfs_qgroup_limit_item *qgroup_limit;
520 	struct extent_buffer *leaf;
521 	struct btrfs_key key;
522 
523 	path = btrfs_alloc_path();
524 	if (!path)
525 		return -ENOMEM;
526 
527 	key.objectid = 0;
528 	key.type = BTRFS_QGROUP_INFO_KEY;
529 	key.offset = qgroupid;
530 
531 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
532 				      sizeof(*qgroup_info));
533 	if (ret)
534 		goto out;
535 
536 	leaf = path->nodes[0];
537 	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
538 				 struct btrfs_qgroup_info_item);
539 	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
540 	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
541 	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
542 	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
543 	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
544 
545 	btrfs_mark_buffer_dirty(leaf);
546 
547 	btrfs_release_path(path);
548 
549 	key.type = BTRFS_QGROUP_LIMIT_KEY;
550 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
551 				      sizeof(*qgroup_limit));
552 	if (ret)
553 		goto out;
554 
555 	leaf = path->nodes[0];
556 	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
557 				  struct btrfs_qgroup_limit_item);
558 	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
559 	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
560 	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
561 	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
562 	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
563 
564 	btrfs_mark_buffer_dirty(leaf);
565 
566 	ret = 0;
567 out:
568 	btrfs_free_path(path);
569 	return ret;
570 }
571 
572 static int del_qgroup_item(struct btrfs_trans_handle *trans,
573 			   struct btrfs_root *quota_root, u64 qgroupid)
574 {
575 	int ret;
576 	struct btrfs_path *path;
577 	struct btrfs_key key;
578 
579 	path = btrfs_alloc_path();
580 	if (!path)
581 		return -ENOMEM;
582 
583 	key.objectid = 0;
584 	key.type = BTRFS_QGROUP_INFO_KEY;
585 	key.offset = qgroupid;
586 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
587 	if (ret < 0)
588 		goto out;
589 
590 	if (ret > 0) {
591 		ret = -ENOENT;
592 		goto out;
593 	}
594 
595 	ret = btrfs_del_item(trans, quota_root, path);
596 	if (ret)
597 		goto out;
598 
599 	btrfs_release_path(path);
600 
601 	key.type = BTRFS_QGROUP_LIMIT_KEY;
602 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
603 	if (ret < 0)
604 		goto out;
605 
606 	if (ret > 0) {
607 		ret = -ENOENT;
608 		goto out;
609 	}
610 
611 	ret = btrfs_del_item(trans, quota_root, path);
612 
613 out:
614 	btrfs_free_path(path);
615 	return ret;
616 }
617 
618 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
619 				    struct btrfs_root *root, u64 qgroupid,
620 				    u64 flags, u64 max_rfer, u64 max_excl,
621 				    u64 rsv_rfer, u64 rsv_excl)
622 {
623 	struct btrfs_path *path;
624 	struct btrfs_key key;
625 	struct extent_buffer *l;
626 	struct btrfs_qgroup_limit_item *qgroup_limit;
627 	int ret;
628 	int slot;
629 
630 	key.objectid = 0;
631 	key.type = BTRFS_QGROUP_LIMIT_KEY;
632 	key.offset = qgroupid;
633 
634 	path = btrfs_alloc_path();
635 	if (!path)
636 		return -ENOMEM;
637 
638 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
639 	if (ret > 0)
640 		ret = -ENOENT;
641 
642 	if (ret)
643 		goto out;
644 
645 	l = path->nodes[0];
646 	slot = path->slots[0];
647 	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
648 	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
649 	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
650 	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
651 	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
652 	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
653 
654 	btrfs_mark_buffer_dirty(l);
655 
656 out:
657 	btrfs_free_path(path);
658 	return ret;
659 }
660 
661 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
662 				   struct btrfs_root *root,
663 				   struct btrfs_qgroup *qgroup)
664 {
665 	struct btrfs_path *path;
666 	struct btrfs_key key;
667 	struct extent_buffer *l;
668 	struct btrfs_qgroup_info_item *qgroup_info;
669 	int ret;
670 	int slot;
671 
672 	key.objectid = 0;
673 	key.type = BTRFS_QGROUP_INFO_KEY;
674 	key.offset = qgroup->qgroupid;
675 
676 	path = btrfs_alloc_path();
677 	if (!path)
678 		return -ENOMEM;
679 
680 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
681 	if (ret > 0)
682 		ret = -ENOENT;
683 
684 	if (ret)
685 		goto out;
686 
687 	l = path->nodes[0];
688 	slot = path->slots[0];
689 	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
690 	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
691 	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
692 	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
693 	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
694 	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
695 
696 	btrfs_mark_buffer_dirty(l);
697 
698 out:
699 	btrfs_free_path(path);
700 	return ret;
701 }
702 
703 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
704 				     struct btrfs_fs_info *fs_info,
705 				    struct btrfs_root *root)
706 {
707 	struct btrfs_path *path;
708 	struct btrfs_key key;
709 	struct extent_buffer *l;
710 	struct btrfs_qgroup_status_item *ptr;
711 	int ret;
712 	int slot;
713 
714 	key.objectid = 0;
715 	key.type = BTRFS_QGROUP_STATUS_KEY;
716 	key.offset = 0;
717 
718 	path = btrfs_alloc_path();
719 	if (!path)
720 		return -ENOMEM;
721 
722 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
723 	if (ret > 0)
724 		ret = -ENOENT;
725 
726 	if (ret)
727 		goto out;
728 
729 	l = path->nodes[0];
730 	slot = path->slots[0];
731 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
732 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
733 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
734 	btrfs_set_qgroup_status_rescan(l, ptr,
735 				fs_info->qgroup_rescan_progress.objectid);
736 
737 	btrfs_mark_buffer_dirty(l);
738 
739 out:
740 	btrfs_free_path(path);
741 	return ret;
742 }
743 
744 /*
745  * called with qgroup_lock held
746  */
747 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
748 				  struct btrfs_root *root)
749 {
750 	struct btrfs_path *path;
751 	struct btrfs_key key;
752 	struct extent_buffer *leaf = NULL;
753 	int ret;
754 	int nr = 0;
755 
756 	path = btrfs_alloc_path();
757 	if (!path)
758 		return -ENOMEM;
759 
760 	path->leave_spinning = 1;
761 
762 	key.objectid = 0;
763 	key.offset = 0;
764 	key.type = 0;
765 
766 	while (1) {
767 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
768 		if (ret < 0)
769 			goto out;
770 		leaf = path->nodes[0];
771 		nr = btrfs_header_nritems(leaf);
772 		if (!nr)
773 			break;
774 		/*
775 		 * delete the leaf one by one
776 		 * since the whole tree is going
777 		 * to be deleted.
778 		 */
779 		path->slots[0] = 0;
780 		ret = btrfs_del_items(trans, root, path, 0, nr);
781 		if (ret)
782 			goto out;
783 
784 		btrfs_release_path(path);
785 	}
786 	ret = 0;
787 out:
788 	root->fs_info->pending_quota_state = 0;
789 	btrfs_free_path(path);
790 	return ret;
791 }
792 
793 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
794 		       struct btrfs_fs_info *fs_info)
795 {
796 	struct btrfs_root *quota_root;
797 	struct btrfs_root *tree_root = fs_info->tree_root;
798 	struct btrfs_path *path = NULL;
799 	struct btrfs_qgroup_status_item *ptr;
800 	struct extent_buffer *leaf;
801 	struct btrfs_key key;
802 	struct btrfs_key found_key;
803 	struct btrfs_qgroup *qgroup = NULL;
804 	int ret = 0;
805 	int slot;
806 
807 	mutex_lock(&fs_info->qgroup_ioctl_lock);
808 	if (fs_info->quota_root) {
809 		fs_info->pending_quota_state = 1;
810 		goto out;
811 	}
812 
813 	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
814 	if (!fs_info->qgroup_ulist) {
815 		ret = -ENOMEM;
816 		goto out;
817 	}
818 
819 	/*
820 	 * initially create the quota tree
821 	 */
822 	quota_root = btrfs_create_tree(trans, fs_info,
823 				       BTRFS_QUOTA_TREE_OBJECTID);
824 	if (IS_ERR(quota_root)) {
825 		ret =  PTR_ERR(quota_root);
826 		goto out;
827 	}
828 
829 	path = btrfs_alloc_path();
830 	if (!path) {
831 		ret = -ENOMEM;
832 		goto out_free_root;
833 	}
834 
835 	key.objectid = 0;
836 	key.type = BTRFS_QGROUP_STATUS_KEY;
837 	key.offset = 0;
838 
839 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
840 				      sizeof(*ptr));
841 	if (ret)
842 		goto out_free_path;
843 
844 	leaf = path->nodes[0];
845 	ptr = btrfs_item_ptr(leaf, path->slots[0],
846 				 struct btrfs_qgroup_status_item);
847 	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
848 	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
849 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
850 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
851 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
852 	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
853 
854 	btrfs_mark_buffer_dirty(leaf);
855 
856 	key.objectid = 0;
857 	key.type = BTRFS_ROOT_REF_KEY;
858 	key.offset = 0;
859 
860 	btrfs_release_path(path);
861 	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
862 	if (ret > 0)
863 		goto out_add_root;
864 	if (ret < 0)
865 		goto out_free_path;
866 
867 
868 	while (1) {
869 		slot = path->slots[0];
870 		leaf = path->nodes[0];
871 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
872 
873 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
874 			ret = add_qgroup_item(trans, quota_root,
875 					      found_key.offset);
876 			if (ret)
877 				goto out_free_path;
878 
879 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
880 			if (IS_ERR(qgroup)) {
881 				ret = PTR_ERR(qgroup);
882 				goto out_free_path;
883 			}
884 		}
885 		ret = btrfs_next_item(tree_root, path);
886 		if (ret < 0)
887 			goto out_free_path;
888 		if (ret)
889 			break;
890 	}
891 
892 out_add_root:
893 	btrfs_release_path(path);
894 	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
895 	if (ret)
896 		goto out_free_path;
897 
898 	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
899 	if (IS_ERR(qgroup)) {
900 		ret = PTR_ERR(qgroup);
901 		goto out_free_path;
902 	}
903 	spin_lock(&fs_info->qgroup_lock);
904 	fs_info->quota_root = quota_root;
905 	fs_info->pending_quota_state = 1;
906 	spin_unlock(&fs_info->qgroup_lock);
907 out_free_path:
908 	btrfs_free_path(path);
909 out_free_root:
910 	if (ret) {
911 		free_extent_buffer(quota_root->node);
912 		free_extent_buffer(quota_root->commit_root);
913 		kfree(quota_root);
914 	}
915 out:
916 	if (ret) {
917 		ulist_free(fs_info->qgroup_ulist);
918 		fs_info->qgroup_ulist = NULL;
919 	}
920 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
921 	return ret;
922 }
923 
924 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
925 			struct btrfs_fs_info *fs_info)
926 {
927 	struct btrfs_root *tree_root = fs_info->tree_root;
928 	struct btrfs_root *quota_root;
929 	int ret = 0;
930 
931 	mutex_lock(&fs_info->qgroup_ioctl_lock);
932 	if (!fs_info->quota_root)
933 		goto out;
934 	spin_lock(&fs_info->qgroup_lock);
935 	fs_info->quota_enabled = 0;
936 	fs_info->pending_quota_state = 0;
937 	quota_root = fs_info->quota_root;
938 	fs_info->quota_root = NULL;
939 	spin_unlock(&fs_info->qgroup_lock);
940 
941 	btrfs_free_qgroup_config(fs_info);
942 
943 	ret = btrfs_clean_quota_tree(trans, quota_root);
944 	if (ret)
945 		goto out;
946 
947 	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
948 	if (ret)
949 		goto out;
950 
951 	list_del(&quota_root->dirty_list);
952 
953 	btrfs_tree_lock(quota_root->node);
954 	clean_tree_block(trans, tree_root, quota_root->node);
955 	btrfs_tree_unlock(quota_root->node);
956 	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
957 
958 	free_extent_buffer(quota_root->node);
959 	free_extent_buffer(quota_root->commit_root);
960 	kfree(quota_root);
961 out:
962 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
963 	return ret;
964 }
965 
966 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
967 			 struct btrfs_qgroup *qgroup)
968 {
969 	if (list_empty(&qgroup->dirty))
970 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
971 }
972 
973 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
974 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
975 {
976 	struct btrfs_root *quota_root;
977 	struct btrfs_qgroup *parent;
978 	struct btrfs_qgroup *member;
979 	struct btrfs_qgroup_list *list;
980 	int ret = 0;
981 
982 	mutex_lock(&fs_info->qgroup_ioctl_lock);
983 	quota_root = fs_info->quota_root;
984 	if (!quota_root) {
985 		ret = -EINVAL;
986 		goto out;
987 	}
988 	member = find_qgroup_rb(fs_info, src);
989 	parent = find_qgroup_rb(fs_info, dst);
990 	if (!member || !parent) {
991 		ret = -EINVAL;
992 		goto out;
993 	}
994 
995 	/* check if such qgroup relation exist firstly */
996 	list_for_each_entry(list, &member->groups, next_group) {
997 		if (list->group == parent) {
998 			ret = -EEXIST;
999 			goto out;
1000 		}
1001 	}
1002 
1003 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1004 	if (ret)
1005 		goto out;
1006 
1007 	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1008 	if (ret) {
1009 		del_qgroup_relation_item(trans, quota_root, src, dst);
1010 		goto out;
1011 	}
1012 
1013 	spin_lock(&fs_info->qgroup_lock);
1014 	ret = add_relation_rb(quota_root->fs_info, src, dst);
1015 	spin_unlock(&fs_info->qgroup_lock);
1016 out:
1017 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1018 	return ret;
1019 }
1020 
1021 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1022 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1023 {
1024 	struct btrfs_root *quota_root;
1025 	struct btrfs_qgroup *parent;
1026 	struct btrfs_qgroup *member;
1027 	struct btrfs_qgroup_list *list;
1028 	int ret = 0;
1029 	int err;
1030 
1031 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1032 	quota_root = fs_info->quota_root;
1033 	if (!quota_root) {
1034 		ret = -EINVAL;
1035 		goto out;
1036 	}
1037 
1038 	member = find_qgroup_rb(fs_info, src);
1039 	parent = find_qgroup_rb(fs_info, dst);
1040 	if (!member || !parent) {
1041 		ret = -EINVAL;
1042 		goto out;
1043 	}
1044 
1045 	/* check if such qgroup relation exist firstly */
1046 	list_for_each_entry(list, &member->groups, next_group) {
1047 		if (list->group == parent)
1048 			goto exist;
1049 	}
1050 	ret = -ENOENT;
1051 	goto out;
1052 exist:
1053 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1054 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
1055 	if (err && !ret)
1056 		ret = err;
1057 
1058 	spin_lock(&fs_info->qgroup_lock);
1059 	del_relation_rb(fs_info, src, dst);
1060 	spin_unlock(&fs_info->qgroup_lock);
1061 out:
1062 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1063 	return ret;
1064 }
1065 
1066 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1067 			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1068 {
1069 	struct btrfs_root *quota_root;
1070 	struct btrfs_qgroup *qgroup;
1071 	int ret = 0;
1072 
1073 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1074 	quota_root = fs_info->quota_root;
1075 	if (!quota_root) {
1076 		ret = -EINVAL;
1077 		goto out;
1078 	}
1079 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1080 	if (qgroup) {
1081 		ret = -EEXIST;
1082 		goto out;
1083 	}
1084 
1085 	ret = add_qgroup_item(trans, quota_root, qgroupid);
1086 	if (ret)
1087 		goto out;
1088 
1089 	spin_lock(&fs_info->qgroup_lock);
1090 	qgroup = add_qgroup_rb(fs_info, qgroupid);
1091 	spin_unlock(&fs_info->qgroup_lock);
1092 
1093 	if (IS_ERR(qgroup))
1094 		ret = PTR_ERR(qgroup);
1095 out:
1096 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1097 	return ret;
1098 }
1099 
1100 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1101 			struct btrfs_fs_info *fs_info, u64 qgroupid)
1102 {
1103 	struct btrfs_root *quota_root;
1104 	struct btrfs_qgroup *qgroup;
1105 	int ret = 0;
1106 
1107 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1108 	quota_root = fs_info->quota_root;
1109 	if (!quota_root) {
1110 		ret = -EINVAL;
1111 		goto out;
1112 	}
1113 
1114 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1115 	if (!qgroup) {
1116 		ret = -ENOENT;
1117 		goto out;
1118 	} else {
1119 		/* check if there are no relations to this qgroup */
1120 		if (!list_empty(&qgroup->groups) ||
1121 		    !list_empty(&qgroup->members)) {
1122 			ret = -EBUSY;
1123 			goto out;
1124 		}
1125 	}
1126 	ret = del_qgroup_item(trans, quota_root, qgroupid);
1127 
1128 	spin_lock(&fs_info->qgroup_lock);
1129 	del_qgroup_rb(quota_root->fs_info, qgroupid);
1130 	spin_unlock(&fs_info->qgroup_lock);
1131 out:
1132 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1133 	return ret;
1134 }
1135 
1136 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1137 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
1138 		       struct btrfs_qgroup_limit *limit)
1139 {
1140 	struct btrfs_root *quota_root;
1141 	struct btrfs_qgroup *qgroup;
1142 	int ret = 0;
1143 
1144 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1145 	quota_root = fs_info->quota_root;
1146 	if (!quota_root) {
1147 		ret = -EINVAL;
1148 		goto out;
1149 	}
1150 
1151 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1152 	if (!qgroup) {
1153 		ret = -ENOENT;
1154 		goto out;
1155 	}
1156 	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1157 				       limit->flags, limit->max_rfer,
1158 				       limit->max_excl, limit->rsv_rfer,
1159 				       limit->rsv_excl);
1160 	if (ret) {
1161 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1162 		btrfs_info(fs_info, "unable to update quota limit for %llu",
1163 		       qgroupid);
1164 	}
1165 
1166 	spin_lock(&fs_info->qgroup_lock);
1167 	qgroup->lim_flags = limit->flags;
1168 	qgroup->max_rfer = limit->max_rfer;
1169 	qgroup->max_excl = limit->max_excl;
1170 	qgroup->rsv_rfer = limit->rsv_rfer;
1171 	qgroup->rsv_excl = limit->rsv_excl;
1172 	spin_unlock(&fs_info->qgroup_lock);
1173 out:
1174 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1175 	return ret;
1176 }
1177 
1178 /*
1179  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1180  * the modification into a list that's later used by btrfs_end_transaction to
1181  * pass the recorded modifications on to btrfs_qgroup_account_ref.
1182  */
1183 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1184 			    struct btrfs_delayed_ref_node *node,
1185 			    struct btrfs_delayed_extent_op *extent_op)
1186 {
1187 	struct qgroup_update *u;
1188 
1189 	BUG_ON(!trans->delayed_ref_elem.seq);
1190 	u = kmalloc(sizeof(*u), GFP_NOFS);
1191 	if (!u)
1192 		return -ENOMEM;
1193 
1194 	u->node = node;
1195 	u->extent_op = extent_op;
1196 	list_add_tail(&u->list, &trans->qgroup_ref_list);
1197 
1198 	return 0;
1199 }
1200 
1201 static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1202 				    struct ulist *roots, struct ulist *tmp,
1203 				    u64 seq)
1204 {
1205 	struct ulist_node *unode;
1206 	struct ulist_iterator uiter;
1207 	struct ulist_node *tmp_unode;
1208 	struct ulist_iterator tmp_uiter;
1209 	struct btrfs_qgroup *qg;
1210 	int ret;
1211 
1212 	ULIST_ITER_INIT(&uiter);
1213 	while ((unode = ulist_next(roots, &uiter))) {
1214 		qg = find_qgroup_rb(fs_info, unode->val);
1215 		if (!qg)
1216 			continue;
1217 
1218 		ulist_reinit(tmp);
1219 						/* XXX id not needed */
1220 		ret = ulist_add(tmp, qg->qgroupid,
1221 				(u64)(uintptr_t)qg, GFP_ATOMIC);
1222 		if (ret < 0)
1223 			return ret;
1224 		ULIST_ITER_INIT(&tmp_uiter);
1225 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1226 			struct btrfs_qgroup_list *glist;
1227 
1228 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1229 			if (qg->refcnt < seq)
1230 				qg->refcnt = seq + 1;
1231 			else
1232 				++qg->refcnt;
1233 
1234 			list_for_each_entry(glist, &qg->groups, next_group) {
1235 				ret = ulist_add(tmp, glist->group->qgroupid,
1236 						(u64)(uintptr_t)glist->group,
1237 						GFP_ATOMIC);
1238 				if (ret < 0)
1239 					return ret;
1240 			}
1241 		}
1242 	}
1243 
1244 	return 0;
1245 }
1246 
1247 static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1248 				    struct ulist *roots, struct ulist *tmp,
1249 				    u64 seq, int sgn, u64 num_bytes,
1250 				    struct btrfs_qgroup *qgroup)
1251 {
1252 	struct ulist_node *unode;
1253 	struct ulist_iterator uiter;
1254 	struct btrfs_qgroup *qg;
1255 	struct btrfs_qgroup_list *glist;
1256 	int ret;
1257 
1258 	ulist_reinit(tmp);
1259 	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1260 	if (ret < 0)
1261 		return ret;
1262 
1263 	ULIST_ITER_INIT(&uiter);
1264 	while ((unode = ulist_next(tmp, &uiter))) {
1265 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1266 		if (qg->refcnt < seq) {
1267 			/* not visited by step 1 */
1268 			qg->rfer += sgn * num_bytes;
1269 			qg->rfer_cmpr += sgn * num_bytes;
1270 			if (roots->nnodes == 0) {
1271 				qg->excl += sgn * num_bytes;
1272 				qg->excl_cmpr += sgn * num_bytes;
1273 			}
1274 			qgroup_dirty(fs_info, qg);
1275 		}
1276 		WARN_ON(qg->tag >= seq);
1277 		qg->tag = seq;
1278 
1279 		list_for_each_entry(glist, &qg->groups, next_group) {
1280 			ret = ulist_add(tmp, glist->group->qgroupid,
1281 					(uintptr_t)glist->group, GFP_ATOMIC);
1282 			if (ret < 0)
1283 				return ret;
1284 		}
1285 	}
1286 
1287 	return 0;
1288 }
1289 
1290 static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1291 				    struct ulist *roots, struct ulist *tmp,
1292 				    u64 seq, int sgn, u64 num_bytes)
1293 {
1294 	struct ulist_node *unode;
1295 	struct ulist_iterator uiter;
1296 	struct btrfs_qgroup *qg;
1297 	struct ulist_node *tmp_unode;
1298 	struct ulist_iterator tmp_uiter;
1299 	int ret;
1300 
1301 	ULIST_ITER_INIT(&uiter);
1302 	while ((unode = ulist_next(roots, &uiter))) {
1303 		qg = find_qgroup_rb(fs_info, unode->val);
1304 		if (!qg)
1305 			continue;
1306 
1307 		ulist_reinit(tmp);
1308 		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1309 		if (ret < 0)
1310 			return ret;
1311 
1312 		ULIST_ITER_INIT(&tmp_uiter);
1313 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1314 			struct btrfs_qgroup_list *glist;
1315 
1316 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1317 			if (qg->tag == seq)
1318 				continue;
1319 
1320 			if (qg->refcnt - seq == roots->nnodes) {
1321 				qg->excl -= sgn * num_bytes;
1322 				qg->excl_cmpr -= sgn * num_bytes;
1323 				qgroup_dirty(fs_info, qg);
1324 			}
1325 
1326 			list_for_each_entry(glist, &qg->groups, next_group) {
1327 				ret = ulist_add(tmp, glist->group->qgroupid,
1328 						(uintptr_t)glist->group,
1329 						GFP_ATOMIC);
1330 				if (ret < 0)
1331 					return ret;
1332 			}
1333 		}
1334 	}
1335 
1336 	return 0;
1337 }
1338 
1339 /*
1340  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1341  * from the fs. First, all roots referencing the extent are searched, and
1342  * then the space is accounted accordingly to the different roots. The
1343  * accounting algorithm works in 3 steps documented inline.
1344  */
1345 int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1346 			     struct btrfs_fs_info *fs_info,
1347 			     struct btrfs_delayed_ref_node *node,
1348 			     struct btrfs_delayed_extent_op *extent_op)
1349 {
1350 	struct btrfs_root *quota_root;
1351 	u64 ref_root;
1352 	struct btrfs_qgroup *qgroup;
1353 	struct ulist *roots = NULL;
1354 	u64 seq;
1355 	int ret = 0;
1356 	int sgn;
1357 
1358 	if (!fs_info->quota_enabled)
1359 		return 0;
1360 
1361 	BUG_ON(!fs_info->quota_root);
1362 
1363 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1364 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1365 		struct btrfs_delayed_tree_ref *ref;
1366 		ref = btrfs_delayed_node_to_tree_ref(node);
1367 		ref_root = ref->root;
1368 	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1369 		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
1370 		struct btrfs_delayed_data_ref *ref;
1371 		ref = btrfs_delayed_node_to_data_ref(node);
1372 		ref_root = ref->root;
1373 	} else {
1374 		BUG();
1375 	}
1376 
1377 	if (!is_fstree(ref_root)) {
1378 		/*
1379 		 * non-fs-trees are not being accounted
1380 		 */
1381 		return 0;
1382 	}
1383 
1384 	switch (node->action) {
1385 	case BTRFS_ADD_DELAYED_REF:
1386 	case BTRFS_ADD_DELAYED_EXTENT:
1387 		sgn = 1;
1388 		seq = btrfs_tree_mod_seq_prev(node->seq);
1389 		break;
1390 	case BTRFS_DROP_DELAYED_REF:
1391 		sgn = -1;
1392 		seq = node->seq;
1393 		break;
1394 	case BTRFS_UPDATE_DELAYED_HEAD:
1395 		return 0;
1396 	default:
1397 		BUG();
1398 	}
1399 
1400 	mutex_lock(&fs_info->qgroup_rescan_lock);
1401 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1402 		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1403 			mutex_unlock(&fs_info->qgroup_rescan_lock);
1404 			return 0;
1405 		}
1406 	}
1407 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1408 
1409 	/*
1410 	 * the delayed ref sequence number we pass depends on the direction of
1411 	 * the operation. for add operations, we pass
1412 	 * tree_mod_log_prev_seq(node->seq) to skip
1413 	 * the delayed ref's current sequence number, because we need the state
1414 	 * of the tree before the add operation. for delete operations, we pass
1415 	 * (node->seq) to include the delayed ref's current sequence number,
1416 	 * because we need the state of the tree after the delete operation.
1417 	 */
1418 	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1419 	if (ret < 0)
1420 		return ret;
1421 
1422 	spin_lock(&fs_info->qgroup_lock);
1423 
1424 	quota_root = fs_info->quota_root;
1425 	if (!quota_root)
1426 		goto unlock;
1427 
1428 	qgroup = find_qgroup_rb(fs_info, ref_root);
1429 	if (!qgroup)
1430 		goto unlock;
1431 
1432 	/*
1433 	 * step 1: for each old ref, visit all nodes once and inc refcnt
1434 	 */
1435 	ulist_reinit(fs_info->qgroup_ulist);
1436 	seq = fs_info->qgroup_seq;
1437 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1438 
1439 	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
1440 				       seq);
1441 	if (ret)
1442 		goto unlock;
1443 
1444 	/*
1445 	 * step 2: walk from the new root
1446 	 */
1447 	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
1448 				       seq, sgn, node->num_bytes, qgroup);
1449 	if (ret)
1450 		goto unlock;
1451 
1452 	/*
1453 	 * step 3: walk again from old refs
1454 	 */
1455 	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
1456 				       seq, sgn, node->num_bytes);
1457 	if (ret)
1458 		goto unlock;
1459 
1460 unlock:
1461 	spin_unlock(&fs_info->qgroup_lock);
1462 	ulist_free(roots);
1463 
1464 	return ret;
1465 }
1466 
1467 /*
1468  * called from commit_transaction. Writes all changed qgroups to disk.
1469  */
1470 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1471 		      struct btrfs_fs_info *fs_info)
1472 {
1473 	struct btrfs_root *quota_root = fs_info->quota_root;
1474 	int ret = 0;
1475 	int start_rescan_worker = 0;
1476 
1477 	if (!quota_root)
1478 		goto out;
1479 
1480 	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1481 		start_rescan_worker = 1;
1482 
1483 	fs_info->quota_enabled = fs_info->pending_quota_state;
1484 
1485 	spin_lock(&fs_info->qgroup_lock);
1486 	while (!list_empty(&fs_info->dirty_qgroups)) {
1487 		struct btrfs_qgroup *qgroup;
1488 		qgroup = list_first_entry(&fs_info->dirty_qgroups,
1489 					  struct btrfs_qgroup, dirty);
1490 		list_del_init(&qgroup->dirty);
1491 		spin_unlock(&fs_info->qgroup_lock);
1492 		ret = update_qgroup_info_item(trans, quota_root, qgroup);
1493 		if (ret)
1494 			fs_info->qgroup_flags |=
1495 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1496 		spin_lock(&fs_info->qgroup_lock);
1497 	}
1498 	if (fs_info->quota_enabled)
1499 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1500 	else
1501 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1502 	spin_unlock(&fs_info->qgroup_lock);
1503 
1504 	ret = update_qgroup_status_item(trans, fs_info, quota_root);
1505 	if (ret)
1506 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1507 
1508 	if (!ret && start_rescan_worker) {
1509 		ret = qgroup_rescan_init(fs_info, 0, 1);
1510 		if (!ret) {
1511 			qgroup_rescan_zero_tracking(fs_info);
1512 			btrfs_queue_work(fs_info->qgroup_rescan_workers,
1513 					 &fs_info->qgroup_rescan_work);
1514 		}
1515 		ret = 0;
1516 	}
1517 
1518 out:
1519 
1520 	return ret;
1521 }
1522 
1523 /*
1524  * copy the acounting information between qgroups. This is necessary when a
1525  * snapshot or a subvolume is created
1526  */
1527 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1528 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1529 			 struct btrfs_qgroup_inherit *inherit)
1530 {
1531 	int ret = 0;
1532 	int i;
1533 	u64 *i_qgroups;
1534 	struct btrfs_root *quota_root = fs_info->quota_root;
1535 	struct btrfs_qgroup *srcgroup;
1536 	struct btrfs_qgroup *dstgroup;
1537 	u32 level_size = 0;
1538 	u64 nums;
1539 
1540 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1541 	if (!fs_info->quota_enabled)
1542 		goto out;
1543 
1544 	if (!quota_root) {
1545 		ret = -EINVAL;
1546 		goto out;
1547 	}
1548 
1549 	if (inherit) {
1550 		i_qgroups = (u64 *)(inherit + 1);
1551 		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1552 		       2 * inherit->num_excl_copies;
1553 		for (i = 0; i < nums; ++i) {
1554 			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1555 			if (!srcgroup) {
1556 				ret = -EINVAL;
1557 				goto out;
1558 			}
1559 			++i_qgroups;
1560 		}
1561 	}
1562 
1563 	/*
1564 	 * create a tracking group for the subvol itself
1565 	 */
1566 	ret = add_qgroup_item(trans, quota_root, objectid);
1567 	if (ret)
1568 		goto out;
1569 
1570 	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1571 		ret = update_qgroup_limit_item(trans, quota_root, objectid,
1572 					       inherit->lim.flags,
1573 					       inherit->lim.max_rfer,
1574 					       inherit->lim.max_excl,
1575 					       inherit->lim.rsv_rfer,
1576 					       inherit->lim.rsv_excl);
1577 		if (ret)
1578 			goto out;
1579 	}
1580 
1581 	if (srcid) {
1582 		struct btrfs_root *srcroot;
1583 		struct btrfs_key srckey;
1584 		int srcroot_level;
1585 
1586 		srckey.objectid = srcid;
1587 		srckey.type = BTRFS_ROOT_ITEM_KEY;
1588 		srckey.offset = (u64)-1;
1589 		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1590 		if (IS_ERR(srcroot)) {
1591 			ret = PTR_ERR(srcroot);
1592 			goto out;
1593 		}
1594 
1595 		rcu_read_lock();
1596 		srcroot_level = btrfs_header_level(srcroot->node);
1597 		level_size = btrfs_level_size(srcroot, srcroot_level);
1598 		rcu_read_unlock();
1599 	}
1600 
1601 	/*
1602 	 * add qgroup to all inherited groups
1603 	 */
1604 	if (inherit) {
1605 		i_qgroups = (u64 *)(inherit + 1);
1606 		for (i = 0; i < inherit->num_qgroups; ++i) {
1607 			ret = add_qgroup_relation_item(trans, quota_root,
1608 						       objectid, *i_qgroups);
1609 			if (ret)
1610 				goto out;
1611 			ret = add_qgroup_relation_item(trans, quota_root,
1612 						       *i_qgroups, objectid);
1613 			if (ret)
1614 				goto out;
1615 			++i_qgroups;
1616 		}
1617 	}
1618 
1619 
1620 	spin_lock(&fs_info->qgroup_lock);
1621 
1622 	dstgroup = add_qgroup_rb(fs_info, objectid);
1623 	if (IS_ERR(dstgroup)) {
1624 		ret = PTR_ERR(dstgroup);
1625 		goto unlock;
1626 	}
1627 
1628 	if (srcid) {
1629 		srcgroup = find_qgroup_rb(fs_info, srcid);
1630 		if (!srcgroup)
1631 			goto unlock;
1632 		dstgroup->rfer = srcgroup->rfer - level_size;
1633 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1634 		srcgroup->excl = level_size;
1635 		srcgroup->excl_cmpr = level_size;
1636 		qgroup_dirty(fs_info, dstgroup);
1637 		qgroup_dirty(fs_info, srcgroup);
1638 	}
1639 
1640 	if (!inherit)
1641 		goto unlock;
1642 
1643 	i_qgroups = (u64 *)(inherit + 1);
1644 	for (i = 0; i < inherit->num_qgroups; ++i) {
1645 		ret = add_relation_rb(quota_root->fs_info, objectid,
1646 				      *i_qgroups);
1647 		if (ret)
1648 			goto unlock;
1649 		++i_qgroups;
1650 	}
1651 
1652 	for (i = 0; i <  inherit->num_ref_copies; ++i) {
1653 		struct btrfs_qgroup *src;
1654 		struct btrfs_qgroup *dst;
1655 
1656 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1657 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1658 
1659 		if (!src || !dst) {
1660 			ret = -EINVAL;
1661 			goto unlock;
1662 		}
1663 
1664 		dst->rfer = src->rfer - level_size;
1665 		dst->rfer_cmpr = src->rfer_cmpr - level_size;
1666 		i_qgroups += 2;
1667 	}
1668 	for (i = 0; i <  inherit->num_excl_copies; ++i) {
1669 		struct btrfs_qgroup *src;
1670 		struct btrfs_qgroup *dst;
1671 
1672 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1673 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1674 
1675 		if (!src || !dst) {
1676 			ret = -EINVAL;
1677 			goto unlock;
1678 		}
1679 
1680 		dst->excl = src->excl + level_size;
1681 		dst->excl_cmpr = src->excl_cmpr + level_size;
1682 		i_qgroups += 2;
1683 	}
1684 
1685 unlock:
1686 	spin_unlock(&fs_info->qgroup_lock);
1687 out:
1688 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1689 	return ret;
1690 }
1691 
1692 /*
1693  * reserve some space for a qgroup and all its parents. The reservation takes
1694  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1695  * accounting. If not enough space is available, EDQUOT is returned.
1696  * We assume that the requested space is new for all qgroups.
1697  */
1698 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1699 {
1700 	struct btrfs_root *quota_root;
1701 	struct btrfs_qgroup *qgroup;
1702 	struct btrfs_fs_info *fs_info = root->fs_info;
1703 	u64 ref_root = root->root_key.objectid;
1704 	int ret = 0;
1705 	struct ulist_node *unode;
1706 	struct ulist_iterator uiter;
1707 
1708 	if (!is_fstree(ref_root))
1709 		return 0;
1710 
1711 	if (num_bytes == 0)
1712 		return 0;
1713 
1714 	spin_lock(&fs_info->qgroup_lock);
1715 	quota_root = fs_info->quota_root;
1716 	if (!quota_root)
1717 		goto out;
1718 
1719 	qgroup = find_qgroup_rb(fs_info, ref_root);
1720 	if (!qgroup)
1721 		goto out;
1722 
1723 	/*
1724 	 * in a first step, we check all affected qgroups if any limits would
1725 	 * be exceeded
1726 	 */
1727 	ulist_reinit(fs_info->qgroup_ulist);
1728 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1729 			(uintptr_t)qgroup, GFP_ATOMIC);
1730 	if (ret < 0)
1731 		goto out;
1732 	ULIST_ITER_INIT(&uiter);
1733 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1734 		struct btrfs_qgroup *qg;
1735 		struct btrfs_qgroup_list *glist;
1736 
1737 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1738 
1739 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1740 		    qg->reserved + (s64)qg->rfer + num_bytes >
1741 		    qg->max_rfer) {
1742 			ret = -EDQUOT;
1743 			goto out;
1744 		}
1745 
1746 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1747 		    qg->reserved + (s64)qg->excl + num_bytes >
1748 		    qg->max_excl) {
1749 			ret = -EDQUOT;
1750 			goto out;
1751 		}
1752 
1753 		list_for_each_entry(glist, &qg->groups, next_group) {
1754 			ret = ulist_add(fs_info->qgroup_ulist,
1755 					glist->group->qgroupid,
1756 					(uintptr_t)glist->group, GFP_ATOMIC);
1757 			if (ret < 0)
1758 				goto out;
1759 		}
1760 	}
1761 	ret = 0;
1762 	/*
1763 	 * no limits exceeded, now record the reservation into all qgroups
1764 	 */
1765 	ULIST_ITER_INIT(&uiter);
1766 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1767 		struct btrfs_qgroup *qg;
1768 
1769 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1770 
1771 		qg->reserved += num_bytes;
1772 	}
1773 
1774 out:
1775 	spin_unlock(&fs_info->qgroup_lock);
1776 	return ret;
1777 }
1778 
1779 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1780 {
1781 	struct btrfs_root *quota_root;
1782 	struct btrfs_qgroup *qgroup;
1783 	struct btrfs_fs_info *fs_info = root->fs_info;
1784 	struct ulist_node *unode;
1785 	struct ulist_iterator uiter;
1786 	u64 ref_root = root->root_key.objectid;
1787 	int ret = 0;
1788 
1789 	if (!is_fstree(ref_root))
1790 		return;
1791 
1792 	if (num_bytes == 0)
1793 		return;
1794 
1795 	spin_lock(&fs_info->qgroup_lock);
1796 
1797 	quota_root = fs_info->quota_root;
1798 	if (!quota_root)
1799 		goto out;
1800 
1801 	qgroup = find_qgroup_rb(fs_info, ref_root);
1802 	if (!qgroup)
1803 		goto out;
1804 
1805 	ulist_reinit(fs_info->qgroup_ulist);
1806 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1807 			(uintptr_t)qgroup, GFP_ATOMIC);
1808 	if (ret < 0)
1809 		goto out;
1810 	ULIST_ITER_INIT(&uiter);
1811 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1812 		struct btrfs_qgroup *qg;
1813 		struct btrfs_qgroup_list *glist;
1814 
1815 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1816 
1817 		qg->reserved -= num_bytes;
1818 
1819 		list_for_each_entry(glist, &qg->groups, next_group) {
1820 			ret = ulist_add(fs_info->qgroup_ulist,
1821 					glist->group->qgroupid,
1822 					(uintptr_t)glist->group, GFP_ATOMIC);
1823 			if (ret < 0)
1824 				goto out;
1825 		}
1826 	}
1827 
1828 out:
1829 	spin_unlock(&fs_info->qgroup_lock);
1830 }
1831 
1832 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1833 {
1834 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1835 		return;
1836 	btrfs_err(trans->root->fs_info,
1837 		"qgroups not uptodate in trans handle %p:  list is%s empty, "
1838 		"seq is %#x.%x",
1839 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1840 		(u32)(trans->delayed_ref_elem.seq >> 32),
1841 		(u32)trans->delayed_ref_elem.seq);
1842 	BUG();
1843 }
1844 
1845 /*
1846  * returns < 0 on error, 0 when more leafs are to be scanned.
1847  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1848  */
1849 static int
1850 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1851 		   struct btrfs_trans_handle *trans, struct ulist *tmp,
1852 		   struct extent_buffer *scratch_leaf)
1853 {
1854 	struct btrfs_key found;
1855 	struct ulist *roots = NULL;
1856 	struct ulist_node *unode;
1857 	struct ulist_iterator uiter;
1858 	struct seq_list tree_mod_seq_elem = {};
1859 	u64 seq;
1860 	int slot;
1861 	int ret;
1862 
1863 	path->leave_spinning = 1;
1864 	mutex_lock(&fs_info->qgroup_rescan_lock);
1865 	ret = btrfs_search_slot_for_read(fs_info->extent_root,
1866 					 &fs_info->qgroup_rescan_progress,
1867 					 path, 1, 0);
1868 
1869 	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1870 		 fs_info->qgroup_rescan_progress.objectid,
1871 		 fs_info->qgroup_rescan_progress.type,
1872 		 fs_info->qgroup_rescan_progress.offset, ret);
1873 
1874 	if (ret) {
1875 		/*
1876 		 * The rescan is about to end, we will not be scanning any
1877 		 * further blocks. We cannot unset the RESCAN flag here, because
1878 		 * we want to commit the transaction if everything went well.
1879 		 * To make the live accounting work in this phase, we set our
1880 		 * scan progress pointer such that every real extent objectid
1881 		 * will be smaller.
1882 		 */
1883 		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1884 		btrfs_release_path(path);
1885 		mutex_unlock(&fs_info->qgroup_rescan_lock);
1886 		return ret;
1887 	}
1888 
1889 	btrfs_item_key_to_cpu(path->nodes[0], &found,
1890 			      btrfs_header_nritems(path->nodes[0]) - 1);
1891 	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1892 
1893 	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1894 	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1895 	slot = path->slots[0];
1896 	btrfs_release_path(path);
1897 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1898 
1899 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1900 		u64 num_bytes;
1901 
1902 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1903 		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
1904 		    found.type != BTRFS_METADATA_ITEM_KEY)
1905 			continue;
1906 		if (found.type == BTRFS_METADATA_ITEM_KEY)
1907 			num_bytes = fs_info->extent_root->leafsize;
1908 		else
1909 			num_bytes = found.offset;
1910 
1911 		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1912 					   tree_mod_seq_elem.seq, &roots);
1913 		if (ret < 0)
1914 			goto out;
1915 		spin_lock(&fs_info->qgroup_lock);
1916 		seq = fs_info->qgroup_seq;
1917 		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1918 
1919 		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1920 		if (ret) {
1921 			spin_unlock(&fs_info->qgroup_lock);
1922 			ulist_free(roots);
1923 			goto out;
1924 		}
1925 
1926 		/*
1927 		 * step2 of btrfs_qgroup_account_ref works from a single root,
1928 		 * we're doing all at once here.
1929 		 */
1930 		ulist_reinit(tmp);
1931 		ULIST_ITER_INIT(&uiter);
1932 		while ((unode = ulist_next(roots, &uiter))) {
1933 			struct btrfs_qgroup *qg;
1934 
1935 			qg = find_qgroup_rb(fs_info, unode->val);
1936 			if (!qg)
1937 				continue;
1938 
1939 			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1940 					GFP_ATOMIC);
1941 			if (ret < 0) {
1942 				spin_unlock(&fs_info->qgroup_lock);
1943 				ulist_free(roots);
1944 				goto out;
1945 			}
1946 		}
1947 
1948 		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1949 		ULIST_ITER_INIT(&uiter);
1950 		while ((unode = ulist_next(tmp, &uiter))) {
1951 			struct btrfs_qgroup *qg;
1952 			struct btrfs_qgroup_list *glist;
1953 
1954 			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1955 			qg->rfer += num_bytes;
1956 			qg->rfer_cmpr += num_bytes;
1957 			WARN_ON(qg->tag >= seq);
1958 			if (qg->refcnt - seq == roots->nnodes) {
1959 				qg->excl += num_bytes;
1960 				qg->excl_cmpr += num_bytes;
1961 			}
1962 			qgroup_dirty(fs_info, qg);
1963 
1964 			list_for_each_entry(glist, &qg->groups, next_group) {
1965 				ret = ulist_add(tmp, glist->group->qgroupid,
1966 						(uintptr_t)glist->group,
1967 						GFP_ATOMIC);
1968 				if (ret < 0) {
1969 					spin_unlock(&fs_info->qgroup_lock);
1970 					ulist_free(roots);
1971 					goto out;
1972 				}
1973 			}
1974 		}
1975 
1976 		spin_unlock(&fs_info->qgroup_lock);
1977 		ulist_free(roots);
1978 		ret = 0;
1979 	}
1980 
1981 out:
1982 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1983 
1984 	return ret;
1985 }
1986 
1987 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
1988 {
1989 	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
1990 						     qgroup_rescan_work);
1991 	struct btrfs_path *path;
1992 	struct btrfs_trans_handle *trans = NULL;
1993 	struct ulist *tmp = NULL;
1994 	struct extent_buffer *scratch_leaf = NULL;
1995 	int err = -ENOMEM;
1996 
1997 	path = btrfs_alloc_path();
1998 	if (!path)
1999 		goto out;
2000 	tmp = ulist_alloc(GFP_NOFS);
2001 	if (!tmp)
2002 		goto out;
2003 	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2004 	if (!scratch_leaf)
2005 		goto out;
2006 
2007 	err = 0;
2008 	while (!err) {
2009 		trans = btrfs_start_transaction(fs_info->fs_root, 0);
2010 		if (IS_ERR(trans)) {
2011 			err = PTR_ERR(trans);
2012 			break;
2013 		}
2014 		if (!fs_info->quota_enabled) {
2015 			err = -EINTR;
2016 		} else {
2017 			err = qgroup_rescan_leaf(fs_info, path, trans,
2018 						 tmp, scratch_leaf);
2019 		}
2020 		if (err > 0)
2021 			btrfs_commit_transaction(trans, fs_info->fs_root);
2022 		else
2023 			btrfs_end_transaction(trans, fs_info->fs_root);
2024 	}
2025 
2026 out:
2027 	kfree(scratch_leaf);
2028 	ulist_free(tmp);
2029 	btrfs_free_path(path);
2030 
2031 	mutex_lock(&fs_info->qgroup_rescan_lock);
2032 	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2033 
2034 	if (err == 2 &&
2035 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2036 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2037 	} else if (err < 0) {
2038 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2039 	}
2040 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2041 
2042 	if (err >= 0) {
2043 		btrfs_info(fs_info, "qgroup scan completed%s",
2044 			err == 2 ? " (inconsistency flag cleared)" : "");
2045 	} else {
2046 		btrfs_err(fs_info, "qgroup scan failed with %d", err);
2047 	}
2048 
2049 	complete_all(&fs_info->qgroup_rescan_completion);
2050 }
2051 
2052 /*
2053  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2054  * memory required for the rescan context.
2055  */
2056 static int
2057 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2058 		   int init_flags)
2059 {
2060 	int ret = 0;
2061 
2062 	if (!init_flags &&
2063 	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2064 	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2065 		ret = -EINVAL;
2066 		goto err;
2067 	}
2068 
2069 	mutex_lock(&fs_info->qgroup_rescan_lock);
2070 	spin_lock(&fs_info->qgroup_lock);
2071 
2072 	if (init_flags) {
2073 		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2074 			ret = -EINPROGRESS;
2075 		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2076 			ret = -EINVAL;
2077 
2078 		if (ret) {
2079 			spin_unlock(&fs_info->qgroup_lock);
2080 			mutex_unlock(&fs_info->qgroup_rescan_lock);
2081 			goto err;
2082 		}
2083 
2084 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2085 	}
2086 
2087 	memset(&fs_info->qgroup_rescan_progress, 0,
2088 		sizeof(fs_info->qgroup_rescan_progress));
2089 	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2090 
2091 	spin_unlock(&fs_info->qgroup_lock);
2092 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2093 
2094 	init_completion(&fs_info->qgroup_rescan_completion);
2095 
2096 	memset(&fs_info->qgroup_rescan_work, 0,
2097 	       sizeof(fs_info->qgroup_rescan_work));
2098 	btrfs_init_work(&fs_info->qgroup_rescan_work,
2099 			btrfs_qgroup_rescan_worker, NULL, NULL);
2100 
2101 	if (ret) {
2102 err:
2103 		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2104 		return ret;
2105 	}
2106 
2107 	return 0;
2108 }
2109 
2110 static void
2111 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2112 {
2113 	struct rb_node *n;
2114 	struct btrfs_qgroup *qgroup;
2115 
2116 	spin_lock(&fs_info->qgroup_lock);
2117 	/* clear all current qgroup tracking information */
2118 	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2119 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
2120 		qgroup->rfer = 0;
2121 		qgroup->rfer_cmpr = 0;
2122 		qgroup->excl = 0;
2123 		qgroup->excl_cmpr = 0;
2124 	}
2125 	spin_unlock(&fs_info->qgroup_lock);
2126 }
2127 
2128 int
2129 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2130 {
2131 	int ret = 0;
2132 	struct btrfs_trans_handle *trans;
2133 
2134 	ret = qgroup_rescan_init(fs_info, 0, 1);
2135 	if (ret)
2136 		return ret;
2137 
2138 	/*
2139 	 * We have set the rescan_progress to 0, which means no more
2140 	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
2141 	 * However, btrfs_qgroup_account_ref may be right after its call
2142 	 * to btrfs_find_all_roots, in which case it would still do the
2143 	 * accounting.
2144 	 * To solve this, we're committing the transaction, which will
2145 	 * ensure we run all delayed refs and only after that, we are
2146 	 * going to clear all tracking information for a clean start.
2147 	 */
2148 
2149 	trans = btrfs_join_transaction(fs_info->fs_root);
2150 	if (IS_ERR(trans)) {
2151 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2152 		return PTR_ERR(trans);
2153 	}
2154 	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2155 	if (ret) {
2156 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2157 		return ret;
2158 	}
2159 
2160 	qgroup_rescan_zero_tracking(fs_info);
2161 
2162 	btrfs_queue_work(fs_info->qgroup_rescan_workers,
2163 			 &fs_info->qgroup_rescan_work);
2164 
2165 	return 0;
2166 }
2167 
2168 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2169 {
2170 	int running;
2171 	int ret = 0;
2172 
2173 	mutex_lock(&fs_info->qgroup_rescan_lock);
2174 	spin_lock(&fs_info->qgroup_lock);
2175 	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2176 	spin_unlock(&fs_info->qgroup_lock);
2177 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2178 
2179 	if (running)
2180 		ret = wait_for_completion_interruptible(
2181 					&fs_info->qgroup_rescan_completion);
2182 
2183 	return ret;
2184 }
2185 
2186 /*
2187  * this is only called from open_ctree where we're still single threaded, thus
2188  * locking is omitted here.
2189  */
2190 void
2191 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2192 {
2193 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2194 		btrfs_queue_work(fs_info->qgroup_rescan_workers,
2195 				 &fs_info->qgroup_rescan_work);
2196 }
2197