xref: /openbmc/linux/fs/btrfs/qgroup.c (revision 31b90347)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27 
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 
36 /* TODO XXX FIXME
37  *  - subvol delete -> delete when ref goes to 0? delete limits also?
38  *  - reorganize keys
39  *  - compressed
40  *  - sync
41  *  - copy also limits on subvol creation
42  *  - limit
43  *  - caches fuer ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47 
48 /*
49  * one struct for each qgroup, organized in fs_info->qgroup_tree.
50  */
51 struct btrfs_qgroup {
52 	u64 qgroupid;
53 
54 	/*
55 	 * state
56 	 */
57 	u64 rfer;	/* referenced */
58 	u64 rfer_cmpr;	/* referenced compressed */
59 	u64 excl;	/* exclusive */
60 	u64 excl_cmpr;	/* exclusive compressed */
61 
62 	/*
63 	 * limits
64 	 */
65 	u64 lim_flags;	/* which limits are set */
66 	u64 max_rfer;
67 	u64 max_excl;
68 	u64 rsv_rfer;
69 	u64 rsv_excl;
70 
71 	/*
72 	 * reservation tracking
73 	 */
74 	u64 reserved;
75 
76 	/*
77 	 * lists
78 	 */
79 	struct list_head groups;  /* groups this group is member of */
80 	struct list_head members; /* groups that are members of this group */
81 	struct list_head dirty;   /* dirty groups */
82 	struct rb_node node;	  /* tree of qgroups */
83 
84 	/*
85 	 * temp variables for accounting operations
86 	 */
87 	u64 tag;
88 	u64 refcnt;
89 };
90 
91 /*
92  * glue structure to represent the relations between qgroups.
93  */
94 struct btrfs_qgroup_list {
95 	struct list_head next_group;
96 	struct list_head next_member;
97 	struct btrfs_qgroup *group;
98 	struct btrfs_qgroup *member;
99 };
100 
101 static int
102 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 		   int init_flags);
104 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
105 
106 /* must be called with qgroup_ioctl_lock held */
107 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108 					   u64 qgroupid)
109 {
110 	struct rb_node *n = fs_info->qgroup_tree.rb_node;
111 	struct btrfs_qgroup *qgroup;
112 
113 	while (n) {
114 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
115 		if (qgroup->qgroupid < qgroupid)
116 			n = n->rb_left;
117 		else if (qgroup->qgroupid > qgroupid)
118 			n = n->rb_right;
119 		else
120 			return qgroup;
121 	}
122 	return NULL;
123 }
124 
125 /* must be called with qgroup_lock held */
126 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127 					  u64 qgroupid)
128 {
129 	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130 	struct rb_node *parent = NULL;
131 	struct btrfs_qgroup *qgroup;
132 
133 	while (*p) {
134 		parent = *p;
135 		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136 
137 		if (qgroup->qgroupid < qgroupid)
138 			p = &(*p)->rb_left;
139 		else if (qgroup->qgroupid > qgroupid)
140 			p = &(*p)->rb_right;
141 		else
142 			return qgroup;
143 	}
144 
145 	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146 	if (!qgroup)
147 		return ERR_PTR(-ENOMEM);
148 
149 	qgroup->qgroupid = qgroupid;
150 	INIT_LIST_HEAD(&qgroup->groups);
151 	INIT_LIST_HEAD(&qgroup->members);
152 	INIT_LIST_HEAD(&qgroup->dirty);
153 
154 	rb_link_node(&qgroup->node, parent, p);
155 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156 
157 	return qgroup;
158 }
159 
160 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
161 {
162 	struct btrfs_qgroup_list *list;
163 
164 	list_del(&qgroup->dirty);
165 	while (!list_empty(&qgroup->groups)) {
166 		list = list_first_entry(&qgroup->groups,
167 					struct btrfs_qgroup_list, next_group);
168 		list_del(&list->next_group);
169 		list_del(&list->next_member);
170 		kfree(list);
171 	}
172 
173 	while (!list_empty(&qgroup->members)) {
174 		list = list_first_entry(&qgroup->members,
175 					struct btrfs_qgroup_list, next_member);
176 		list_del(&list->next_group);
177 		list_del(&list->next_member);
178 		kfree(list);
179 	}
180 	kfree(qgroup);
181 }
182 
183 /* must be called with qgroup_lock held */
184 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185 {
186 	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187 
188 	if (!qgroup)
189 		return -ENOENT;
190 
191 	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 	__del_qgroup_rb(qgroup);
193 	return 0;
194 }
195 
196 /* must be called with qgroup_lock held */
197 static int add_relation_rb(struct btrfs_fs_info *fs_info,
198 			   u64 memberid, u64 parentid)
199 {
200 	struct btrfs_qgroup *member;
201 	struct btrfs_qgroup *parent;
202 	struct btrfs_qgroup_list *list;
203 
204 	member = find_qgroup_rb(fs_info, memberid);
205 	parent = find_qgroup_rb(fs_info, parentid);
206 	if (!member || !parent)
207 		return -ENOENT;
208 
209 	list = kzalloc(sizeof(*list), GFP_ATOMIC);
210 	if (!list)
211 		return -ENOMEM;
212 
213 	list->group = parent;
214 	list->member = member;
215 	list_add_tail(&list->next_group, &member->groups);
216 	list_add_tail(&list->next_member, &parent->members);
217 
218 	return 0;
219 }
220 
221 /* must be called with qgroup_lock held */
222 static int del_relation_rb(struct btrfs_fs_info *fs_info,
223 			   u64 memberid, u64 parentid)
224 {
225 	struct btrfs_qgroup *member;
226 	struct btrfs_qgroup *parent;
227 	struct btrfs_qgroup_list *list;
228 
229 	member = find_qgroup_rb(fs_info, memberid);
230 	parent = find_qgroup_rb(fs_info, parentid);
231 	if (!member || !parent)
232 		return -ENOENT;
233 
234 	list_for_each_entry(list, &member->groups, next_group) {
235 		if (list->group == parent) {
236 			list_del(&list->next_group);
237 			list_del(&list->next_member);
238 			kfree(list);
239 			return 0;
240 		}
241 	}
242 	return -ENOENT;
243 }
244 
245 /*
246  * The full config is read in one go, only called from open_ctree()
247  * It doesn't use any locking, as at this point we're still single-threaded
248  */
249 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
250 {
251 	struct btrfs_key key;
252 	struct btrfs_key found_key;
253 	struct btrfs_root *quota_root = fs_info->quota_root;
254 	struct btrfs_path *path = NULL;
255 	struct extent_buffer *l;
256 	int slot;
257 	int ret = 0;
258 	u64 flags = 0;
259 	u64 rescan_progress = 0;
260 
261 	if (!fs_info->quota_enabled)
262 		return 0;
263 
264 	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
265 	if (!fs_info->qgroup_ulist) {
266 		ret = -ENOMEM;
267 		goto out;
268 	}
269 
270 	path = btrfs_alloc_path();
271 	if (!path) {
272 		ret = -ENOMEM;
273 		goto out;
274 	}
275 
276 	/* default this to quota off, in case no status key is found */
277 	fs_info->qgroup_flags = 0;
278 
279 	/*
280 	 * pass 1: read status, all qgroup infos and limits
281 	 */
282 	key.objectid = 0;
283 	key.type = 0;
284 	key.offset = 0;
285 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
286 	if (ret)
287 		goto out;
288 
289 	while (1) {
290 		struct btrfs_qgroup *qgroup;
291 
292 		slot = path->slots[0];
293 		l = path->nodes[0];
294 		btrfs_item_key_to_cpu(l, &found_key, slot);
295 
296 		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
297 			struct btrfs_qgroup_status_item *ptr;
298 
299 			ptr = btrfs_item_ptr(l, slot,
300 					     struct btrfs_qgroup_status_item);
301 
302 			if (btrfs_qgroup_status_version(l, ptr) !=
303 			    BTRFS_QGROUP_STATUS_VERSION) {
304 				printk(KERN_ERR
305 				 "btrfs: old qgroup version, quota disabled\n");
306 				goto out;
307 			}
308 			if (btrfs_qgroup_status_generation(l, ptr) !=
309 			    fs_info->generation) {
310 				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311 				printk(KERN_ERR
312 					"btrfs: qgroup generation mismatch, "
313 					"marked as inconsistent\n");
314 			}
315 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316 									  ptr);
317 			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
318 			goto next1;
319 		}
320 
321 		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
322 		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
323 			goto next1;
324 
325 		qgroup = find_qgroup_rb(fs_info, found_key.offset);
326 		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327 		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
328 			printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
329 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 		}
331 		if (!qgroup) {
332 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
333 			if (IS_ERR(qgroup)) {
334 				ret = PTR_ERR(qgroup);
335 				goto out;
336 			}
337 		}
338 		switch (found_key.type) {
339 		case BTRFS_QGROUP_INFO_KEY: {
340 			struct btrfs_qgroup_info_item *ptr;
341 
342 			ptr = btrfs_item_ptr(l, slot,
343 					     struct btrfs_qgroup_info_item);
344 			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
345 			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
346 			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
347 			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
348 			/* generation currently unused */
349 			break;
350 		}
351 		case BTRFS_QGROUP_LIMIT_KEY: {
352 			struct btrfs_qgroup_limit_item *ptr;
353 
354 			ptr = btrfs_item_ptr(l, slot,
355 					     struct btrfs_qgroup_limit_item);
356 			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
357 			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
358 			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
359 			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
360 			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
361 			break;
362 		}
363 		}
364 next1:
365 		ret = btrfs_next_item(quota_root, path);
366 		if (ret < 0)
367 			goto out;
368 		if (ret)
369 			break;
370 	}
371 	btrfs_release_path(path);
372 
373 	/*
374 	 * pass 2: read all qgroup relations
375 	 */
376 	key.objectid = 0;
377 	key.type = BTRFS_QGROUP_RELATION_KEY;
378 	key.offset = 0;
379 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
380 	if (ret)
381 		goto out;
382 	while (1) {
383 		slot = path->slots[0];
384 		l = path->nodes[0];
385 		btrfs_item_key_to_cpu(l, &found_key, slot);
386 
387 		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
388 			goto next2;
389 
390 		if (found_key.objectid > found_key.offset) {
391 			/* parent <- member, not needed to build config */
392 			/* FIXME should we omit the key completely? */
393 			goto next2;
394 		}
395 
396 		ret = add_relation_rb(fs_info, found_key.objectid,
397 				      found_key.offset);
398 		if (ret == -ENOENT) {
399 			printk(KERN_WARNING
400 				"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
401 				found_key.objectid, found_key.offset);
402 			ret = 0;	/* ignore the error */
403 		}
404 		if (ret)
405 			goto out;
406 next2:
407 		ret = btrfs_next_item(quota_root, path);
408 		if (ret < 0)
409 			goto out;
410 		if (ret)
411 			break;
412 	}
413 out:
414 	fs_info->qgroup_flags |= flags;
415 	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
416 		fs_info->quota_enabled = 0;
417 		fs_info->pending_quota_state = 0;
418 	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
419 		   ret >= 0) {
420 		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
421 	}
422 	btrfs_free_path(path);
423 
424 	if (ret < 0) {
425 		ulist_free(fs_info->qgroup_ulist);
426 		fs_info->qgroup_ulist = NULL;
427 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
428 	}
429 
430 	return ret < 0 ? ret : 0;
431 }
432 
433 /*
434  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
435  * first two are in single-threaded paths.And for the third one, we have set
436  * quota_root to be null with qgroup_lock held before, so it is safe to clean
437  * up the in-memory structures without qgroup_lock held.
438  */
439 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
440 {
441 	struct rb_node *n;
442 	struct btrfs_qgroup *qgroup;
443 
444 	while ((n = rb_first(&fs_info->qgroup_tree))) {
445 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
446 		rb_erase(n, &fs_info->qgroup_tree);
447 		__del_qgroup_rb(qgroup);
448 	}
449 	/*
450 	 * we call btrfs_free_qgroup_config() when umounting
451 	 * filesystem and disabling quota, so we set qgroup_ulit
452 	 * to be null here to avoid double free.
453 	 */
454 	ulist_free(fs_info->qgroup_ulist);
455 	fs_info->qgroup_ulist = NULL;
456 }
457 
458 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
459 				    struct btrfs_root *quota_root,
460 				    u64 src, u64 dst)
461 {
462 	int ret;
463 	struct btrfs_path *path;
464 	struct btrfs_key key;
465 
466 	path = btrfs_alloc_path();
467 	if (!path)
468 		return -ENOMEM;
469 
470 	key.objectid = src;
471 	key.type = BTRFS_QGROUP_RELATION_KEY;
472 	key.offset = dst;
473 
474 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
475 
476 	btrfs_mark_buffer_dirty(path->nodes[0]);
477 
478 	btrfs_free_path(path);
479 	return ret;
480 }
481 
482 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
483 				    struct btrfs_root *quota_root,
484 				    u64 src, u64 dst)
485 {
486 	int ret;
487 	struct btrfs_path *path;
488 	struct btrfs_key key;
489 
490 	path = btrfs_alloc_path();
491 	if (!path)
492 		return -ENOMEM;
493 
494 	key.objectid = src;
495 	key.type = BTRFS_QGROUP_RELATION_KEY;
496 	key.offset = dst;
497 
498 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
499 	if (ret < 0)
500 		goto out;
501 
502 	if (ret > 0) {
503 		ret = -ENOENT;
504 		goto out;
505 	}
506 
507 	ret = btrfs_del_item(trans, quota_root, path);
508 out:
509 	btrfs_free_path(path);
510 	return ret;
511 }
512 
513 static int add_qgroup_item(struct btrfs_trans_handle *trans,
514 			   struct btrfs_root *quota_root, u64 qgroupid)
515 {
516 	int ret;
517 	struct btrfs_path *path;
518 	struct btrfs_qgroup_info_item *qgroup_info;
519 	struct btrfs_qgroup_limit_item *qgroup_limit;
520 	struct extent_buffer *leaf;
521 	struct btrfs_key key;
522 
523 	path = btrfs_alloc_path();
524 	if (!path)
525 		return -ENOMEM;
526 
527 	key.objectid = 0;
528 	key.type = BTRFS_QGROUP_INFO_KEY;
529 	key.offset = qgroupid;
530 
531 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
532 				      sizeof(*qgroup_info));
533 	if (ret)
534 		goto out;
535 
536 	leaf = path->nodes[0];
537 	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
538 				 struct btrfs_qgroup_info_item);
539 	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
540 	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
541 	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
542 	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
543 	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
544 
545 	btrfs_mark_buffer_dirty(leaf);
546 
547 	btrfs_release_path(path);
548 
549 	key.type = BTRFS_QGROUP_LIMIT_KEY;
550 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
551 				      sizeof(*qgroup_limit));
552 	if (ret)
553 		goto out;
554 
555 	leaf = path->nodes[0];
556 	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
557 				  struct btrfs_qgroup_limit_item);
558 	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
559 	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
560 	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
561 	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
562 	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
563 
564 	btrfs_mark_buffer_dirty(leaf);
565 
566 	ret = 0;
567 out:
568 	btrfs_free_path(path);
569 	return ret;
570 }
571 
572 static int del_qgroup_item(struct btrfs_trans_handle *trans,
573 			   struct btrfs_root *quota_root, u64 qgroupid)
574 {
575 	int ret;
576 	struct btrfs_path *path;
577 	struct btrfs_key key;
578 
579 	path = btrfs_alloc_path();
580 	if (!path)
581 		return -ENOMEM;
582 
583 	key.objectid = 0;
584 	key.type = BTRFS_QGROUP_INFO_KEY;
585 	key.offset = qgroupid;
586 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
587 	if (ret < 0)
588 		goto out;
589 
590 	if (ret > 0) {
591 		ret = -ENOENT;
592 		goto out;
593 	}
594 
595 	ret = btrfs_del_item(trans, quota_root, path);
596 	if (ret)
597 		goto out;
598 
599 	btrfs_release_path(path);
600 
601 	key.type = BTRFS_QGROUP_LIMIT_KEY;
602 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
603 	if (ret < 0)
604 		goto out;
605 
606 	if (ret > 0) {
607 		ret = -ENOENT;
608 		goto out;
609 	}
610 
611 	ret = btrfs_del_item(trans, quota_root, path);
612 
613 out:
614 	btrfs_free_path(path);
615 	return ret;
616 }
617 
618 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
619 				    struct btrfs_root *root, u64 qgroupid,
620 				    u64 flags, u64 max_rfer, u64 max_excl,
621 				    u64 rsv_rfer, u64 rsv_excl)
622 {
623 	struct btrfs_path *path;
624 	struct btrfs_key key;
625 	struct extent_buffer *l;
626 	struct btrfs_qgroup_limit_item *qgroup_limit;
627 	int ret;
628 	int slot;
629 
630 	key.objectid = 0;
631 	key.type = BTRFS_QGROUP_LIMIT_KEY;
632 	key.offset = qgroupid;
633 
634 	path = btrfs_alloc_path();
635 	if (!path)
636 		return -ENOMEM;
637 
638 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
639 	if (ret > 0)
640 		ret = -ENOENT;
641 
642 	if (ret)
643 		goto out;
644 
645 	l = path->nodes[0];
646 	slot = path->slots[0];
647 	qgroup_limit = btrfs_item_ptr(l, path->slots[0],
648 				      struct btrfs_qgroup_limit_item);
649 	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
650 	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
651 	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
652 	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
653 	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
654 
655 	btrfs_mark_buffer_dirty(l);
656 
657 out:
658 	btrfs_free_path(path);
659 	return ret;
660 }
661 
662 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
663 				   struct btrfs_root *root,
664 				   struct btrfs_qgroup *qgroup)
665 {
666 	struct btrfs_path *path;
667 	struct btrfs_key key;
668 	struct extent_buffer *l;
669 	struct btrfs_qgroup_info_item *qgroup_info;
670 	int ret;
671 	int slot;
672 
673 	key.objectid = 0;
674 	key.type = BTRFS_QGROUP_INFO_KEY;
675 	key.offset = qgroup->qgroupid;
676 
677 	path = btrfs_alloc_path();
678 	if (!path)
679 		return -ENOMEM;
680 
681 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
682 	if (ret > 0)
683 		ret = -ENOENT;
684 
685 	if (ret)
686 		goto out;
687 
688 	l = path->nodes[0];
689 	slot = path->slots[0];
690 	qgroup_info = btrfs_item_ptr(l, path->slots[0],
691 				 struct btrfs_qgroup_info_item);
692 	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
693 	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
694 	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
695 	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
696 	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
697 
698 	btrfs_mark_buffer_dirty(l);
699 
700 out:
701 	btrfs_free_path(path);
702 	return ret;
703 }
704 
705 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
706 				     struct btrfs_fs_info *fs_info,
707 				    struct btrfs_root *root)
708 {
709 	struct btrfs_path *path;
710 	struct btrfs_key key;
711 	struct extent_buffer *l;
712 	struct btrfs_qgroup_status_item *ptr;
713 	int ret;
714 	int slot;
715 
716 	key.objectid = 0;
717 	key.type = BTRFS_QGROUP_STATUS_KEY;
718 	key.offset = 0;
719 
720 	path = btrfs_alloc_path();
721 	if (!path)
722 		return -ENOMEM;
723 
724 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
725 	if (ret > 0)
726 		ret = -ENOENT;
727 
728 	if (ret)
729 		goto out;
730 
731 	l = path->nodes[0];
732 	slot = path->slots[0];
733 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
734 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
735 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
736 	btrfs_set_qgroup_status_rescan(l, ptr,
737 				fs_info->qgroup_rescan_progress.objectid);
738 
739 	btrfs_mark_buffer_dirty(l);
740 
741 out:
742 	btrfs_free_path(path);
743 	return ret;
744 }
745 
746 /*
747  * called with qgroup_lock held
748  */
749 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
750 				  struct btrfs_root *root)
751 {
752 	struct btrfs_path *path;
753 	struct btrfs_key key;
754 	struct extent_buffer *leaf = NULL;
755 	int ret;
756 	int nr = 0;
757 
758 	path = btrfs_alloc_path();
759 	if (!path)
760 		return -ENOMEM;
761 
762 	path->leave_spinning = 1;
763 
764 	key.objectid = 0;
765 	key.offset = 0;
766 	key.type = 0;
767 
768 	while (1) {
769 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
770 		if (ret < 0)
771 			goto out;
772 		leaf = path->nodes[0];
773 		nr = btrfs_header_nritems(leaf);
774 		if (!nr)
775 			break;
776 		/*
777 		 * delete the leaf one by one
778 		 * since the whole tree is going
779 		 * to be deleted.
780 		 */
781 		path->slots[0] = 0;
782 		ret = btrfs_del_items(trans, root, path, 0, nr);
783 		if (ret)
784 			goto out;
785 
786 		btrfs_release_path(path);
787 	}
788 	ret = 0;
789 out:
790 	root->fs_info->pending_quota_state = 0;
791 	btrfs_free_path(path);
792 	return ret;
793 }
794 
795 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
796 		       struct btrfs_fs_info *fs_info)
797 {
798 	struct btrfs_root *quota_root;
799 	struct btrfs_root *tree_root = fs_info->tree_root;
800 	struct btrfs_path *path = NULL;
801 	struct btrfs_qgroup_status_item *ptr;
802 	struct extent_buffer *leaf;
803 	struct btrfs_key key;
804 	struct btrfs_key found_key;
805 	struct btrfs_qgroup *qgroup = NULL;
806 	int ret = 0;
807 	int slot;
808 
809 	mutex_lock(&fs_info->qgroup_ioctl_lock);
810 	if (fs_info->quota_root) {
811 		fs_info->pending_quota_state = 1;
812 		goto out;
813 	}
814 
815 	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
816 	if (!fs_info->qgroup_ulist) {
817 		ret = -ENOMEM;
818 		goto out;
819 	}
820 
821 	/*
822 	 * initially create the quota tree
823 	 */
824 	quota_root = btrfs_create_tree(trans, fs_info,
825 				       BTRFS_QUOTA_TREE_OBJECTID);
826 	if (IS_ERR(quota_root)) {
827 		ret =  PTR_ERR(quota_root);
828 		goto out;
829 	}
830 
831 	path = btrfs_alloc_path();
832 	if (!path) {
833 		ret = -ENOMEM;
834 		goto out_free_root;
835 	}
836 
837 	key.objectid = 0;
838 	key.type = BTRFS_QGROUP_STATUS_KEY;
839 	key.offset = 0;
840 
841 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
842 				      sizeof(*ptr));
843 	if (ret)
844 		goto out_free_path;
845 
846 	leaf = path->nodes[0];
847 	ptr = btrfs_item_ptr(leaf, path->slots[0],
848 				 struct btrfs_qgroup_status_item);
849 	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
850 	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
851 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
852 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
853 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
854 	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
855 
856 	btrfs_mark_buffer_dirty(leaf);
857 
858 	key.objectid = 0;
859 	key.type = BTRFS_ROOT_REF_KEY;
860 	key.offset = 0;
861 
862 	btrfs_release_path(path);
863 	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
864 	if (ret > 0)
865 		goto out_add_root;
866 	if (ret < 0)
867 		goto out_free_path;
868 
869 
870 	while (1) {
871 		slot = path->slots[0];
872 		leaf = path->nodes[0];
873 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
874 
875 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
876 			ret = add_qgroup_item(trans, quota_root,
877 					      found_key.offset);
878 			if (ret)
879 				goto out_free_path;
880 
881 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
882 			if (IS_ERR(qgroup)) {
883 				ret = PTR_ERR(qgroup);
884 				goto out_free_path;
885 			}
886 		}
887 		ret = btrfs_next_item(tree_root, path);
888 		if (ret < 0)
889 			goto out_free_path;
890 		if (ret)
891 			break;
892 	}
893 
894 out_add_root:
895 	btrfs_release_path(path);
896 	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
897 	if (ret)
898 		goto out_free_path;
899 
900 	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
901 	if (IS_ERR(qgroup)) {
902 		ret = PTR_ERR(qgroup);
903 		goto out_free_path;
904 	}
905 	spin_lock(&fs_info->qgroup_lock);
906 	fs_info->quota_root = quota_root;
907 	fs_info->pending_quota_state = 1;
908 	spin_unlock(&fs_info->qgroup_lock);
909 out_free_path:
910 	btrfs_free_path(path);
911 out_free_root:
912 	if (ret) {
913 		free_extent_buffer(quota_root->node);
914 		free_extent_buffer(quota_root->commit_root);
915 		kfree(quota_root);
916 	}
917 out:
918 	if (ret) {
919 		ulist_free(fs_info->qgroup_ulist);
920 		fs_info->qgroup_ulist = NULL;
921 	}
922 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
923 	return ret;
924 }
925 
926 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
927 			struct btrfs_fs_info *fs_info)
928 {
929 	struct btrfs_root *tree_root = fs_info->tree_root;
930 	struct btrfs_root *quota_root;
931 	int ret = 0;
932 
933 	mutex_lock(&fs_info->qgroup_ioctl_lock);
934 	if (!fs_info->quota_root)
935 		goto out;
936 	spin_lock(&fs_info->qgroup_lock);
937 	fs_info->quota_enabled = 0;
938 	fs_info->pending_quota_state = 0;
939 	quota_root = fs_info->quota_root;
940 	fs_info->quota_root = NULL;
941 	spin_unlock(&fs_info->qgroup_lock);
942 
943 	btrfs_free_qgroup_config(fs_info);
944 
945 	ret = btrfs_clean_quota_tree(trans, quota_root);
946 	if (ret)
947 		goto out;
948 
949 	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
950 	if (ret)
951 		goto out;
952 
953 	list_del(&quota_root->dirty_list);
954 
955 	btrfs_tree_lock(quota_root->node);
956 	clean_tree_block(trans, tree_root, quota_root->node);
957 	btrfs_tree_unlock(quota_root->node);
958 	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
959 
960 	free_extent_buffer(quota_root->node);
961 	free_extent_buffer(quota_root->commit_root);
962 	kfree(quota_root);
963 out:
964 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
965 	return ret;
966 }
967 
968 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
969 			 struct btrfs_qgroup *qgroup)
970 {
971 	if (list_empty(&qgroup->dirty))
972 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
973 }
974 
975 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
976 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
977 {
978 	struct btrfs_root *quota_root;
979 	struct btrfs_qgroup *parent;
980 	struct btrfs_qgroup *member;
981 	struct btrfs_qgroup_list *list;
982 	int ret = 0;
983 
984 	mutex_lock(&fs_info->qgroup_ioctl_lock);
985 	quota_root = fs_info->quota_root;
986 	if (!quota_root) {
987 		ret = -EINVAL;
988 		goto out;
989 	}
990 	member = find_qgroup_rb(fs_info, src);
991 	parent = find_qgroup_rb(fs_info, dst);
992 	if (!member || !parent) {
993 		ret = -EINVAL;
994 		goto out;
995 	}
996 
997 	/* check if such qgroup relation exist firstly */
998 	list_for_each_entry(list, &member->groups, next_group) {
999 		if (list->group == parent) {
1000 			ret = -EEXIST;
1001 			goto out;
1002 		}
1003 	}
1004 
1005 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1006 	if (ret)
1007 		goto out;
1008 
1009 	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1010 	if (ret) {
1011 		del_qgroup_relation_item(trans, quota_root, src, dst);
1012 		goto out;
1013 	}
1014 
1015 	spin_lock(&fs_info->qgroup_lock);
1016 	ret = add_relation_rb(quota_root->fs_info, src, dst);
1017 	spin_unlock(&fs_info->qgroup_lock);
1018 out:
1019 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1020 	return ret;
1021 }
1022 
1023 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1024 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1025 {
1026 	struct btrfs_root *quota_root;
1027 	struct btrfs_qgroup *parent;
1028 	struct btrfs_qgroup *member;
1029 	struct btrfs_qgroup_list *list;
1030 	int ret = 0;
1031 	int err;
1032 
1033 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1034 	quota_root = fs_info->quota_root;
1035 	if (!quota_root) {
1036 		ret = -EINVAL;
1037 		goto out;
1038 	}
1039 
1040 	member = find_qgroup_rb(fs_info, src);
1041 	parent = find_qgroup_rb(fs_info, dst);
1042 	if (!member || !parent) {
1043 		ret = -EINVAL;
1044 		goto out;
1045 	}
1046 
1047 	/* check if such qgroup relation exist firstly */
1048 	list_for_each_entry(list, &member->groups, next_group) {
1049 		if (list->group == parent)
1050 			goto exist;
1051 	}
1052 	ret = -ENOENT;
1053 	goto out;
1054 exist:
1055 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1056 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
1057 	if (err && !ret)
1058 		ret = err;
1059 
1060 	spin_lock(&fs_info->qgroup_lock);
1061 	del_relation_rb(fs_info, src, dst);
1062 	spin_unlock(&fs_info->qgroup_lock);
1063 out:
1064 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1065 	return ret;
1066 }
1067 
1068 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1069 			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1070 {
1071 	struct btrfs_root *quota_root;
1072 	struct btrfs_qgroup *qgroup;
1073 	int ret = 0;
1074 
1075 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1076 	quota_root = fs_info->quota_root;
1077 	if (!quota_root) {
1078 		ret = -EINVAL;
1079 		goto out;
1080 	}
1081 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1082 	if (qgroup) {
1083 		ret = -EEXIST;
1084 		goto out;
1085 	}
1086 
1087 	ret = add_qgroup_item(trans, quota_root, qgroupid);
1088 	if (ret)
1089 		goto out;
1090 
1091 	spin_lock(&fs_info->qgroup_lock);
1092 	qgroup = add_qgroup_rb(fs_info, qgroupid);
1093 	spin_unlock(&fs_info->qgroup_lock);
1094 
1095 	if (IS_ERR(qgroup))
1096 		ret = PTR_ERR(qgroup);
1097 out:
1098 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1099 	return ret;
1100 }
1101 
1102 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1103 			struct btrfs_fs_info *fs_info, u64 qgroupid)
1104 {
1105 	struct btrfs_root *quota_root;
1106 	struct btrfs_qgroup *qgroup;
1107 	int ret = 0;
1108 
1109 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1110 	quota_root = fs_info->quota_root;
1111 	if (!quota_root) {
1112 		ret = -EINVAL;
1113 		goto out;
1114 	}
1115 
1116 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1117 	if (!qgroup) {
1118 		ret = -ENOENT;
1119 		goto out;
1120 	} else {
1121 		/* check if there are no relations to this qgroup */
1122 		if (!list_empty(&qgroup->groups) ||
1123 		    !list_empty(&qgroup->members)) {
1124 			ret = -EBUSY;
1125 			goto out;
1126 		}
1127 	}
1128 	ret = del_qgroup_item(trans, quota_root, qgroupid);
1129 
1130 	spin_lock(&fs_info->qgroup_lock);
1131 	del_qgroup_rb(quota_root->fs_info, qgroupid);
1132 	spin_unlock(&fs_info->qgroup_lock);
1133 out:
1134 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1135 	return ret;
1136 }
1137 
1138 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1139 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
1140 		       struct btrfs_qgroup_limit *limit)
1141 {
1142 	struct btrfs_root *quota_root;
1143 	struct btrfs_qgroup *qgroup;
1144 	int ret = 0;
1145 
1146 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1147 	quota_root = fs_info->quota_root;
1148 	if (!quota_root) {
1149 		ret = -EINVAL;
1150 		goto out;
1151 	}
1152 
1153 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1154 	if (!qgroup) {
1155 		ret = -ENOENT;
1156 		goto out;
1157 	}
1158 	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1159 				       limit->flags, limit->max_rfer,
1160 				       limit->max_excl, limit->rsv_rfer,
1161 				       limit->rsv_excl);
1162 	if (ret) {
1163 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1164 		printk(KERN_INFO "unable to update quota limit for %llu\n",
1165 		       qgroupid);
1166 	}
1167 
1168 	spin_lock(&fs_info->qgroup_lock);
1169 	qgroup->lim_flags = limit->flags;
1170 	qgroup->max_rfer = limit->max_rfer;
1171 	qgroup->max_excl = limit->max_excl;
1172 	qgroup->rsv_rfer = limit->rsv_rfer;
1173 	qgroup->rsv_excl = limit->rsv_excl;
1174 	spin_unlock(&fs_info->qgroup_lock);
1175 out:
1176 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1177 	return ret;
1178 }
1179 
1180 /*
1181  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1182  * the modification into a list that's later used by btrfs_end_transaction to
1183  * pass the recorded modifications on to btrfs_qgroup_account_ref.
1184  */
1185 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1186 			    struct btrfs_delayed_ref_node *node,
1187 			    struct btrfs_delayed_extent_op *extent_op)
1188 {
1189 	struct qgroup_update *u;
1190 
1191 	BUG_ON(!trans->delayed_ref_elem.seq);
1192 	u = kmalloc(sizeof(*u), GFP_NOFS);
1193 	if (!u)
1194 		return -ENOMEM;
1195 
1196 	u->node = node;
1197 	u->extent_op = extent_op;
1198 	list_add_tail(&u->list, &trans->qgroup_ref_list);
1199 
1200 	return 0;
1201 }
1202 
1203 static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1204 				    struct ulist *roots, struct ulist *tmp,
1205 				    u64 seq)
1206 {
1207 	struct ulist_node *unode;
1208 	struct ulist_iterator uiter;
1209 	struct ulist_node *tmp_unode;
1210 	struct ulist_iterator tmp_uiter;
1211 	struct btrfs_qgroup *qg;
1212 	int ret;
1213 
1214 	ULIST_ITER_INIT(&uiter);
1215 	while ((unode = ulist_next(roots, &uiter))) {
1216 		qg = find_qgroup_rb(fs_info, unode->val);
1217 		if (!qg)
1218 			continue;
1219 
1220 		ulist_reinit(tmp);
1221 						/* XXX id not needed */
1222 		ret = ulist_add(tmp, qg->qgroupid,
1223 				(u64)(uintptr_t)qg, GFP_ATOMIC);
1224 		if (ret < 0)
1225 			return ret;
1226 		ULIST_ITER_INIT(&tmp_uiter);
1227 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1228 			struct btrfs_qgroup_list *glist;
1229 
1230 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1231 			if (qg->refcnt < seq)
1232 				qg->refcnt = seq + 1;
1233 			else
1234 				++qg->refcnt;
1235 
1236 			list_for_each_entry(glist, &qg->groups, next_group) {
1237 				ret = ulist_add(tmp, glist->group->qgroupid,
1238 						(u64)(uintptr_t)glist->group,
1239 						GFP_ATOMIC);
1240 				if (ret < 0)
1241 					return ret;
1242 			}
1243 		}
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1250 				    struct ulist *roots, struct ulist *tmp,
1251 				    u64 seq, int sgn, u64 num_bytes,
1252 				    struct btrfs_qgroup *qgroup)
1253 {
1254 	struct ulist_node *unode;
1255 	struct ulist_iterator uiter;
1256 	struct btrfs_qgroup *qg;
1257 	struct btrfs_qgroup_list *glist;
1258 	int ret;
1259 
1260 	ulist_reinit(tmp);
1261 	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1262 	if (ret < 0)
1263 		return ret;
1264 
1265 	ULIST_ITER_INIT(&uiter);
1266 	while ((unode = ulist_next(tmp, &uiter))) {
1267 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1268 		if (qg->refcnt < seq) {
1269 			/* not visited by step 1 */
1270 			qg->rfer += sgn * num_bytes;
1271 			qg->rfer_cmpr += sgn * num_bytes;
1272 			if (roots->nnodes == 0) {
1273 				qg->excl += sgn * num_bytes;
1274 				qg->excl_cmpr += sgn * num_bytes;
1275 			}
1276 			qgroup_dirty(fs_info, qg);
1277 		}
1278 		WARN_ON(qg->tag >= seq);
1279 		qg->tag = seq;
1280 
1281 		list_for_each_entry(glist, &qg->groups, next_group) {
1282 			ret = ulist_add(tmp, glist->group->qgroupid,
1283 					(uintptr_t)glist->group, GFP_ATOMIC);
1284 			if (ret < 0)
1285 				return ret;
1286 		}
1287 	}
1288 
1289 	return 0;
1290 }
1291 
1292 static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1293 				    struct ulist *roots, struct ulist *tmp,
1294 				    u64 seq, int sgn, u64 num_bytes)
1295 {
1296 	struct ulist_node *unode;
1297 	struct ulist_iterator uiter;
1298 	struct btrfs_qgroup *qg;
1299 	struct ulist_node *tmp_unode;
1300 	struct ulist_iterator tmp_uiter;
1301 	int ret;
1302 
1303 	ULIST_ITER_INIT(&uiter);
1304 	while ((unode = ulist_next(roots, &uiter))) {
1305 		qg = find_qgroup_rb(fs_info, unode->val);
1306 		if (!qg)
1307 			continue;
1308 
1309 		ulist_reinit(tmp);
1310 		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1311 		if (ret < 0)
1312 			return ret;
1313 
1314 		ULIST_ITER_INIT(&tmp_uiter);
1315 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1316 			struct btrfs_qgroup_list *glist;
1317 
1318 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1319 			if (qg->tag == seq)
1320 				continue;
1321 
1322 			if (qg->refcnt - seq == roots->nnodes) {
1323 				qg->excl -= sgn * num_bytes;
1324 				qg->excl_cmpr -= sgn * num_bytes;
1325 				qgroup_dirty(fs_info, qg);
1326 			}
1327 
1328 			list_for_each_entry(glist, &qg->groups, next_group) {
1329 				ret = ulist_add(tmp, glist->group->qgroupid,
1330 						(uintptr_t)glist->group,
1331 						GFP_ATOMIC);
1332 				if (ret < 0)
1333 					return ret;
1334 			}
1335 		}
1336 	}
1337 
1338 	return 0;
1339 }
1340 
1341 /*
1342  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1343  * from the fs. First, all roots referencing the extent are searched, and
1344  * then the space is accounted accordingly to the different roots. The
1345  * accounting algorithm works in 3 steps documented inline.
1346  */
1347 int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1348 			     struct btrfs_fs_info *fs_info,
1349 			     struct btrfs_delayed_ref_node *node,
1350 			     struct btrfs_delayed_extent_op *extent_op)
1351 {
1352 	struct btrfs_key ins;
1353 	struct btrfs_root *quota_root;
1354 	u64 ref_root;
1355 	struct btrfs_qgroup *qgroup;
1356 	struct ulist *roots = NULL;
1357 	u64 seq;
1358 	int ret = 0;
1359 	int sgn;
1360 
1361 	if (!fs_info->quota_enabled)
1362 		return 0;
1363 
1364 	BUG_ON(!fs_info->quota_root);
1365 
1366 	ins.objectid = node->bytenr;
1367 	ins.offset = node->num_bytes;
1368 	ins.type = BTRFS_EXTENT_ITEM_KEY;
1369 
1370 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1371 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1372 		struct btrfs_delayed_tree_ref *ref;
1373 		ref = btrfs_delayed_node_to_tree_ref(node);
1374 		ref_root = ref->root;
1375 	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1376 		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
1377 		struct btrfs_delayed_data_ref *ref;
1378 		ref = btrfs_delayed_node_to_data_ref(node);
1379 		ref_root = ref->root;
1380 	} else {
1381 		BUG();
1382 	}
1383 
1384 	if (!is_fstree(ref_root)) {
1385 		/*
1386 		 * non-fs-trees are not being accounted
1387 		 */
1388 		return 0;
1389 	}
1390 
1391 	switch (node->action) {
1392 	case BTRFS_ADD_DELAYED_REF:
1393 	case BTRFS_ADD_DELAYED_EXTENT:
1394 		sgn = 1;
1395 		seq = btrfs_tree_mod_seq_prev(node->seq);
1396 		break;
1397 	case BTRFS_DROP_DELAYED_REF:
1398 		sgn = -1;
1399 		seq = node->seq;
1400 		break;
1401 	case BTRFS_UPDATE_DELAYED_HEAD:
1402 		return 0;
1403 	default:
1404 		BUG();
1405 	}
1406 
1407 	mutex_lock(&fs_info->qgroup_rescan_lock);
1408 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1409 		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1410 			mutex_unlock(&fs_info->qgroup_rescan_lock);
1411 			return 0;
1412 		}
1413 	}
1414 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1415 
1416 	/*
1417 	 * the delayed ref sequence number we pass depends on the direction of
1418 	 * the operation. for add operations, we pass
1419 	 * tree_mod_log_prev_seq(node->seq) to skip
1420 	 * the delayed ref's current sequence number, because we need the state
1421 	 * of the tree before the add operation. for delete operations, we pass
1422 	 * (node->seq) to include the delayed ref's current sequence number,
1423 	 * because we need the state of the tree after the delete operation.
1424 	 */
1425 	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1426 	if (ret < 0)
1427 		return ret;
1428 
1429 	spin_lock(&fs_info->qgroup_lock);
1430 
1431 	quota_root = fs_info->quota_root;
1432 	if (!quota_root)
1433 		goto unlock;
1434 
1435 	qgroup = find_qgroup_rb(fs_info, ref_root);
1436 	if (!qgroup)
1437 		goto unlock;
1438 
1439 	/*
1440 	 * step 1: for each old ref, visit all nodes once and inc refcnt
1441 	 */
1442 	ulist_reinit(fs_info->qgroup_ulist);
1443 	seq = fs_info->qgroup_seq;
1444 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1445 
1446 	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
1447 				       seq);
1448 	if (ret)
1449 		goto unlock;
1450 
1451 	/*
1452 	 * step 2: walk from the new root
1453 	 */
1454 	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
1455 				       seq, sgn, node->num_bytes, qgroup);
1456 	if (ret)
1457 		goto unlock;
1458 
1459 	/*
1460 	 * step 3: walk again from old refs
1461 	 */
1462 	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
1463 				       seq, sgn, node->num_bytes);
1464 	if (ret)
1465 		goto unlock;
1466 
1467 unlock:
1468 	spin_unlock(&fs_info->qgroup_lock);
1469 	ulist_free(roots);
1470 
1471 	return ret;
1472 }
1473 
1474 /*
1475  * called from commit_transaction. Writes all changed qgroups to disk.
1476  */
1477 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1478 		      struct btrfs_fs_info *fs_info)
1479 {
1480 	struct btrfs_root *quota_root = fs_info->quota_root;
1481 	int ret = 0;
1482 	int start_rescan_worker = 0;
1483 
1484 	if (!quota_root)
1485 		goto out;
1486 
1487 	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1488 		start_rescan_worker = 1;
1489 
1490 	fs_info->quota_enabled = fs_info->pending_quota_state;
1491 
1492 	spin_lock(&fs_info->qgroup_lock);
1493 	while (!list_empty(&fs_info->dirty_qgroups)) {
1494 		struct btrfs_qgroup *qgroup;
1495 		qgroup = list_first_entry(&fs_info->dirty_qgroups,
1496 					  struct btrfs_qgroup, dirty);
1497 		list_del_init(&qgroup->dirty);
1498 		spin_unlock(&fs_info->qgroup_lock);
1499 		ret = update_qgroup_info_item(trans, quota_root, qgroup);
1500 		if (ret)
1501 			fs_info->qgroup_flags |=
1502 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1503 		spin_lock(&fs_info->qgroup_lock);
1504 	}
1505 	if (fs_info->quota_enabled)
1506 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1507 	else
1508 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1509 	spin_unlock(&fs_info->qgroup_lock);
1510 
1511 	ret = update_qgroup_status_item(trans, fs_info, quota_root);
1512 	if (ret)
1513 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1514 
1515 	if (!ret && start_rescan_worker) {
1516 		ret = qgroup_rescan_init(fs_info, 0, 1);
1517 		if (!ret) {
1518 			qgroup_rescan_zero_tracking(fs_info);
1519 			btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
1520 					   &fs_info->qgroup_rescan_work);
1521 		}
1522 		ret = 0;
1523 	}
1524 
1525 out:
1526 
1527 	return ret;
1528 }
1529 
1530 /*
1531  * copy the acounting information between qgroups. This is necessary when a
1532  * snapshot or a subvolume is created
1533  */
1534 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1535 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1536 			 struct btrfs_qgroup_inherit *inherit)
1537 {
1538 	int ret = 0;
1539 	int i;
1540 	u64 *i_qgroups;
1541 	struct btrfs_root *quota_root = fs_info->quota_root;
1542 	struct btrfs_qgroup *srcgroup;
1543 	struct btrfs_qgroup *dstgroup;
1544 	u32 level_size = 0;
1545 	u64 nums;
1546 
1547 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1548 	if (!fs_info->quota_enabled)
1549 		goto out;
1550 
1551 	if (!quota_root) {
1552 		ret = -EINVAL;
1553 		goto out;
1554 	}
1555 
1556 	if (inherit) {
1557 		i_qgroups = (u64 *)(inherit + 1);
1558 		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1559 		       2 * inherit->num_excl_copies;
1560 		for (i = 0; i < nums; ++i) {
1561 			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1562 			if (!srcgroup) {
1563 				ret = -EINVAL;
1564 				goto out;
1565 			}
1566 			++i_qgroups;
1567 		}
1568 	}
1569 
1570 	/*
1571 	 * create a tracking group for the subvol itself
1572 	 */
1573 	ret = add_qgroup_item(trans, quota_root, objectid);
1574 	if (ret)
1575 		goto out;
1576 
1577 	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1578 		ret = update_qgroup_limit_item(trans, quota_root, objectid,
1579 					       inherit->lim.flags,
1580 					       inherit->lim.max_rfer,
1581 					       inherit->lim.max_excl,
1582 					       inherit->lim.rsv_rfer,
1583 					       inherit->lim.rsv_excl);
1584 		if (ret)
1585 			goto out;
1586 	}
1587 
1588 	if (srcid) {
1589 		struct btrfs_root *srcroot;
1590 		struct btrfs_key srckey;
1591 		int srcroot_level;
1592 
1593 		srckey.objectid = srcid;
1594 		srckey.type = BTRFS_ROOT_ITEM_KEY;
1595 		srckey.offset = (u64)-1;
1596 		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1597 		if (IS_ERR(srcroot)) {
1598 			ret = PTR_ERR(srcroot);
1599 			goto out;
1600 		}
1601 
1602 		rcu_read_lock();
1603 		srcroot_level = btrfs_header_level(srcroot->node);
1604 		level_size = btrfs_level_size(srcroot, srcroot_level);
1605 		rcu_read_unlock();
1606 	}
1607 
1608 	/*
1609 	 * add qgroup to all inherited groups
1610 	 */
1611 	if (inherit) {
1612 		i_qgroups = (u64 *)(inherit + 1);
1613 		for (i = 0; i < inherit->num_qgroups; ++i) {
1614 			ret = add_qgroup_relation_item(trans, quota_root,
1615 						       objectid, *i_qgroups);
1616 			if (ret)
1617 				goto out;
1618 			ret = add_qgroup_relation_item(trans, quota_root,
1619 						       *i_qgroups, objectid);
1620 			if (ret)
1621 				goto out;
1622 			++i_qgroups;
1623 		}
1624 	}
1625 
1626 
1627 	spin_lock(&fs_info->qgroup_lock);
1628 
1629 	dstgroup = add_qgroup_rb(fs_info, objectid);
1630 	if (IS_ERR(dstgroup)) {
1631 		ret = PTR_ERR(dstgroup);
1632 		goto unlock;
1633 	}
1634 
1635 	if (srcid) {
1636 		srcgroup = find_qgroup_rb(fs_info, srcid);
1637 		if (!srcgroup)
1638 			goto unlock;
1639 		dstgroup->rfer = srcgroup->rfer - level_size;
1640 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1641 		srcgroup->excl = level_size;
1642 		srcgroup->excl_cmpr = level_size;
1643 		qgroup_dirty(fs_info, dstgroup);
1644 		qgroup_dirty(fs_info, srcgroup);
1645 	}
1646 
1647 	if (!inherit)
1648 		goto unlock;
1649 
1650 	i_qgroups = (u64 *)(inherit + 1);
1651 	for (i = 0; i < inherit->num_qgroups; ++i) {
1652 		ret = add_relation_rb(quota_root->fs_info, objectid,
1653 				      *i_qgroups);
1654 		if (ret)
1655 			goto unlock;
1656 		++i_qgroups;
1657 	}
1658 
1659 	for (i = 0; i <  inherit->num_ref_copies; ++i) {
1660 		struct btrfs_qgroup *src;
1661 		struct btrfs_qgroup *dst;
1662 
1663 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1664 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1665 
1666 		if (!src || !dst) {
1667 			ret = -EINVAL;
1668 			goto unlock;
1669 		}
1670 
1671 		dst->rfer = src->rfer - level_size;
1672 		dst->rfer_cmpr = src->rfer_cmpr - level_size;
1673 		i_qgroups += 2;
1674 	}
1675 	for (i = 0; i <  inherit->num_excl_copies; ++i) {
1676 		struct btrfs_qgroup *src;
1677 		struct btrfs_qgroup *dst;
1678 
1679 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1680 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1681 
1682 		if (!src || !dst) {
1683 			ret = -EINVAL;
1684 			goto unlock;
1685 		}
1686 
1687 		dst->excl = src->excl + level_size;
1688 		dst->excl_cmpr = src->excl_cmpr + level_size;
1689 		i_qgroups += 2;
1690 	}
1691 
1692 unlock:
1693 	spin_unlock(&fs_info->qgroup_lock);
1694 out:
1695 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1696 	return ret;
1697 }
1698 
1699 /*
1700  * reserve some space for a qgroup and all its parents. The reservation takes
1701  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1702  * accounting. If not enough space is available, EDQUOT is returned.
1703  * We assume that the requested space is new for all qgroups.
1704  */
1705 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1706 {
1707 	struct btrfs_root *quota_root;
1708 	struct btrfs_qgroup *qgroup;
1709 	struct btrfs_fs_info *fs_info = root->fs_info;
1710 	u64 ref_root = root->root_key.objectid;
1711 	int ret = 0;
1712 	struct ulist_node *unode;
1713 	struct ulist_iterator uiter;
1714 
1715 	if (!is_fstree(ref_root))
1716 		return 0;
1717 
1718 	if (num_bytes == 0)
1719 		return 0;
1720 
1721 	spin_lock(&fs_info->qgroup_lock);
1722 	quota_root = fs_info->quota_root;
1723 	if (!quota_root)
1724 		goto out;
1725 
1726 	qgroup = find_qgroup_rb(fs_info, ref_root);
1727 	if (!qgroup)
1728 		goto out;
1729 
1730 	/*
1731 	 * in a first step, we check all affected qgroups if any limits would
1732 	 * be exceeded
1733 	 */
1734 	ulist_reinit(fs_info->qgroup_ulist);
1735 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1736 			(uintptr_t)qgroup, GFP_ATOMIC);
1737 	if (ret < 0)
1738 		goto out;
1739 	ULIST_ITER_INIT(&uiter);
1740 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1741 		struct btrfs_qgroup *qg;
1742 		struct btrfs_qgroup_list *glist;
1743 
1744 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1745 
1746 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1747 		    qg->reserved + (s64)qg->rfer + num_bytes >
1748 		    qg->max_rfer) {
1749 			ret = -EDQUOT;
1750 			goto out;
1751 		}
1752 
1753 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1754 		    qg->reserved + (s64)qg->excl + num_bytes >
1755 		    qg->max_excl) {
1756 			ret = -EDQUOT;
1757 			goto out;
1758 		}
1759 
1760 		list_for_each_entry(glist, &qg->groups, next_group) {
1761 			ret = ulist_add(fs_info->qgroup_ulist,
1762 					glist->group->qgroupid,
1763 					(uintptr_t)glist->group, GFP_ATOMIC);
1764 			if (ret < 0)
1765 				goto out;
1766 		}
1767 	}
1768 	ret = 0;
1769 	/*
1770 	 * no limits exceeded, now record the reservation into all qgroups
1771 	 */
1772 	ULIST_ITER_INIT(&uiter);
1773 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1774 		struct btrfs_qgroup *qg;
1775 
1776 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1777 
1778 		qg->reserved += num_bytes;
1779 	}
1780 
1781 out:
1782 	spin_unlock(&fs_info->qgroup_lock);
1783 	return ret;
1784 }
1785 
1786 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1787 {
1788 	struct btrfs_root *quota_root;
1789 	struct btrfs_qgroup *qgroup;
1790 	struct btrfs_fs_info *fs_info = root->fs_info;
1791 	struct ulist_node *unode;
1792 	struct ulist_iterator uiter;
1793 	u64 ref_root = root->root_key.objectid;
1794 	int ret = 0;
1795 
1796 	if (!is_fstree(ref_root))
1797 		return;
1798 
1799 	if (num_bytes == 0)
1800 		return;
1801 
1802 	spin_lock(&fs_info->qgroup_lock);
1803 
1804 	quota_root = fs_info->quota_root;
1805 	if (!quota_root)
1806 		goto out;
1807 
1808 	qgroup = find_qgroup_rb(fs_info, ref_root);
1809 	if (!qgroup)
1810 		goto out;
1811 
1812 	ulist_reinit(fs_info->qgroup_ulist);
1813 	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
1814 			(uintptr_t)qgroup, GFP_ATOMIC);
1815 	if (ret < 0)
1816 		goto out;
1817 	ULIST_ITER_INIT(&uiter);
1818 	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
1819 		struct btrfs_qgroup *qg;
1820 		struct btrfs_qgroup_list *glist;
1821 
1822 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1823 
1824 		qg->reserved -= num_bytes;
1825 
1826 		list_for_each_entry(glist, &qg->groups, next_group) {
1827 			ret = ulist_add(fs_info->qgroup_ulist,
1828 					glist->group->qgroupid,
1829 					(uintptr_t)glist->group, GFP_ATOMIC);
1830 			if (ret < 0)
1831 				goto out;
1832 		}
1833 	}
1834 
1835 out:
1836 	spin_unlock(&fs_info->qgroup_lock);
1837 }
1838 
1839 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1840 {
1841 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1842 		return;
1843 	pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1844 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1845 		(u32)(trans->delayed_ref_elem.seq >> 32),
1846 		(u32)trans->delayed_ref_elem.seq);
1847 	BUG();
1848 }
1849 
1850 /*
1851  * returns < 0 on error, 0 when more leafs are to be scanned.
1852  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1853  */
1854 static int
1855 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1856 		   struct btrfs_trans_handle *trans, struct ulist *tmp,
1857 		   struct extent_buffer *scratch_leaf)
1858 {
1859 	struct btrfs_key found;
1860 	struct ulist *roots = NULL;
1861 	struct ulist_node *unode;
1862 	struct ulist_iterator uiter;
1863 	struct seq_list tree_mod_seq_elem = {};
1864 	u64 seq;
1865 	int slot;
1866 	int ret;
1867 
1868 	path->leave_spinning = 1;
1869 	mutex_lock(&fs_info->qgroup_rescan_lock);
1870 	ret = btrfs_search_slot_for_read(fs_info->extent_root,
1871 					 &fs_info->qgroup_rescan_progress,
1872 					 path, 1, 0);
1873 
1874 	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1875 		 fs_info->qgroup_rescan_progress.objectid,
1876 		 fs_info->qgroup_rescan_progress.type,
1877 		 fs_info->qgroup_rescan_progress.offset, ret);
1878 
1879 	if (ret) {
1880 		/*
1881 		 * The rescan is about to end, we will not be scanning any
1882 		 * further blocks. We cannot unset the RESCAN flag here, because
1883 		 * we want to commit the transaction if everything went well.
1884 		 * To make the live accounting work in this phase, we set our
1885 		 * scan progress pointer such that every real extent objectid
1886 		 * will be smaller.
1887 		 */
1888 		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1889 		btrfs_release_path(path);
1890 		mutex_unlock(&fs_info->qgroup_rescan_lock);
1891 		return ret;
1892 	}
1893 
1894 	btrfs_item_key_to_cpu(path->nodes[0], &found,
1895 			      btrfs_header_nritems(path->nodes[0]) - 1);
1896 	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1897 
1898 	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1899 	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1900 	slot = path->slots[0];
1901 	btrfs_release_path(path);
1902 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1903 
1904 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1905 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1906 		if (found.type != BTRFS_EXTENT_ITEM_KEY)
1907 			continue;
1908 		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1909 					   tree_mod_seq_elem.seq, &roots);
1910 		if (ret < 0)
1911 			goto out;
1912 		spin_lock(&fs_info->qgroup_lock);
1913 		seq = fs_info->qgroup_seq;
1914 		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1915 
1916 		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1917 		if (ret) {
1918 			spin_unlock(&fs_info->qgroup_lock);
1919 			ulist_free(roots);
1920 			goto out;
1921 		}
1922 
1923 		/*
1924 		 * step2 of btrfs_qgroup_account_ref works from a single root,
1925 		 * we're doing all at once here.
1926 		 */
1927 		ulist_reinit(tmp);
1928 		ULIST_ITER_INIT(&uiter);
1929 		while ((unode = ulist_next(roots, &uiter))) {
1930 			struct btrfs_qgroup *qg;
1931 
1932 			qg = find_qgroup_rb(fs_info, unode->val);
1933 			if (!qg)
1934 				continue;
1935 
1936 			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1937 					GFP_ATOMIC);
1938 			if (ret < 0) {
1939 				spin_unlock(&fs_info->qgroup_lock);
1940 				ulist_free(roots);
1941 				goto out;
1942 			}
1943 		}
1944 
1945 		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1946 		ULIST_ITER_INIT(&uiter);
1947 		while ((unode = ulist_next(tmp, &uiter))) {
1948 			struct btrfs_qgroup *qg;
1949 			struct btrfs_qgroup_list *glist;
1950 
1951 			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1952 			qg->rfer += found.offset;
1953 			qg->rfer_cmpr += found.offset;
1954 			WARN_ON(qg->tag >= seq);
1955 			if (qg->refcnt - seq == roots->nnodes) {
1956 				qg->excl += found.offset;
1957 				qg->excl_cmpr += found.offset;
1958 			}
1959 			qgroup_dirty(fs_info, qg);
1960 
1961 			list_for_each_entry(glist, &qg->groups, next_group) {
1962 				ret = ulist_add(tmp, glist->group->qgroupid,
1963 						(uintptr_t)glist->group,
1964 						GFP_ATOMIC);
1965 				if (ret < 0) {
1966 					spin_unlock(&fs_info->qgroup_lock);
1967 					ulist_free(roots);
1968 					goto out;
1969 				}
1970 			}
1971 		}
1972 
1973 		spin_unlock(&fs_info->qgroup_lock);
1974 		ulist_free(roots);
1975 		ret = 0;
1976 	}
1977 
1978 out:
1979 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1980 
1981 	return ret;
1982 }
1983 
1984 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
1985 {
1986 	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
1987 						     qgroup_rescan_work);
1988 	struct btrfs_path *path;
1989 	struct btrfs_trans_handle *trans = NULL;
1990 	struct ulist *tmp = NULL;
1991 	struct extent_buffer *scratch_leaf = NULL;
1992 	int err = -ENOMEM;
1993 
1994 	path = btrfs_alloc_path();
1995 	if (!path)
1996 		goto out;
1997 	tmp = ulist_alloc(GFP_NOFS);
1998 	if (!tmp)
1999 		goto out;
2000 	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2001 	if (!scratch_leaf)
2002 		goto out;
2003 
2004 	err = 0;
2005 	while (!err) {
2006 		trans = btrfs_start_transaction(fs_info->fs_root, 0);
2007 		if (IS_ERR(trans)) {
2008 			err = PTR_ERR(trans);
2009 			break;
2010 		}
2011 		if (!fs_info->quota_enabled) {
2012 			err = -EINTR;
2013 		} else {
2014 			err = qgroup_rescan_leaf(fs_info, path, trans,
2015 						 tmp, scratch_leaf);
2016 		}
2017 		if (err > 0)
2018 			btrfs_commit_transaction(trans, fs_info->fs_root);
2019 		else
2020 			btrfs_end_transaction(trans, fs_info->fs_root);
2021 	}
2022 
2023 out:
2024 	kfree(scratch_leaf);
2025 	ulist_free(tmp);
2026 	btrfs_free_path(path);
2027 
2028 	mutex_lock(&fs_info->qgroup_rescan_lock);
2029 	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2030 
2031 	if (err == 2 &&
2032 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2033 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2034 	} else if (err < 0) {
2035 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2036 	}
2037 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2038 
2039 	if (err >= 0) {
2040 		pr_info("btrfs: qgroup scan completed%s\n",
2041 			err == 2 ? " (inconsistency flag cleared)" : "");
2042 	} else {
2043 		pr_err("btrfs: qgroup scan failed with %d\n", err);
2044 	}
2045 
2046 	complete_all(&fs_info->qgroup_rescan_completion);
2047 }
2048 
2049 /*
2050  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2051  * memory required for the rescan context.
2052  */
2053 static int
2054 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2055 		   int init_flags)
2056 {
2057 	int ret = 0;
2058 
2059 	if (!init_flags &&
2060 	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2061 	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2062 		ret = -EINVAL;
2063 		goto err;
2064 	}
2065 
2066 	mutex_lock(&fs_info->qgroup_rescan_lock);
2067 	spin_lock(&fs_info->qgroup_lock);
2068 
2069 	if (init_flags) {
2070 		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2071 			ret = -EINPROGRESS;
2072 		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2073 			ret = -EINVAL;
2074 
2075 		if (ret) {
2076 			spin_unlock(&fs_info->qgroup_lock);
2077 			mutex_unlock(&fs_info->qgroup_rescan_lock);
2078 			goto err;
2079 		}
2080 
2081 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2082 	}
2083 
2084 	memset(&fs_info->qgroup_rescan_progress, 0,
2085 		sizeof(fs_info->qgroup_rescan_progress));
2086 	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2087 
2088 	spin_unlock(&fs_info->qgroup_lock);
2089 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2090 
2091 	init_completion(&fs_info->qgroup_rescan_completion);
2092 
2093 	memset(&fs_info->qgroup_rescan_work, 0,
2094 	       sizeof(fs_info->qgroup_rescan_work));
2095 	fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;
2096 
2097 	if (ret) {
2098 err:
2099 		pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
2100 		return ret;
2101 	}
2102 
2103 	return 0;
2104 }
2105 
2106 static void
2107 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2108 {
2109 	struct rb_node *n;
2110 	struct btrfs_qgroup *qgroup;
2111 
2112 	spin_lock(&fs_info->qgroup_lock);
2113 	/* clear all current qgroup tracking information */
2114 	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2115 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
2116 		qgroup->rfer = 0;
2117 		qgroup->rfer_cmpr = 0;
2118 		qgroup->excl = 0;
2119 		qgroup->excl_cmpr = 0;
2120 	}
2121 	spin_unlock(&fs_info->qgroup_lock);
2122 }
2123 
2124 int
2125 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2126 {
2127 	int ret = 0;
2128 	struct btrfs_trans_handle *trans;
2129 
2130 	ret = qgroup_rescan_init(fs_info, 0, 1);
2131 	if (ret)
2132 		return ret;
2133 
2134 	/*
2135 	 * We have set the rescan_progress to 0, which means no more
2136 	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
2137 	 * However, btrfs_qgroup_account_ref may be right after its call
2138 	 * to btrfs_find_all_roots, in which case it would still do the
2139 	 * accounting.
2140 	 * To solve this, we're committing the transaction, which will
2141 	 * ensure we run all delayed refs and only after that, we are
2142 	 * going to clear all tracking information for a clean start.
2143 	 */
2144 
2145 	trans = btrfs_join_transaction(fs_info->fs_root);
2146 	if (IS_ERR(trans)) {
2147 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2148 		return PTR_ERR(trans);
2149 	}
2150 	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2151 	if (ret) {
2152 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2153 		return ret;
2154 	}
2155 
2156 	qgroup_rescan_zero_tracking(fs_info);
2157 
2158 	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2159 			   &fs_info->qgroup_rescan_work);
2160 
2161 	return 0;
2162 }
2163 
2164 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2165 {
2166 	int running;
2167 	int ret = 0;
2168 
2169 	mutex_lock(&fs_info->qgroup_rescan_lock);
2170 	spin_lock(&fs_info->qgroup_lock);
2171 	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2172 	spin_unlock(&fs_info->qgroup_lock);
2173 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2174 
2175 	if (running)
2176 		ret = wait_for_completion_interruptible(
2177 					&fs_info->qgroup_rescan_completion);
2178 
2179 	return ret;
2180 }
2181 
2182 /*
2183  * this is only called from open_ctree where we're still single threaded, thus
2184  * locking is omitted here.
2185  */
2186 void
2187 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2188 {
2189 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2190 		btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2191 				   &fs_info->qgroup_rescan_work);
2192 }
2193