xref: /openbmc/linux/fs/btrfs/qgroup.c (revision c1d45424)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27 
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 
36 /* TODO XXX FIXME
37  *  - subvol delete -> delete when ref goes to 0? delete limits also?
38  *  - reorganize keys
39  *  - compressed
40  *  - sync
41  *  - copy also limits on subvol creation
42  *  - limit
 *  - caches for ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47 
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;		/* qgroup id, also the rbtree key */

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits, mirroring the on-disk btrfs_qgroup_limit_item
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 tag;
	u64 refcnt;
};
90 
/*
 * glue structure to represent the relations between qgroups.  One instance
 * exists per member<->group edge and is linked into both qgroups' lists.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* linked into member->groups */
	struct list_head next_member;	/* linked into group->members */
	struct btrfs_qgroup *group;	/* the parent qgroup */
	struct btrfs_qgroup *member;	/* the child qgroup */
};
100 
101 static int
102 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 		   int init_flags);
104 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
105 
106 /* must be called with qgroup_ioctl_lock held */
107 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108 					   u64 qgroupid)
109 {
110 	struct rb_node *n = fs_info->qgroup_tree.rb_node;
111 	struct btrfs_qgroup *qgroup;
112 
113 	while (n) {
114 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
115 		if (qgroup->qgroupid < qgroupid)
116 			n = n->rb_left;
117 		else if (qgroup->qgroupid > qgroupid)
118 			n = n->rb_right;
119 		else
120 			return qgroup;
121 	}
122 	return NULL;
123 }
124 
125 /* must be called with qgroup_lock held */
126 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127 					  u64 qgroupid)
128 {
129 	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130 	struct rb_node *parent = NULL;
131 	struct btrfs_qgroup *qgroup;
132 
133 	while (*p) {
134 		parent = *p;
135 		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136 
137 		if (qgroup->qgroupid < qgroupid)
138 			p = &(*p)->rb_left;
139 		else if (qgroup->qgroupid > qgroupid)
140 			p = &(*p)->rb_right;
141 		else
142 			return qgroup;
143 	}
144 
145 	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146 	if (!qgroup)
147 		return ERR_PTR(-ENOMEM);
148 
149 	qgroup->qgroupid = qgroupid;
150 	INIT_LIST_HEAD(&qgroup->groups);
151 	INIT_LIST_HEAD(&qgroup->members);
152 	INIT_LIST_HEAD(&qgroup->dirty);
153 
154 	rb_link_node(&qgroup->node, parent, p);
155 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156 
157 	return qgroup;
158 }
159 
/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
	struct btrfs_qgroup_list *list;

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	/* safe even if not on the dirty list: 'dirty' is list-initialized */
	list_del(&qgroup->dirty);

	/* free all relations where this qgroup is a member of other groups */
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	/* free all relations where other groups are members of this qgroup */
	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);

	return 0;
}
191 
192 /* must be called with qgroup_lock held */
193 static int add_relation_rb(struct btrfs_fs_info *fs_info,
194 			   u64 memberid, u64 parentid)
195 {
196 	struct btrfs_qgroup *member;
197 	struct btrfs_qgroup *parent;
198 	struct btrfs_qgroup_list *list;
199 
200 	member = find_qgroup_rb(fs_info, memberid);
201 	parent = find_qgroup_rb(fs_info, parentid);
202 	if (!member || !parent)
203 		return -ENOENT;
204 
205 	list = kzalloc(sizeof(*list), GFP_ATOMIC);
206 	if (!list)
207 		return -ENOMEM;
208 
209 	list->group = parent;
210 	list->member = member;
211 	list_add_tail(&list->next_group, &member->groups);
212 	list_add_tail(&list->next_member, &parent->members);
213 
214 	return 0;
215 }
216 
217 /* must be called with qgroup_lock held */
218 static int del_relation_rb(struct btrfs_fs_info *fs_info,
219 			   u64 memberid, u64 parentid)
220 {
221 	struct btrfs_qgroup *member;
222 	struct btrfs_qgroup *parent;
223 	struct btrfs_qgroup_list *list;
224 
225 	member = find_qgroup_rb(fs_info, memberid);
226 	parent = find_qgroup_rb(fs_info, parentid);
227 	if (!member || !parent)
228 		return -ENOENT;
229 
230 	list_for_each_entry(list, &member->groups, next_group) {
231 		if (list->group == parent) {
232 			list_del(&list->next_group);
233 			list_del(&list->next_member);
234 			kfree(list);
235 			return 0;
236 		}
237 	}
238 	return -ENOENT;
239 }
240 
241 /*
242  * The full config is read in one go, only called from open_ctree()
243  * It doesn't use any locking, as at this point we're still single-threaded
244  */
245 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
246 {
247 	struct btrfs_key key;
248 	struct btrfs_key found_key;
249 	struct btrfs_root *quota_root = fs_info->quota_root;
250 	struct btrfs_path *path = NULL;
251 	struct extent_buffer *l;
252 	int slot;
253 	int ret = 0;
254 	u64 flags = 0;
255 	u64 rescan_progress = 0;
256 
257 	if (!fs_info->quota_enabled)
258 		return 0;
259 
260 	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
261 	if (!fs_info->qgroup_ulist) {
262 		ret = -ENOMEM;
263 		goto out;
264 	}
265 
266 	path = btrfs_alloc_path();
267 	if (!path) {
268 		ret = -ENOMEM;
269 		goto out;
270 	}
271 
272 	/* default this to quota off, in case no status key is found */
273 	fs_info->qgroup_flags = 0;
274 
275 	/*
276 	 * pass 1: read status, all qgroup infos and limits
277 	 */
278 	key.objectid = 0;
279 	key.type = 0;
280 	key.offset = 0;
281 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
282 	if (ret)
283 		goto out;
284 
285 	while (1) {
286 		struct btrfs_qgroup *qgroup;
287 
288 		slot = path->slots[0];
289 		l = path->nodes[0];
290 		btrfs_item_key_to_cpu(l, &found_key, slot);
291 
292 		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
293 			struct btrfs_qgroup_status_item *ptr;
294 
295 			ptr = btrfs_item_ptr(l, slot,
296 					     struct btrfs_qgroup_status_item);
297 
298 			if (btrfs_qgroup_status_version(l, ptr) !=
299 			    BTRFS_QGROUP_STATUS_VERSION) {
300 				printk(KERN_ERR
301 				 "btrfs: old qgroup version, quota disabled\n");
302 				goto out;
303 			}
304 			if (btrfs_qgroup_status_generation(l, ptr) !=
305 			    fs_info->generation) {
306 				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
307 				printk(KERN_ERR
308 					"btrfs: qgroup generation mismatch, "
309 					"marked as inconsistent\n");
310 			}
311 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
312 									  ptr);
313 			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
314 			goto next1;
315 		}
316 
317 		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
318 		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
319 			goto next1;
320 
321 		qgroup = find_qgroup_rb(fs_info, found_key.offset);
322 		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
323 		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
324 			printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
325 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
326 		}
327 		if (!qgroup) {
328 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
329 			if (IS_ERR(qgroup)) {
330 				ret = PTR_ERR(qgroup);
331 				goto out;
332 			}
333 		}
334 		switch (found_key.type) {
335 		case BTRFS_QGROUP_INFO_KEY: {
336 			struct btrfs_qgroup_info_item *ptr;
337 
338 			ptr = btrfs_item_ptr(l, slot,
339 					     struct btrfs_qgroup_info_item);
340 			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
341 			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
342 			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
343 			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
344 			/* generation currently unused */
345 			break;
346 		}
347 		case BTRFS_QGROUP_LIMIT_KEY: {
348 			struct btrfs_qgroup_limit_item *ptr;
349 
350 			ptr = btrfs_item_ptr(l, slot,
351 					     struct btrfs_qgroup_limit_item);
352 			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
353 			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
354 			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
355 			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
356 			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
357 			break;
358 		}
359 		}
360 next1:
361 		ret = btrfs_next_item(quota_root, path);
362 		if (ret < 0)
363 			goto out;
364 		if (ret)
365 			break;
366 	}
367 	btrfs_release_path(path);
368 
369 	/*
370 	 * pass 2: read all qgroup relations
371 	 */
372 	key.objectid = 0;
373 	key.type = BTRFS_QGROUP_RELATION_KEY;
374 	key.offset = 0;
375 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
376 	if (ret)
377 		goto out;
378 	while (1) {
379 		slot = path->slots[0];
380 		l = path->nodes[0];
381 		btrfs_item_key_to_cpu(l, &found_key, slot);
382 
383 		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
384 			goto next2;
385 
386 		if (found_key.objectid > found_key.offset) {
387 			/* parent <- member, not needed to build config */
388 			/* FIXME should we omit the key completely? */
389 			goto next2;
390 		}
391 
392 		ret = add_relation_rb(fs_info, found_key.objectid,
393 				      found_key.offset);
394 		if (ret == -ENOENT) {
395 			printk(KERN_WARNING
396 				"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
397 				(unsigned long long)found_key.objectid,
398 				(unsigned long long)found_key.offset);
399 			ret = 0;	/* ignore the error */
400 		}
401 		if (ret)
402 			goto out;
403 next2:
404 		ret = btrfs_next_item(quota_root, path);
405 		if (ret < 0)
406 			goto out;
407 		if (ret)
408 			break;
409 	}
410 out:
411 	fs_info->qgroup_flags |= flags;
412 	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
413 		fs_info->quota_enabled = 0;
414 		fs_info->pending_quota_state = 0;
415 	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
416 		   ret >= 0) {
417 		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
418 	}
419 	btrfs_free_path(path);
420 
421 	if (ret < 0) {
422 		ulist_free(fs_info->qgroup_ulist);
423 		fs_info->qgroup_ulist = NULL;
424 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
425 	}
426 
427 	return ret < 0 ? ret : 0;
428 }
429 
430 /*
431  * This is only called from close_ctree() or open_ctree(), both in single-
 * threaded paths. Clean up the in-memory structures. No locking needed.
433  */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;

	/* pop qgroups off the rbtree one by one and free them */
	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);

		/* relations where this qgroup is a member of other groups */
		while (!list_empty(&qgroup->groups)) {
			list = list_first_entry(&qgroup->groups,
						struct btrfs_qgroup_list,
						next_group);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}

		/* relations where other groups are members of this qgroup */
		while (!list_empty(&qgroup->members)) {
			list = list_first_entry(&qgroup->members,
						struct btrfs_qgroup_list,
						next_member);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}
		kfree(qgroup);
	}
	ulist_free(fs_info->qgroup_ulist);
}
465 
466 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
467 				    struct btrfs_root *quota_root,
468 				    u64 src, u64 dst)
469 {
470 	int ret;
471 	struct btrfs_path *path;
472 	struct btrfs_key key;
473 
474 	path = btrfs_alloc_path();
475 	if (!path)
476 		return -ENOMEM;
477 
478 	key.objectid = src;
479 	key.type = BTRFS_QGROUP_RELATION_KEY;
480 	key.offset = dst;
481 
482 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
483 
484 	btrfs_mark_buffer_dirty(path->nodes[0]);
485 
486 	btrfs_free_path(path);
487 	return ret;
488 }
489 
490 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
491 				    struct btrfs_root *quota_root,
492 				    u64 src, u64 dst)
493 {
494 	int ret;
495 	struct btrfs_path *path;
496 	struct btrfs_key key;
497 
498 	path = btrfs_alloc_path();
499 	if (!path)
500 		return -ENOMEM;
501 
502 	key.objectid = src;
503 	key.type = BTRFS_QGROUP_RELATION_KEY;
504 	key.offset = dst;
505 
506 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
507 	if (ret < 0)
508 		goto out;
509 
510 	if (ret > 0) {
511 		ret = -ENOENT;
512 		goto out;
513 	}
514 
515 	ret = btrfs_del_item(trans, quota_root, path);
516 out:
517 	btrfs_free_path(path);
518 	return ret;
519 }
520 
/*
 * Create the two on-disk items for a new qgroup: a zeroed info item and a
 * zeroed limit item, both keyed (0, type, qgroupid) in the quota tree.
 */
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret)
		goto out;

	/* all counters start at zero for a freshly created qgroup */
	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	/* same objectid/offset, only the key type changes for the limits */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret)
		goto out;

	/* zeroed limit flags mean "no limits set" */
	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				  struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
579 
/*
 * Delete both on-disk items of a qgroup (the info item and the limit
 * item).  Returns -ENOENT if either item is missing.
 */
static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first the info item ... */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	/* ... then the limit item (same key, different type) */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}
625 
626 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
627 				    struct btrfs_root *root, u64 qgroupid,
628 				    u64 flags, u64 max_rfer, u64 max_excl,
629 				    u64 rsv_rfer, u64 rsv_excl)
630 {
631 	struct btrfs_path *path;
632 	struct btrfs_key key;
633 	struct extent_buffer *l;
634 	struct btrfs_qgroup_limit_item *qgroup_limit;
635 	int ret;
636 	int slot;
637 
638 	key.objectid = 0;
639 	key.type = BTRFS_QGROUP_LIMIT_KEY;
640 	key.offset = qgroupid;
641 
642 	path = btrfs_alloc_path();
643 	if (!path)
644 		return -ENOMEM;
645 
646 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
647 	if (ret > 0)
648 		ret = -ENOENT;
649 
650 	if (ret)
651 		goto out;
652 
653 	l = path->nodes[0];
654 	slot = path->slots[0];
655 	qgroup_limit = btrfs_item_ptr(l, path->slots[0],
656 				      struct btrfs_qgroup_limit_item);
657 	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
658 	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
659 	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
660 	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
661 	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
662 
663 	btrfs_mark_buffer_dirty(l);
664 
665 out:
666 	btrfs_free_path(path);
667 	return ret;
668 }
669 
670 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
671 				   struct btrfs_root *root,
672 				   struct btrfs_qgroup *qgroup)
673 {
674 	struct btrfs_path *path;
675 	struct btrfs_key key;
676 	struct extent_buffer *l;
677 	struct btrfs_qgroup_info_item *qgroup_info;
678 	int ret;
679 	int slot;
680 
681 	key.objectid = 0;
682 	key.type = BTRFS_QGROUP_INFO_KEY;
683 	key.offset = qgroup->qgroupid;
684 
685 	path = btrfs_alloc_path();
686 	if (!path)
687 		return -ENOMEM;
688 
689 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
690 	if (ret > 0)
691 		ret = -ENOENT;
692 
693 	if (ret)
694 		goto out;
695 
696 	l = path->nodes[0];
697 	slot = path->slots[0];
698 	qgroup_info = btrfs_item_ptr(l, path->slots[0],
699 				 struct btrfs_qgroup_info_item);
700 	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
701 	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
702 	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
703 	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
704 	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
705 
706 	btrfs_mark_buffer_dirty(l);
707 
708 out:
709 	btrfs_free_path(path);
710 	return ret;
711 }
712 
713 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
714 				     struct btrfs_fs_info *fs_info,
715 				    struct btrfs_root *root)
716 {
717 	struct btrfs_path *path;
718 	struct btrfs_key key;
719 	struct extent_buffer *l;
720 	struct btrfs_qgroup_status_item *ptr;
721 	int ret;
722 	int slot;
723 
724 	key.objectid = 0;
725 	key.type = BTRFS_QGROUP_STATUS_KEY;
726 	key.offset = 0;
727 
728 	path = btrfs_alloc_path();
729 	if (!path)
730 		return -ENOMEM;
731 
732 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
733 	if (ret > 0)
734 		ret = -ENOENT;
735 
736 	if (ret)
737 		goto out;
738 
739 	l = path->nodes[0];
740 	slot = path->slots[0];
741 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
742 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
743 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
744 	btrfs_set_qgroup_status_rescan(l, ptr,
745 				fs_info->qgroup_rescan_progress.objectid);
746 
747 	btrfs_mark_buffer_dirty(l);
748 
749 out:
750 	btrfs_free_path(path);
751 	return ret;
752 }
753 
/*
 * called with qgroup_lock held
 *
 * Empty the whole quota tree, one leaf at a time, in preparation for
 * deleting the tree itself when quotas are disabled.
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	/* smallest possible key: always lands on the first leaf */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}
802 
/*
 * Enable quotas: create the quota tree with its status item, then create
 * one qgroup (on disk and in memory) per existing subvolume found via the
 * ROOT_REF items in the tree root, plus one for the top-level fs tree.
 * Serialized against other qgroup ioctls by qgroup_ioctl_lock.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		/* already enabled; just arm the pending state */
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret =  PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	/* counters start out inconsistent; a rescan will settle them */
	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* walk the ROOT_REF items to find all existing subvolumes */
	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			/* one qgroup per referenced subvolume id */
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	/* the top-level fs tree has no ROOT_REF item; add it explicitly */
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	/* publish the new quota root under the qgroup spinlock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
933 
/*
 * Disable quotas: detach and free the in-memory config, empty the quota
 * tree, delete its root item and free the tree itself.  Serialized
 * against other qgroup ioctls by qgroup_ioctl_lock.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	/* detach the quota root and in-memory state under the spinlock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	btrfs_free_qgroup_config(fs_info);
	spin_unlock(&fs_info->qgroup_lock);

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* release the (now empty) tree's last block and the root itself */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
979 
980 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
981 			 struct btrfs_qgroup *qgroup)
982 {
983 	if (list_empty(&qgroup->dirty))
984 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
985 }
986 
/*
 * Make qgroup 'src' a member of qgroup 'dst'.  Writes both directed
 * relation items to the quota tree (rolling back the first if the second
 * fails) and links the in-memory relation.
 */
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such qgroup relation exist firstly */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		/* roll back the first item so disk state stays paired */
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1034 
/*
 * Remove the membership of qgroup 'src' in qgroup 'dst'.  Deletes both
 * directed relation items from the quota tree and unlinks the in-memory
 * relation.  Returns -ENOENT if no such relation exists.
 */
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;
	int err;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such qgroup relation exist firstly */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent)
			goto exist;
	}
	ret = -ENOENT;
	goto out;
exist:
	/* try to delete both items; report the first error encountered */
	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
	err = del_qgroup_relation_item(trans, quota_root, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1079 
1080 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1081 			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1082 {
1083 	struct btrfs_root *quota_root;
1084 	struct btrfs_qgroup *qgroup;
1085 	int ret = 0;
1086 
1087 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1088 	quota_root = fs_info->quota_root;
1089 	if (!quota_root) {
1090 		ret = -EINVAL;
1091 		goto out;
1092 	}
1093 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1094 	if (qgroup) {
1095 		ret = -EEXIST;
1096 		goto out;
1097 	}
1098 
1099 	ret = add_qgroup_item(trans, quota_root, qgroupid);
1100 	if (ret)
1101 		goto out;
1102 
1103 	spin_lock(&fs_info->qgroup_lock);
1104 	qgroup = add_qgroup_rb(fs_info, qgroupid);
1105 	spin_unlock(&fs_info->qgroup_lock);
1106 
1107 	if (IS_ERR(qgroup))
1108 		ret = PTR_ERR(qgroup);
1109 out:
1110 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1111 	return ret;
1112 }
1113 
1114 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1115 			struct btrfs_fs_info *fs_info, u64 qgroupid)
1116 {
1117 	struct btrfs_root *quota_root;
1118 	struct btrfs_qgroup *qgroup;
1119 	int ret = 0;
1120 
1121 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1122 	quota_root = fs_info->quota_root;
1123 	if (!quota_root) {
1124 		ret = -EINVAL;
1125 		goto out;
1126 	}
1127 
1128 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1129 	if (!qgroup) {
1130 		ret = -ENOENT;
1131 		goto out;
1132 	} else {
1133 		/* check if there are no relations to this qgroup */
1134 		if (!list_empty(&qgroup->groups) ||
1135 		    !list_empty(&qgroup->members)) {
1136 			ret = -EBUSY;
1137 			goto out;
1138 		}
1139 	}
1140 	ret = del_qgroup_item(trans, quota_root, qgroupid);
1141 
1142 	spin_lock(&fs_info->qgroup_lock);
1143 	del_qgroup_rb(quota_root->fs_info, qgroupid);
1144 	spin_unlock(&fs_info->qgroup_lock);
1145 out:
1146 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1147 	return ret;
1148 }
1149 
/*
 * Apply the limits in @limit to the qgroup @qgroupid: first persist them in
 * the quota tree, then mirror them into the in-memory qgroup structure.
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		/* quotas are not enabled */
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		/*
		 * The on-disk item could not be updated; flag the quota state
		 * inconsistent but still apply the new limits below so that
		 * in-memory enforcement follows the user's request.
		 */
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		printk(KERN_INFO "unable to update quota limit for %llu\n",
		       (unsigned long long)qgroupid);
	}

	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1191 
1192 /*
1193  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1194  * the modification into a list that's later used by btrfs_end_transaction to
1195  * pass the recorded modifications on to btrfs_qgroup_account_ref.
1196  */
1197 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1198 			    struct btrfs_delayed_ref_node *node,
1199 			    struct btrfs_delayed_extent_op *extent_op)
1200 {
1201 	struct qgroup_update *u;
1202 
1203 	BUG_ON(!trans->delayed_ref_elem.seq);
1204 	u = kmalloc(sizeof(*u), GFP_NOFS);
1205 	if (!u)
1206 		return -ENOMEM;
1207 
1208 	u->node = node;
1209 	u->extent_op = extent_op;
1210 	list_add_tail(&u->list, &trans->qgroup_ref_list);
1211 
1212 	return 0;
1213 }
1214 
1215 static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1216 				    struct ulist *roots, struct ulist *tmp,
1217 				    u64 seq)
1218 {
1219 	struct ulist_node *unode;
1220 	struct ulist_iterator uiter;
1221 	struct ulist_node *tmp_unode;
1222 	struct ulist_iterator tmp_uiter;
1223 	struct btrfs_qgroup *qg;
1224 	int ret;
1225 
1226 	ULIST_ITER_INIT(&uiter);
1227 	while ((unode = ulist_next(roots, &uiter))) {
1228 		qg = find_qgroup_rb(fs_info, unode->val);
1229 		if (!qg)
1230 			continue;
1231 
1232 		ulist_reinit(tmp);
1233 						/* XXX id not needed */
1234 		ret = ulist_add(tmp, qg->qgroupid,
1235 				(u64)(uintptr_t)qg, GFP_ATOMIC);
1236 		if (ret < 0)
1237 			return ret;
1238 		ULIST_ITER_INIT(&tmp_uiter);
1239 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1240 			struct btrfs_qgroup_list *glist;
1241 
1242 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1243 			if (qg->refcnt < seq)
1244 				qg->refcnt = seq + 1;
1245 			else
1246 				++qg->refcnt;
1247 
1248 			list_for_each_entry(glist, &qg->groups, next_group) {
1249 				ret = ulist_add(tmp, glist->group->qgroupid,
1250 						(u64)(uintptr_t)glist->group,
1251 						GFP_ATOMIC);
1252 				if (ret < 0)
1253 					return ret;
1254 			}
1255 		}
1256 	}
1257 
1258 	return 0;
1259 }
1260 
/*
 * Step 2 of the accounting: walk upwards from @qgroup (the group of the root
 * the ref was added to / removed from). Qgroups whose refcnt was not bumped
 * by step 1 get their referenced counters adjusted by sgn * num_bytes; when
 * no other root holds the extent, the exclusive counters are adjusted too.
 * Every visited qgroup is tagged with @seq so step 3 can skip it.
 * Caller holds fs_info->qgroup_lock.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		/* continue the walk through all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1303 
/*
 * Step 3 of the accounting: walk again from all old roots and fix up the
 * exclusive counters. A qgroup whose refcnt reached seq + roots->nnodes was
 * reached from every referencing root, i.e. it held the extent exclusively
 * before; the change from this ref takes that exclusivity away (or gives it
 * back), so excl is adjusted in the opposite direction of sgn. Qgroups
 * tagged with @seq were already handled in step 2 and are skipped.
 * Caller holds fs_info->qgroup_lock.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			if (qg->tag == seq)
				continue;

			/* reached from every root: was exclusive before */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1352 
1353 /*
1354  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1355  * from the fs. First, all roots referencing the extent are searched, and
1356  * then the space is accounted accordingly to the different roots. The
1357  * accounting algorithm works in 3 steps documented inline.
1358  */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key ins;
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/* extract the root this delayed ref belongs to */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/* sgn is the direction of the change; seq selects the tree state */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * while a rescan is running, extents at or past the rescan cursor
	 * will be accounted by the rescan worker itself; skip them here.
	 */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);
	if (ret)
		goto unlock;

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
1485 
1486 /*
1487  * called from commit_transaction. Writes all changed qgroups to disk.
1488  */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/* quotas were just enabled: a full rescan must be kicked off below */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	/*
	 * Drain the dirty list. The spinlock is dropped around the item
	 * update (which may sleep); the loop re-checks list_empty each time,
	 * so entries dirtied concurrently are picked up too.
	 */
	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
					   &fs_info->qgroup_rescan_work);
		}
		/* a failed rescan init is not fatal for the commit */
		ret = 0;
	}

out:

	return ret;
}
1541 
1542 /*
1543  * copy the acounting information between qgroups. This is necessary when a
1544  * snapshot or a subvolume is created
1545  */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * validate all qgroup ids in the inherit structure up front; the
	 * id array follows the struct in memory.
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
			if (!srcgroup) {
				ret = -EINVAL;
				goto out;
			}
			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
		if (ret)
			goto out;
	}

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		/*
		 * the snapshot gets a fresh root node of its own; its size
		 * is used below to split the counters between src and dst.
		 */
		rcu_read_lock();
		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
		rcu_read_unlock();
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i) {
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret)
				goto out;
			++i_qgroups;
		}
	}


	/* from here on, update the in-memory state to match */
	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;
		/*
		 * the new root node belongs exclusively to the snapshot;
		 * everything else is shared with the source.
		 */
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		if (ret)
			goto unlock;
		++i_qgroups;
	}

	/* copy referenced counters between the requested src/dst pairs */
	for (i = 0; i <  inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	/* copy exclusive counters between the requested src/dst pairs */
	for (i = 0; i <  inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1710 
1711 /*
1712  * reserve some space for a qgroup and all its parents. The reservation takes
1713  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1714  * accounting. If not enough space is available, EDQUOT is returned.
1715  * We assume that the requested space is new for all qgroups.
1716  */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* only fs trees are accounted */
	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
		    qg->reserved + (s64)qg->rfer + num_bytes >
		    qg->max_rfer) {
			ret = -EDQUOT;
			goto out;
		}

		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
		    qg->reserved + (s64)qg->excl + num_bytes >
		    qg->max_excl) {
			ret = -EDQUOT;
			goto out;
		}

		/* also check all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
1797 
/*
 * Release a reservation previously taken with btrfs_qgroup_reserve: walk the
 * qgroup of @root and all its parents, subtracting num_bytes from each
 * group's reserved counter.
 */
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;

	/* only fs trees are accounted */
	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved -= num_bytes;

		/* propagate the release to all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
1850 
1851 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1852 {
1853 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1854 		return;
1855 	pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1856 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1857 		(u32)(trans->delayed_ref_elem.seq >> 32),
1858 		(u32)trans->delayed_ref_elem.seq);
1859 	BUG();
1860 }
1861 
1862 /*
1863  * returns < 0 on error, 0 when more leafs are to be scanned.
1864  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1865  */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans, struct ulist *tmp,
		   struct extent_buffer *scratch_leaf)
{
	struct btrfs_key found;
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct seq_list tree_mod_seq_elem = {};
	u64 seq;
	int slot;
	int ret;

	path->leave_spinning = 1;
	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
		 fs_info->qgroup_rescan_progress.type,
		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
		 ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* advance the cursor past this leaf before dropping the lock */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	/*
	 * copy the leaf so the path (and its locks) can be released while we
	 * process the items at our leisure.
	 */
	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY)
			continue;
		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
					   tree_mod_seq_elem.seq, &roots);
		if (ret < 0)
			goto out;
		spin_lock(&fs_info->qgroup_lock);
		seq = fs_info->qgroup_seq;
		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			ulist_free(roots);
			goto out;
		}

		/*
		 * step2 of btrfs_qgroup_account_ref works from a single root,
		 * we're doing all at once here.
		 */
		ulist_reinit(tmp);
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(roots, &uiter))) {
			struct btrfs_qgroup *qg;

			qg = find_qgroup_rb(fs_info, unode->val);
			if (!qg)
				continue;

			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
					GFP_ATOMIC);
			if (ret < 0) {
				spin_unlock(&fs_info->qgroup_lock);
				ulist_free(roots);
				goto out;
			}
		}

		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(tmp, &uiter))) {
			struct btrfs_qgroup *qg;
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
			qg->rfer += found.offset;
			qg->rfer_cmpr += found.offset;
			WARN_ON(qg->tag >= seq);
			/* reached from every root: extent is exclusive */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl += found.offset;
				qg->excl_cmpr += found.offset;
			}
			qgroup_dirty(fs_info, qg);

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0) {
					spin_unlock(&fs_info->qgroup_lock);
					ulist_free(roots);
					goto out;
				}
			}
		}

		spin_unlock(&fs_info->qgroup_lock);
		ulist_free(roots);
		ret = 0;
	}

out:
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}
1996 
/*
 * Worker that drives the qgroup rescan: repeatedly scans one leaf of the
 * extent tree per transaction until qgroup_rescan_leaf signals completion
 * (err > 0) or an error occurs, then clears the RESCAN flag and updates
 * the INCONSISTENT flag accordingly.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *tmp = NULL;
	struct extent_buffer *scratch_leaf = NULL;
	int err = -ENOMEM;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		goto out;
	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
	if (!scratch_leaf)
		goto out;

	err = 0;
	while (!err) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!fs_info->quota_enabled) {
			/* quotas were disabled underneath us: abort rescan */
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans,
						 tmp, scratch_leaf);
		}
		/* only a successfully finished scan is worth committing */
		if (err > 0)
			btrfs_commit_transaction(trans, fs_info->fs_root);
		else
			btrfs_end_transaction(trans, fs_info->fs_root);
	}

out:
	kfree(scratch_leaf);
	ulist_free(tmp);
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	/* err == 2: scan finished and the inconsistency can be cleared */
	if (err == 2 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (err >= 0) {
		pr_info("btrfs: qgroup scan completed%s\n",
			err == 2 ? " (inconsistency flag cleared)" : "");
	} else {
		pr_err("btrfs: qgroup scan failed with %d\n", err);
	}

	complete_all(&fs_info->qgroup_rescan_completion);
}
2061 
2062 /*
2063  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2064  * memory required for the rescan context.
2065  */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	/*
	 * !init_flags means we are resuming a rescan that was already in
	 * progress (mount time); the flags must then already reflect that.
	 */
	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		/* a fresh rescan request: reject if one is already running */
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}

		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	init_completion(&fs_info->qgroup_rescan_completion);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;

	/* the err label sits inside this if so success skips the message */
	if (ret) {
err:
		pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
		return ret;
	}

	return 0;
}
2118 
2119 static void
2120 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2121 {
2122 	struct rb_node *n;
2123 	struct btrfs_qgroup *qgroup;
2124 
2125 	spin_lock(&fs_info->qgroup_lock);
2126 	/* clear all current qgroup tracking information */
2127 	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2128 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
2129 		qgroup->rfer = 0;
2130 		qgroup->rfer_cmpr = 0;
2131 		qgroup->excl = 0;
2132 		qgroup->excl_cmpr = 0;
2133 	}
2134 	spin_unlock(&fs_info->qgroup_lock);
2135 }
2136 
/*
 * Start a full qgroup rescan: initialize the rescan state, commit the
 * current transaction to flush pending delayed refs, zero all tracking
 * information and queue the rescan worker. Returns 0 on success or a
 * negative errno; on failure the RESCAN flag set by init is rolled back.
 */
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
			   &fs_info->qgroup_rescan_work);

	return 0;
}
2176 
2177 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2178 {
2179 	int running;
2180 	int ret = 0;
2181 
2182 	mutex_lock(&fs_info->qgroup_rescan_lock);
2183 	spin_lock(&fs_info->qgroup_lock);
2184 	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2185 	spin_unlock(&fs_info->qgroup_lock);
2186 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2187 
2188 	if (running)
2189 		ret = wait_for_completion_interruptible(
2190 					&fs_info->qgroup_rescan_completion);
2191 
2192 	return ret;
2193 }
2194 
2195 /*
2196  * this is only called from open_ctree where we're still single threaded, thus
2197  * locking is omitted here.
2198  */
2199 void
2200 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2201 {
2202 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2203 		btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
2204 				   &fs_info->qgroup_rescan_work);
2205 }
2206