xref: /openbmc/linux/fs/btrfs/qgroup.c (revision 93f14468)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27 
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 
36 /* TODO XXX FIXME
37  *  - subvol delete -> delete when ref goes to 0? delete limits also?
38  *  - reorganize keys
39  *  - compressed
40  *  - sync
41  *  - copy also limits on subvol creation
42  *  - limit
43  *  - caches for ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47 
48 /*
49  * one struct for each qgroup, organized in fs_info->qgroup_tree.
50  */
51 struct btrfs_qgroup {
52 	u64 qgroupid;
53 
54 	/*
55 	 * state
56 	 */
57 	u64 rfer;	/* referenced */
58 	u64 rfer_cmpr;	/* referenced compressed */
59 	u64 excl;	/* exclusive */
60 	u64 excl_cmpr;	/* exclusive compressed */
61 
62 	/*
63 	 * limits
64 	 */
65 	u64 lim_flags;	/* which limits are set */
66 	u64 max_rfer;
67 	u64 max_excl;
68 	u64 rsv_rfer;
69 	u64 rsv_excl;
70 
71 	/*
72 	 * reservation tracking
73 	 */
74 	u64 reserved;
75 
76 	/*
77 	 * lists
78 	 */
79 	struct list_head groups;  /* groups this group is member of */
80 	struct list_head members; /* groups that are members of this group */
81 	struct list_head dirty;   /* dirty groups */
82 	struct rb_node node;	  /* tree of qgroups */
83 
84 	/*
85 	 * temp variables for accounting operations: refcnt counts how many
	 * of the old roots reach this group during an accounting run, tag
	 * marks groups already visited in step 2
86 	 */
87 	u64 tag;
88 	u64 refcnt;
89 };
90 
91 /*
92  * glue structure to represent the relations between qgroups. next_group
 * links the entry into member->groups, next_member links it into
 * group->members.
93  */
94 struct btrfs_qgroup_list {
95 	struct list_head next_group;
96 	struct list_head next_member;
97 	struct btrfs_qgroup *group;
98 	struct btrfs_qgroup *member;
99 };
100 
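/* glue to hand the rescan state to a btrfs_work worker thread */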
101 struct qgroup_rescan {
102 	struct btrfs_work	work;
103 	struct btrfs_fs_info	*fs_info;
104 };
105 
106 static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
107 				struct qgroup_rescan *qscan);
108 
109 /* must be called with qgroup_ioctl_lock or qgroup_lock held */
110 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
111 					   u64 qgroupid)
112 {
113 	struct rb_node *n = fs_info->qgroup_tree.rb_node;
114 	struct btrfs_qgroup *qgroup;
115 
116 	while (n) {
117 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
118 		if (qgroup->qgroupid < qgroupid)
119 			n = n->rb_left;
120 		else if (qgroup->qgroupid > qgroupid)
121 			n = n->rb_right;
122 		else
123 			return qgroup;
124 	}
125 	return NULL;
126 }
127 
128 /* must be called with qgroup_lock held */
129 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
130 					  u64 qgroupid)
131 {
132 	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
133 	struct rb_node *parent = NULL;
134 	struct btrfs_qgroup *qgroup;
135 
136 	while (*p) {
137 		parent = *p;
138 		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
139 
140 		if (qgroup->qgroupid < qgroupid)
141 			p = &(*p)->rb_left;
142 		else if (qgroup->qgroupid > qgroupid)
143 			p = &(*p)->rb_right;
144 		else
145 			return qgroup;
146 	}
147 
148 	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
149 	if (!qgroup)
150 		return ERR_PTR(-ENOMEM);
151 
152 	qgroup->qgroupid = qgroupid;
153 	INIT_LIST_HEAD(&qgroup->groups);
154 	INIT_LIST_HEAD(&qgroup->members);
155 	INIT_LIST_HEAD(&qgroup->dirty);
156 
157 	rb_link_node(&qgroup->node, parent, p);
158 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
159 
160 	return qgroup;
161 }
162 
163 /* must be called with qgroup_lock held */
164 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
165 {
166 	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
167 	struct btrfs_qgroup_list *list;
168 
169 	if (!qgroup)
170 		return -ENOENT;
171 
172 	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
173 	list_del(&qgroup->dirty);
174 
175 	while (!list_empty(&qgroup->groups)) {
176 		list = list_first_entry(&qgroup->groups,
177 					struct btrfs_qgroup_list, next_group);
178 		list_del(&list->next_group);
179 		list_del(&list->next_member);
180 		kfree(list);
181 	}
182 
183 	while (!list_empty(&qgroup->members)) {
184 		list = list_first_entry(&qgroup->members,
185 					struct btrfs_qgroup_list, next_member);
186 		list_del(&list->next_group);
187 		list_del(&list->next_member);
188 		kfree(list);
189 	}
190 	kfree(qgroup);
191 
192 	return 0;
193 }
194 
195 /* must be called with qgroup_lock held */
196 static int add_relation_rb(struct btrfs_fs_info *fs_info,
197 			   u64 memberid, u64 parentid)
198 {
199 	struct btrfs_qgroup *member;
200 	struct btrfs_qgroup *parent;
201 	struct btrfs_qgroup_list *list;
202 
203 	member = find_qgroup_rb(fs_info, memberid);
204 	parent = find_qgroup_rb(fs_info, parentid);
205 	if (!member || !parent)
206 		return -ENOENT;
207 
208 	list = kzalloc(sizeof(*list), GFP_ATOMIC);
209 	if (!list)
210 		return -ENOMEM;
211 
212 	list->group = parent;
213 	list->member = member;
214 	list_add_tail(&list->next_group, &member->groups);
215 	list_add_tail(&list->next_member, &parent->members);
216 
217 	return 0;
218 }
219 
220 /* must be called with qgroup_lock held */
221 static int del_relation_rb(struct btrfs_fs_info *fs_info,
222 			   u64 memberid, u64 parentid)
223 {
224 	struct btrfs_qgroup *member;
225 	struct btrfs_qgroup *parent;
226 	struct btrfs_qgroup_list *list;
227 
228 	member = find_qgroup_rb(fs_info, memberid);
229 	parent = find_qgroup_rb(fs_info, parentid);
230 	if (!member || !parent)
231 		return -ENOENT;
232 
233 	list_for_each_entry(list, &member->groups, next_group) {
234 		if (list->group == parent) {
235 			list_del(&list->next_group);
236 			list_del(&list->next_member);
237 			kfree(list);
238 			return 0;
239 		}
240 	}
241 	return -ENOENT;
242 }
243 
244 /*
245  * The full config is read in one go; this is only called from open_ctree().
246  * It doesn't use any locking, as at this point we're still single-threaded.
247  */
248 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
249 {
250 	struct btrfs_key key;
251 	struct btrfs_key found_key;
252 	struct btrfs_root *quota_root = fs_info->quota_root;
253 	struct btrfs_path *path = NULL;
254 	struct extent_buffer *l;
255 	int slot;
256 	int ret = 0;
257 	u64 flags = 0;
258 
259 	if (!fs_info->quota_enabled)
260 		return 0;
261 
262 	path = btrfs_alloc_path();
263 	if (!path) {
264 		ret = -ENOMEM;
265 		goto out;
266 	}
267 
268 	/* default this to quota off, in case no status key is found */
269 	fs_info->qgroup_flags = 0;
270 
271 	/*
272 	 * pass 1: read status, all qgroup infos and limits
273 	 */
274 	key.objectid = 0;
275 	key.type = 0;
276 	key.offset = 0;
277 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
278 	if (ret)
279 		goto out;
280 
281 	while (1) {
282 		struct btrfs_qgroup *qgroup;
283 
284 		slot = path->slots[0];
285 		l = path->nodes[0];
286 		btrfs_item_key_to_cpu(l, &found_key, slot);
287 
288 		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
289 			struct btrfs_qgroup_status_item *ptr;
290 
291 			ptr = btrfs_item_ptr(l, slot,
292 					     struct btrfs_qgroup_status_item);
293 
294 			if (btrfs_qgroup_status_version(l, ptr) !=
295 			    BTRFS_QGROUP_STATUS_VERSION) {
296 				printk(KERN_ERR
297 				 "btrfs: old qgroup version, quota disabled\n");
298 				goto out;
299 			}
300 			if (btrfs_qgroup_status_generation(l, ptr) !=
301 			    fs_info->generation) {
302 				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
303 				printk(KERN_ERR
304 					"btrfs: qgroup generation mismatch, "
305 					"marked as inconsistent\n");
306 			}
307 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
308 									  ptr);
309 			fs_info->qgroup_rescan_progress.objectid =
310 					btrfs_qgroup_status_rescan(l, ptr);
311 			if (fs_info->qgroup_flags &
312 			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
313 				struct qgroup_rescan *qscan =
314 					kmalloc(sizeof(*qscan), GFP_NOFS);
315 				if (!qscan) {
316 					ret = -ENOMEM;
317 					goto out;
318 				}
319 				fs_info->qgroup_rescan_progress.type = 0;
320 				fs_info->qgroup_rescan_progress.offset = 0;
321 				qgroup_rescan_start(fs_info, qscan);
322 			}
323 			goto next1;
324 		}
325 
326 		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
327 		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
328 			goto next1;
329 
330 		qgroup = find_qgroup_rb(fs_info, found_key.offset);
331 		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
332 		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
333 			printk(KERN_ERR "btrfs: inconsistent qgroup config\n");
334 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
335 		}
336 		if (!qgroup) {
337 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
338 			if (IS_ERR(qgroup)) {
339 				ret = PTR_ERR(qgroup);
340 				goto out;
341 			}
342 		}
343 		switch (found_key.type) {
344 		case BTRFS_QGROUP_INFO_KEY: {
345 			struct btrfs_qgroup_info_item *ptr;
346 
347 			ptr = btrfs_item_ptr(l, slot,
348 					     struct btrfs_qgroup_info_item);
349 			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
350 			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
351 			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
352 			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
353 			/* generation currently unused */
354 			break;
355 		}
356 		case BTRFS_QGROUP_LIMIT_KEY: {
357 			struct btrfs_qgroup_limit_item *ptr;
358 
359 			ptr = btrfs_item_ptr(l, slot,
360 					     struct btrfs_qgroup_limit_item);
361 			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
362 			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
363 			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
364 			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
365 			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
366 			break;
367 		}
368 		}
369 next1:
370 		ret = btrfs_next_item(quota_root, path);
371 		if (ret < 0)
372 			goto out;
373 		if (ret)
374 			break;
375 	}
376 	btrfs_release_path(path);
377 
378 	/*
379 	 * pass 2: read all qgroup relations
380 	 */
381 	key.objectid = 0;
382 	key.type = BTRFS_QGROUP_RELATION_KEY;
383 	key.offset = 0;
384 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
385 	if (ret)
386 		goto out;
387 	while (1) {
388 		slot = path->slots[0];
389 		l = path->nodes[0];
390 		btrfs_item_key_to_cpu(l, &found_key, slot);
391 
392 		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
393 			goto next2;
394 
395 		if (found_key.objectid > found_key.offset) {
396 			/* parent <- member, not needed to build config */
397 			/* FIXME should we omit the key completely? */
398 			goto next2;
399 		}
400 
401 		ret = add_relation_rb(fs_info, found_key.objectid,
402 				      found_key.offset);
403 		if (ret == -ENOENT) {
404 			printk(KERN_WARNING
405 				"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
406 				(unsigned long long)found_key.objectid,
407 				(unsigned long long)found_key.offset);
408 			ret = 0;	/* ignore the error */
409 		}
410 		if (ret)
411 			goto out;
412 next2:
413 		ret = btrfs_next_item(quota_root, path);
414 		if (ret < 0)
415 			goto out;
416 		if (ret)
417 			break;
418 	}
419 out:
420 	fs_info->qgroup_flags |= flags;
421 	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
422 		fs_info->quota_enabled = 0;
423 		fs_info->pending_quota_state = 0;
424 	}
425 	btrfs_free_path(path);
426 
427 	return ret < 0 ? ret : 0;
428 }
429 
430 /*
431  * This is only called from close_ctree() or open_ctree(), both in single-
432  * threaded paths. Clean up the in-memory structures. No locking needed.
433  */
434 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
435 {
436 	struct rb_node *n;
437 	struct btrfs_qgroup *qgroup;
438 	struct btrfs_qgroup_list *list;
439 
440 	while ((n = rb_first(&fs_info->qgroup_tree))) {
441 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
442 		rb_erase(n, &fs_info->qgroup_tree);
443 
444 		while (!list_empty(&qgroup->groups)) {
445 			list = list_first_entry(&qgroup->groups,
446 						struct btrfs_qgroup_list,
447 						next_group);
448 			list_del(&list->next_group);
449 			list_del(&list->next_member);
450 			kfree(list);
451 		}
452 
453 		while (!list_empty(&qgroup->members)) {
454 			list = list_first_entry(&qgroup->members,
455 						struct btrfs_qgroup_list,
456 						next_member);
457 			list_del(&list->next_group);
458 			list_del(&list->next_member);
459 			kfree(list);
460 		}
461 		kfree(qgroup);
462 	}
463 }
464 
465 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
466 				    struct btrfs_root *quota_root,
467 				    u64 src, u64 dst)
468 {
469 	int ret;
470 	struct btrfs_path *path;
471 	struct btrfs_key key;
472 
473 	path = btrfs_alloc_path();
474 	if (!path)
475 		return -ENOMEM;
476 
477 	key.objectid = src;
478 	key.type = BTRFS_QGROUP_RELATION_KEY;
479 	key.offset = dst;
480 
481 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
482 
483 	btrfs_mark_buffer_dirty(path->nodes[0]);
484 
485 	btrfs_free_path(path);
486 	return ret;
487 }
488 
489 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
490 				    struct btrfs_root *quota_root,
491 				    u64 src, u64 dst)
492 {
493 	int ret;
494 	struct btrfs_path *path;
495 	struct btrfs_key key;
496 
497 	path = btrfs_alloc_path();
498 	if (!path)
499 		return -ENOMEM;
500 
501 	key.objectid = src;
502 	key.type = BTRFS_QGROUP_RELATION_KEY;
503 	key.offset = dst;
504 
505 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
506 	if (ret < 0)
507 		goto out;
508 
509 	if (ret > 0) {
510 		ret = -ENOENT;
511 		goto out;
512 	}
513 
514 	ret = btrfs_del_item(trans, quota_root, path);
515 out:
516 	btrfs_free_path(path);
517 	return ret;
518 }
519 
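/*
 * Create the on-disk items for a new qgroup: an info item and a limit
 * item, both keyed by the qgroup id and initialized to zero.
 */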
520 static int add_qgroup_item(struct btrfs_trans_handle *trans,
521 			   struct btrfs_root *quota_root, u64 qgroupid)
522 {
523 	int ret;
524 	struct btrfs_path *path;
525 	struct btrfs_qgroup_info_item *qgroup_info;
526 	struct btrfs_qgroup_limit_item *qgroup_limit;
527 	struct extent_buffer *leaf;
528 	struct btrfs_key key;
529 
530 	path = btrfs_alloc_path();
531 	if (!path)
532 		return -ENOMEM;
533 
534 	key.objectid = 0;
535 	key.type = BTRFS_QGROUP_INFO_KEY;
536 	key.offset = qgroupid;
537 
538 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
539 				      sizeof(*qgroup_info));
540 	if (ret)
541 		goto out;
542 
543 	leaf = path->nodes[0];
544 	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
545 				 struct btrfs_qgroup_info_item);
546 	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
547 	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
548 	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
549 	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
550 	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
551 
552 	btrfs_mark_buffer_dirty(leaf);
553 
554 	btrfs_release_path(path);
555 
556 	key.type = BTRFS_QGROUP_LIMIT_KEY;
557 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
558 				      sizeof(*qgroup_limit));
559 	if (ret)
560 		goto out;
561 
562 	leaf = path->nodes[0];
563 	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
564 				  struct btrfs_qgroup_limit_item);
565 	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
566 	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
567 	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
568 	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
569 	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
570 
571 	btrfs_mark_buffer_dirty(leaf);
572 
573 	ret = 0;
574 out:
575 	btrfs_free_path(path);
576 	return ret;
577 }
578 
579 static int del_qgroup_item(struct btrfs_trans_handle *trans,
580 			   struct btrfs_root *quota_root, u64 qgroupid)
581 {
582 	int ret;
583 	struct btrfs_path *path;
584 	struct btrfs_key key;
585 
586 	path = btrfs_alloc_path();
587 	if (!path)
588 		return -ENOMEM;
589 
590 	key.objectid = 0;
591 	key.type = BTRFS_QGROUP_INFO_KEY;
592 	key.offset = qgroupid;
593 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
594 	if (ret < 0)
595 		goto out;
596 
597 	if (ret > 0) {
598 		ret = -ENOENT;
599 		goto out;
600 	}
601 
602 	ret = btrfs_del_item(trans, quota_root, path);
603 	if (ret)
604 		goto out;
605 
606 	btrfs_release_path(path);
607 
608 	key.type = BTRFS_QGROUP_LIMIT_KEY;
609 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
610 	if (ret < 0)
611 		goto out;
612 
613 	if (ret > 0) {
614 		ret = -ENOENT;
615 		goto out;
616 	}
617 
618 	ret = btrfs_del_item(trans, quota_root, path);
619 
620 out:
621 	btrfs_free_path(path);
622 	return ret;
623 }
624 
625 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
626 				    struct btrfs_root *root, u64 qgroupid,
627 				    u64 flags, u64 max_rfer, u64 max_excl,
628 				    u64 rsv_rfer, u64 rsv_excl)
629 {
630 	struct btrfs_path *path;
631 	struct btrfs_key key;
632 	struct extent_buffer *l;
633 	struct btrfs_qgroup_limit_item *qgroup_limit;
634 	int ret;
635 	int slot;
636 
637 	key.objectid = 0;
638 	key.type = BTRFS_QGROUP_LIMIT_KEY;
639 	key.offset = qgroupid;
640 
641 	path = btrfs_alloc_path();
642 	if (!path)
643 		return -ENOMEM;
644 
645 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
646 	if (ret > 0)
647 		ret = -ENOENT;
648 
649 	if (ret)
650 		goto out;
651 
652 	l = path->nodes[0];
653 	slot = path->slots[0];
654 	qgroup_limit = btrfs_item_ptr(l, slot,
655 				      struct btrfs_qgroup_limit_item);
656 	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
657 	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
658 	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
659 	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
660 	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
661 
662 	btrfs_mark_buffer_dirty(l);
663 
664 out:
665 	btrfs_free_path(path);
666 	return ret;
667 }
668 
669 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
670 				   struct btrfs_root *root,
671 				   struct btrfs_qgroup *qgroup)
672 {
673 	struct btrfs_path *path;
674 	struct btrfs_key key;
675 	struct extent_buffer *l;
676 	struct btrfs_qgroup_info_item *qgroup_info;
677 	int ret;
678 	int slot;
679 
680 	key.objectid = 0;
681 	key.type = BTRFS_QGROUP_INFO_KEY;
682 	key.offset = qgroup->qgroupid;
683 
684 	path = btrfs_alloc_path();
685 	if (!path)
686 		return -ENOMEM;
687 
688 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
689 	if (ret > 0)
690 		ret = -ENOENT;
691 
692 	if (ret)
693 		goto out;
694 
695 	l = path->nodes[0];
696 	slot = path->slots[0];
697 	qgroup_info = btrfs_item_ptr(l, slot,
698 				 struct btrfs_qgroup_info_item);
699 	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
700 	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
701 	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
702 	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
703 	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
704 
705 	btrfs_mark_buffer_dirty(l);
706 
707 out:
708 	btrfs_free_path(path);
709 	return ret;
710 }
711 
712 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
713 				     struct btrfs_fs_info *fs_info,
714 				     struct btrfs_root *root)
715 {
716 	struct btrfs_path *path;
717 	struct btrfs_key key;
718 	struct extent_buffer *l;
719 	struct btrfs_qgroup_status_item *ptr;
720 	int ret;
721 	int slot;
722 
723 	key.objectid = 0;
724 	key.type = BTRFS_QGROUP_STATUS_KEY;
725 	key.offset = 0;
726 
727 	path = btrfs_alloc_path();
728 	if (!path)
729 		return -ENOMEM;
730 
731 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
732 	if (ret > 0)
733 		ret = -ENOENT;
734 
735 	if (ret)
736 		goto out;
737 
738 	l = path->nodes[0];
739 	slot = path->slots[0];
740 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
741 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
742 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
743 	btrfs_set_qgroup_status_rescan(l, ptr,
744 				fs_info->qgroup_rescan_progress.objectid);
745 
746 	btrfs_mark_buffer_dirty(l);
747 
748 out:
749 	btrfs_free_path(path);
750 	return ret;
751 }
752 
753 /*
754  * called with qgroup_lock held
755  */
756 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
757 				  struct btrfs_root *root)
758 {
759 	struct btrfs_path *path;
760 	struct btrfs_key key;
761 	struct extent_buffer *leaf = NULL;
762 	int ret;
763 	int nr = 0;
764 
765 	path = btrfs_alloc_path();
766 	if (!path)
767 		return -ENOMEM;
768 
769 	path->leave_spinning = 1;
770 
771 	key.objectid = 0;
772 	key.offset = 0;
773 	key.type = 0;
774 
775 	while (1) {
776 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
777 		if (ret < 0)
778 			goto out;
779 		leaf = path->nodes[0];
780 		nr = btrfs_header_nritems(leaf);
781 		if (!nr)
782 			break;
783 		/*
784 		 * delete the leaf one by one
785 		 * since the whole tree is going
786 		 * to be deleted.
787 		 */
788 		path->slots[0] = 0;
789 		ret = btrfs_del_items(trans, root, path, 0, nr);
790 		if (ret)
791 			goto out;
792 
793 		btrfs_release_path(path);
794 	}
795 	ret = 0;
796 out:
797 	root->fs_info->pending_quota_state = 0;
798 	btrfs_free_path(path);
799 	return ret;
800 }
801 
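/*
 * Enable quota: create the quota tree with its status item and add one
 * qgroup item per existing subvolume, plus one for the default fs tree.
 * quota_enabled itself is only flipped at commit time via
 * pending_quota_state, see btrfs_run_qgroups().
 */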
802 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
803 		       struct btrfs_fs_info *fs_info)
804 {
805 	struct btrfs_root *quota_root;
806 	struct btrfs_root *tree_root = fs_info->tree_root;
807 	struct btrfs_path *path = NULL;
808 	struct btrfs_qgroup_status_item *ptr;
809 	struct extent_buffer *leaf;
810 	struct btrfs_key key;
811 	struct btrfs_key found_key;
812 	struct btrfs_qgroup *qgroup = NULL;
813 	int ret = 0;
814 	int slot;
815 
816 	mutex_lock(&fs_info->qgroup_ioctl_lock);
817 	if (fs_info->quota_root) {
818 		fs_info->pending_quota_state = 1;
819 		goto out;
820 	}
821 
822 	/*
823 	 * initially create the quota tree
824 	 */
825 	quota_root = btrfs_create_tree(trans, fs_info,
826 				       BTRFS_QUOTA_TREE_OBJECTID);
827 	if (IS_ERR(quota_root)) {
828 		ret = PTR_ERR(quota_root);
829 		goto out;
830 	}
831 
832 	path = btrfs_alloc_path();
833 	if (!path) {
834 		ret = -ENOMEM;
835 		goto out_free_root;
836 	}
837 
838 	key.objectid = 0;
839 	key.type = BTRFS_QGROUP_STATUS_KEY;
840 	key.offset = 0;
841 
842 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
843 				      sizeof(*ptr));
844 	if (ret)
845 		goto out_free_path;
846 
847 	leaf = path->nodes[0];
848 	ptr = btrfs_item_ptr(leaf, path->slots[0],
849 				 struct btrfs_qgroup_status_item);
850 	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
851 	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
852 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
853 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
854 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
855 	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
856 
857 	btrfs_mark_buffer_dirty(leaf);
858 
859 	key.objectid = 0;
860 	key.type = BTRFS_ROOT_REF_KEY;
861 	key.offset = 0;
862 
863 	btrfs_release_path(path);
864 	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
865 	if (ret > 0)
866 		goto out_add_root;
867 	if (ret < 0)
868 		goto out_free_path;
869 
870 
871 	while (1) {
872 		slot = path->slots[0];
873 		leaf = path->nodes[0];
874 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
875 
876 		if (found_key.type == BTRFS_ROOT_REF_KEY) {
877 			ret = add_qgroup_item(trans, quota_root,
878 					      found_key.offset);
879 			if (ret)
880 				goto out_free_path;
881 
882 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
883 			if (IS_ERR(qgroup)) {
884 				ret = PTR_ERR(qgroup);
885 				goto out_free_path;
886 			}
887 		}
888 		ret = btrfs_next_item(tree_root, path);
889 		if (ret < 0)
890 			goto out_free_path;
891 		if (ret)
892 			break;
893 	}
894 
895 out_add_root:
896 	btrfs_release_path(path);
897 	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
898 	if (ret)
899 		goto out_free_path;
900 
901 	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
902 	if (IS_ERR(qgroup)) {
903 		ret = PTR_ERR(qgroup);
904 		goto out_free_path;
905 	}
906 	spin_lock(&fs_info->qgroup_lock);
907 	fs_info->quota_root = quota_root;
908 	fs_info->pending_quota_state = 1;
909 	spin_unlock(&fs_info->qgroup_lock);
910 out_free_path:
911 	btrfs_free_path(path);
912 out_free_root:
913 	if (ret) {
914 		free_extent_buffer(quota_root->node);
915 		free_extent_buffer(quota_root->commit_root);
916 		kfree(quota_root);
917 	}
918 out:
919 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
920 	return ret;
921 }
922 
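/*
 * Disable quota: drop the in-memory config, empty the quota tree item by
 * item and finally delete the quota root from the tree of tree roots.
 */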
923 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
924 			struct btrfs_fs_info *fs_info)
925 {
926 	struct btrfs_root *tree_root = fs_info->tree_root;
927 	struct btrfs_root *quota_root;
928 	int ret = 0;
929 
930 	mutex_lock(&fs_info->qgroup_ioctl_lock);
931 	if (!fs_info->quota_root)
932 		goto out;
933 	spin_lock(&fs_info->qgroup_lock);
934 	fs_info->quota_enabled = 0;
935 	fs_info->pending_quota_state = 0;
936 	quota_root = fs_info->quota_root;
937 	fs_info->quota_root = NULL;
938 	btrfs_free_qgroup_config(fs_info);
939 	spin_unlock(&fs_info->qgroup_lock);
940 
941 	if (!quota_root) {
942 		ret = -EINVAL;
943 		goto out;
944 	}
945 
946 	ret = btrfs_clean_quota_tree(trans, quota_root);
947 	if (ret)
948 		goto out;
949 
950 	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
951 	if (ret)
952 		goto out;
953 
954 	list_del(&quota_root->dirty_list);
955 
956 	btrfs_tree_lock(quota_root->node);
957 	clean_tree_block(trans, tree_root, quota_root->node);
958 	btrfs_tree_unlock(quota_root->node);
959 	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
960 
961 	free_extent_buffer(quota_root->node);
962 	free_extent_buffer(quota_root->commit_root);
963 	kfree(quota_root);
964 out:
965 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
966 	return ret;
967 }
968 
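/*
 * Mark a qgroup dirty so that btrfs_run_qgroups() writes it back to disk
 * at commit time. Must be called with qgroup_lock held.
 */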
969 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
970 			 struct btrfs_qgroup *qgroup)
971 {
972 	if (list_empty(&qgroup->dirty))
973 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
974 }
975 
976 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
977 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
978 {
979 	struct btrfs_root *quota_root;
980 	struct btrfs_qgroup *parent;
981 	struct btrfs_qgroup *member;
982 	struct btrfs_qgroup_list *list;
983 	int ret = 0;
984 
985 	mutex_lock(&fs_info->qgroup_ioctl_lock);
986 	quota_root = fs_info->quota_root;
987 	if (!quota_root) {
988 		ret = -EINVAL;
989 		goto out;
990 	}
991 	member = find_qgroup_rb(fs_info, src);
992 	parent = find_qgroup_rb(fs_info, dst);
993 	if (!member || !parent) {
994 		ret = -EINVAL;
995 		goto out;
996 	}
997 
998 	/* check first whether such a qgroup relation already exists */
999 	list_for_each_entry(list, &member->groups, next_group) {
1000 		if (list->group == parent) {
1001 			ret = -EEXIST;
1002 			goto out;
1003 		}
1004 	}
1005 
1006 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1007 	if (ret)
1008 		goto out;
1009 
1010 	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1011 	if (ret) {
1012 		del_qgroup_relation_item(trans, quota_root, src, dst);
1013 		goto out;
1014 	}
1015 
1016 	spin_lock(&fs_info->qgroup_lock);
1017 	ret = add_relation_rb(quota_root->fs_info, src, dst);
1018 	spin_unlock(&fs_info->qgroup_lock);
1019 out:
1020 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1021 	return ret;
1022 }
1023 
1024 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1025 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1026 {
1027 	struct btrfs_root *quota_root;
1028 	struct btrfs_qgroup *parent;
1029 	struct btrfs_qgroup *member;
1030 	struct btrfs_qgroup_list *list;
1031 	int ret = 0;
1032 	int err;
1033 
1034 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1035 	quota_root = fs_info->quota_root;
1036 	if (!quota_root) {
1037 		ret = -EINVAL;
1038 		goto out;
1039 	}
1040 
1041 	member = find_qgroup_rb(fs_info, src);
1042 	parent = find_qgroup_rb(fs_info, dst);
1043 	if (!member || !parent) {
1044 		ret = -EINVAL;
1045 		goto out;
1046 	}
1047 
1048 	/* check if such qgroup relation exist firstly */
1049 	list_for_each_entry(list, &member->groups, next_group) {
1050 		if (list->group == parent)
1051 			goto exist;
1052 	}
1053 	ret = -ENOENT;
1054 	goto out;
1055 exist:
1056 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1057 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
1058 	if (err && !ret)
1059 		ret = err;
1060 
1061 	spin_lock(&fs_info->qgroup_lock);
1062 	del_relation_rb(fs_info, src, dst);
1063 	spin_unlock(&fs_info->qgroup_lock);
1064 out:
1065 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1066 	return ret;
1067 }
1068 
1069 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1070 			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1071 {
1072 	struct btrfs_root *quota_root;
1073 	struct btrfs_qgroup *qgroup;
1074 	int ret = 0;
1075 
1076 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1077 	quota_root = fs_info->quota_root;
1078 	if (!quota_root) {
1079 		ret = -EINVAL;
1080 		goto out;
1081 	}
1082 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1083 	if (qgroup) {
1084 		ret = -EEXIST;
1085 		goto out;
1086 	}
1087 
1088 	ret = add_qgroup_item(trans, quota_root, qgroupid);
1089 	if (ret)
1090 		goto out;
1091 
1092 	spin_lock(&fs_info->qgroup_lock);
1093 	qgroup = add_qgroup_rb(fs_info, qgroupid);
1094 	spin_unlock(&fs_info->qgroup_lock);
1095 
1096 	if (IS_ERR(qgroup))
1097 		ret = PTR_ERR(qgroup);
1098 out:
1099 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1100 	return ret;
1101 }
1102 
1103 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1104 			struct btrfs_fs_info *fs_info, u64 qgroupid)
1105 {
1106 	struct btrfs_root *quota_root;
1107 	struct btrfs_qgroup *qgroup;
1108 	int ret = 0;
1109 
1110 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1111 	quota_root = fs_info->quota_root;
1112 	if (!quota_root) {
1113 		ret = -EINVAL;
1114 		goto out;
1115 	}
1116 
1117 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1118 	if (!qgroup) {
1119 		ret = -ENOENT;
1120 		goto out;
1121 	} else {
1122 		/* check if there are no relations to this qgroup */
1123 		if (!list_empty(&qgroup->groups) ||
1124 		    !list_empty(&qgroup->members)) {
1125 			ret = -EBUSY;
1126 			goto out;
1127 		}
1128 	}
1129 	ret = del_qgroup_item(trans, quota_root, qgroupid);
1130 
1131 	spin_lock(&fs_info->qgroup_lock);
1132 	del_qgroup_rb(quota_root->fs_info, qgroupid);
1133 	spin_unlock(&fs_info->qgroup_lock);
1134 out:
1135 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1136 	return ret;
1137 }
1138 
1139 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1140 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
1141 		       struct btrfs_qgroup_limit *limit)
1142 {
1143 	struct btrfs_root *quota_root;
1144 	struct btrfs_qgroup *qgroup;
1145 	int ret = 0;
1146 
1147 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1148 	quota_root = fs_info->quota_root;
1149 	if (!quota_root) {
1150 		ret = -EINVAL;
1151 		goto out;
1152 	}
1153 
1154 	qgroup = find_qgroup_rb(fs_info, qgroupid);
1155 	if (!qgroup) {
1156 		ret = -ENOENT;
1157 		goto out;
1158 	}
1159 	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1160 				       limit->flags, limit->max_rfer,
1161 				       limit->max_excl, limit->rsv_rfer,
1162 				       limit->rsv_excl);
1163 	if (ret) {
1164 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1165 		printk(KERN_INFO "btrfs: unable to update quota limit for %llu\n",
1166 		       (unsigned long long)qgroupid);
1167 	}
1168 
1169 	spin_lock(&fs_info->qgroup_lock);
1170 	qgroup->lim_flags = limit->flags;
1171 	qgroup->max_rfer = limit->max_rfer;
1172 	qgroup->max_excl = limit->max_excl;
1173 	qgroup->rsv_rfer = limit->rsv_rfer;
1174 	qgroup->rsv_excl = limit->rsv_excl;
1175 	spin_unlock(&fs_info->qgroup_lock);
1176 out:
1177 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1178 	return ret;
1179 }
1180 
1181 /*
1182  * btrfs_qgroup_record_ref is called when a ref is added or deleted. It puts
1183  * the modification into a list that's later used by btrfs_end_transaction to
1184  * pass the recorded modifications on to btrfs_qgroup_account_ref.
1185  */
1186 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1187 			    struct btrfs_delayed_ref_node *node,
1188 			    struct btrfs_delayed_extent_op *extent_op)
1189 {
1190 	struct qgroup_update *u;
1191 
1192 	BUG_ON(!trans->delayed_ref_elem.seq);
1193 	u = kmalloc(sizeof(*u), GFP_NOFS);
1194 	if (!u)
1195 		return -ENOMEM;
1196 
1197 	u->node = node;
1198 	u->extent_op = extent_op;
1199 	list_add_tail(&u->list, &trans->qgroup_ref_list);
1200 
1201 	return 0;
1202 }
1203 
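/*
 * Step 1 of the accounting: for every root in @roots (the roots that
 * reference the extent apart from the change being accounted), walk from
 * its qgroup up through all parent groups and bump ->refcnt once per
 * root. Afterwards, refcnt - seq tells how many of these roots reach a
 * given qgroup. Called with qgroup_lock held.
 */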
1204 static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
1205 				    struct ulist *roots, struct ulist *tmp,
1206 				    u64 seq)
1207 {
1208 	struct ulist_node *unode;
1209 	struct ulist_iterator uiter;
1210 	struct ulist_node *tmp_unode;
1211 	struct ulist_iterator tmp_uiter;
1212 	struct btrfs_qgroup *qg;
1213 	int ret;
1214 
1215 	ULIST_ITER_INIT(&uiter);
1216 	while ((unode = ulist_next(roots, &uiter))) {
1217 		qg = find_qgroup_rb(fs_info, unode->val);
1218 		if (!qg)
1219 			continue;
1220 
1221 		ulist_reinit(tmp);
1222 						/* XXX id not needed */
1223 		ret = ulist_add(tmp, qg->qgroupid,
1224 				(u64)(uintptr_t)qg, GFP_ATOMIC);
1225 		if (ret < 0)
1226 			return ret;
1227 		ULIST_ITER_INIT(&tmp_uiter);
1228 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1229 			struct btrfs_qgroup_list *glist;
1230 
1231 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1232 			if (qg->refcnt < seq)
1233 				qg->refcnt = seq + 1;
1234 			else
1235 				++qg->refcnt;
1236 
1237 			list_for_each_entry(glist, &qg->groups, next_group) {
1238 				ret = ulist_add(tmp, glist->group->qgroupid,
1239 						(u64)(uintptr_t)glist->group,
1240 						GFP_ATOMIC);
1241 				if (ret < 0)
1242 					return ret;
1243 			}
1244 		}
1245 	}
1246 
1247 	return 0;
1248 }
1249 
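/*
 * Step 2 of the accounting: walk from the qgroup of the root the ref
 * change applies to, up through all parent groups. Groups not visited in
 * step 1 (refcnt < seq) get their referenced count adjusted; if the
 * extent is referenced by no other root, the exclusive count is adjusted
 * as well. Every visited group is tagged with seq so step 3 can skip it.
 */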
1250 static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
1251 				    struct ulist *roots, struct ulist *tmp,
1252 				    u64 seq, int sgn, u64 num_bytes,
1253 				    struct btrfs_qgroup *qgroup)
1254 {
1255 	struct ulist_node *unode;
1256 	struct ulist_iterator uiter;
1257 	struct btrfs_qgroup *qg;
1258 	struct btrfs_qgroup_list *glist;
1259 	int ret;
1260 
1261 	ulist_reinit(tmp);
1262 	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1263 	if (ret < 0)
1264 		return ret;
1265 
1266 	ULIST_ITER_INIT(&uiter);
1267 	while ((unode = ulist_next(tmp, &uiter))) {
1268 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1269 		if (qg->refcnt < seq) {
1270 			/* not visited by step 1 */
1271 			qg->rfer += sgn * num_bytes;
1272 			qg->rfer_cmpr += sgn * num_bytes;
1273 			if (roots->nnodes == 0) {
1274 				qg->excl += sgn * num_bytes;
1275 				qg->excl_cmpr += sgn * num_bytes;
1276 			}
1277 			qgroup_dirty(fs_info, qg);
1278 		}
1279 		WARN_ON(qg->tag >= seq);
1280 		qg->tag = seq;
1281 
1282 		list_for_each_entry(glist, &qg->groups, next_group) {
1283 			ret = ulist_add(tmp, glist->group->qgroupid,
1284 					(uintptr_t)glist->group, GFP_ATOMIC);
1285 			if (ret < 0)
1286 				return ret;
1287 		}
1288 	}
1289 
1290 	return 0;
1291 }
1292 
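/*
 * Step 3 of the accounting: walk the old roots once more. Any group that
 * is reached by all of them (refcnt - seq == roots->nnodes) but was not
 * tagged in step 2 changes its exclusive count: an added ref makes the
 * extent shared, a dropped ref may make it exclusive again.
 */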
1293 static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
1294 				    struct ulist *roots, struct ulist *tmp,
1295 				    u64 seq, int sgn, u64 num_bytes)
1296 {
1297 	struct ulist_node *unode;
1298 	struct ulist_iterator uiter;
1299 	struct btrfs_qgroup *qg;
1300 	struct ulist_node *tmp_unode;
1301 	struct ulist_iterator tmp_uiter;
1302 	int ret;
1303 
1304 	ULIST_ITER_INIT(&uiter);
1305 	while ((unode = ulist_next(roots, &uiter))) {
1306 		qg = find_qgroup_rb(fs_info, unode->val);
1307 		if (!qg)
1308 			continue;
1309 
1310 		ulist_reinit(tmp);
1311 		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1312 		if (ret < 0)
1313 			return ret;
1314 
1315 		ULIST_ITER_INIT(&tmp_uiter);
1316 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1317 			struct btrfs_qgroup_list *glist;
1318 
1319 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1320 			if (qg->tag == seq)
1321 				continue;
1322 
1323 			if (qg->refcnt - seq == roots->nnodes) {
1324 				qg->excl -= sgn * num_bytes;
1325 				qg->excl_cmpr -= sgn * num_bytes;
1326 				qgroup_dirty(fs_info, qg);
1327 			}
1328 
1329 			list_for_each_entry(glist, &qg->groups, next_group) {
1330 				ret = ulist_add(tmp, glist->group->qgroupid,
1331 						(uintptr_t)glist->group,
1332 						GFP_ATOMIC);
1333 				if (ret < 0)
1334 					return ret;
1335 			}
1336 		}
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 /*
1343  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1344  * from the fs. First, all roots referencing the extent are searched, and
1345  * then the space is accounted accordingly to the different roots. The
1346  * accounting algorithm works in 3 steps documented inline.
1347  */
1348 int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1349 			     struct btrfs_fs_info *fs_info,
1350 			     struct btrfs_delayed_ref_node *node,
1351 			     struct btrfs_delayed_extent_op *extent_op)
1352 {
1353 	struct btrfs_key ins;
1354 	struct btrfs_root *quota_root;
1355 	u64 ref_root;
1356 	struct btrfs_qgroup *qgroup;
1357 	struct ulist *roots = NULL;
1358 	struct ulist *tmp = NULL;
1359 	u64 seq;
1360 	int ret = 0;
1361 	int sgn;
1362 
1363 	if (!fs_info->quota_enabled)
1364 		return 0;
1365 
1366 	BUG_ON(!fs_info->quota_root);
1367 
1368 	ins.objectid = node->bytenr;
1369 	ins.offset = node->num_bytes;
1370 	ins.type = BTRFS_EXTENT_ITEM_KEY;
1371 
1372 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1373 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1374 		struct btrfs_delayed_tree_ref *ref;
1375 		ref = btrfs_delayed_node_to_tree_ref(node);
1376 		ref_root = ref->root;
1377 	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1378 		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
1379 		struct btrfs_delayed_data_ref *ref;
1380 		ref = btrfs_delayed_node_to_data_ref(node);
1381 		ref_root = ref->root;
1382 	} else {
1383 		BUG();
1384 	}
1385 
1386 	if (!is_fstree(ref_root)) {
1387 		/*
1388 		 * non-fs-trees are not being accounted
1389 		 */
1390 		return 0;
1391 	}
1392 
1393 	switch (node->action) {
1394 	case BTRFS_ADD_DELAYED_REF:
1395 	case BTRFS_ADD_DELAYED_EXTENT:
1396 		sgn = 1;
1397 		seq = btrfs_tree_mod_seq_prev(node->seq);
1398 		break;
1399 	case BTRFS_DROP_DELAYED_REF:
1400 		sgn = -1;
1401 		seq = node->seq;
1402 		break;
1403 	case BTRFS_UPDATE_DELAYED_HEAD:
1404 		return 0;
1405 	default:
1406 		BUG();
1407 	}
1408 
1409 	mutex_lock(&fs_info->qgroup_rescan_lock);
1410 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1411 		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1412 			mutex_unlock(&fs_info->qgroup_rescan_lock);
1413 			return 0;
1414 		}
1415 	}
1416 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1417 
1418 	/*
1419 	 * the delayed ref sequence number we pass depends on the direction of
1420 	 * the operation. for add operations, we pass
1421 	 * btrfs_tree_mod_seq_prev(node->seq) to skip
1422 	 * the delayed ref's current sequence number, because we need the state
1423 	 * of the tree before the add operation. for delete operations, we pass
1424 	 * (node->seq) to include the delayed ref's current sequence number,
1425 	 * because we need the state of the tree after the delete operation.
1426 	 */
1427 	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
1428 	if (ret < 0)
1429 		return ret;
1430 
1431 	mutex_lock(&fs_info->qgroup_rescan_lock);
1432 	spin_lock(&fs_info->qgroup_lock);
1433 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1434 		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
1435 			ret = 0;
1436 			goto unlock;
1437 		}
1438 	}
1439 
1440 	quota_root = fs_info->quota_root;
1441 	if (!quota_root)
1442 		goto unlock;
1443 
1444 	qgroup = find_qgroup_rb(fs_info, ref_root);
1445 	if (!qgroup)
1446 		goto unlock;
1447 
1448 	/*
1449 	 * step 1: for each old ref, visit all nodes once and inc refcnt
1450 	 */
1451 	tmp = ulist_alloc(GFP_ATOMIC);
1452 	if (!tmp) {
1453 		ret = -ENOMEM;
1454 		goto unlock;
1455 	}
1456 	seq = fs_info->qgroup_seq;
1457 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1458 
1459 	ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1460 	if (ret)
1461 		goto unlock;
1462 
1463 	/*
1464 	 * step 2: walk from the new root
1465 	 */
1466 	ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
1467 				       node->num_bytes, qgroup);
1468 	if (ret)
1469 		goto unlock;
1470 
1471 	/*
1472 	 * step 3: walk again from old refs
1473 	 */
1474 	ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
1475 				       node->num_bytes);
1476 	if (ret)
1477 		goto unlock;
1478 
1479 unlock:
1480 	spin_unlock(&fs_info->qgroup_lock);
1481 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1482 	ulist_free(roots);
1483 	ulist_free(tmp);
1484 
1485 	return ret;
1486 }
1487 
1488 /*
1489  * called from commit_transaction. Writes all changed qgroups to disk.
1490  */
1491 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1492 		      struct btrfs_fs_info *fs_info)
1493 {
1494 	struct btrfs_root *quota_root = fs_info->quota_root;
1495 	int ret = 0;
1496 	int start_rescan_worker = 0;
1497 
1498 	if (!quota_root)
1499 		goto out;
1500 
1501 	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1502 		start_rescan_worker = 1;
1503 
1504 	fs_info->quota_enabled = fs_info->pending_quota_state;
1505 
1506 	spin_lock(&fs_info->qgroup_lock);
1507 	while (!list_empty(&fs_info->dirty_qgroups)) {
1508 		struct btrfs_qgroup *qgroup;
1509 		qgroup = list_first_entry(&fs_info->dirty_qgroups,
1510 					  struct btrfs_qgroup, dirty);
1511 		list_del_init(&qgroup->dirty);
1512 		spin_unlock(&fs_info->qgroup_lock);
1513 		ret = update_qgroup_info_item(trans, quota_root, qgroup);
1514 		if (ret)
1515 			fs_info->qgroup_flags |=
1516 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1517 		spin_lock(&fs_info->qgroup_lock);
1518 	}
1519 	if (fs_info->quota_enabled)
1520 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1521 	else
1522 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1523 	spin_unlock(&fs_info->qgroup_lock);
1524 
1525 	ret = update_qgroup_status_item(trans, fs_info, quota_root);
1526 	if (ret)
1527 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1528 
1529 	if (!ret && start_rescan_worker) {
1530 		ret = btrfs_qgroup_rescan(fs_info);
1531 		if (ret)
1532 			pr_err("btrfs: start rescan quota failed: %d\n", ret);
1533 		ret = 0;
1534 	}
1535 
1536 out:
1537 
1538 	return ret;
1539 }
1540 
1541 /*
1542  * copy the accounting information between qgroups. This is necessary when a
1543  * snapshot or a subvolume is created
1544  */
1545 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1546 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1547 			 struct btrfs_qgroup_inherit *inherit)
1548 {
1549 	int ret = 0;
1550 	int i;
1551 	u64 *i_qgroups;
1552 	struct btrfs_root *quota_root = fs_info->quota_root;
1553 	struct btrfs_qgroup *srcgroup;
1554 	struct btrfs_qgroup *dstgroup;
1555 	u32 level_size = 0;
1556 	u64 nums;
1557 
1558 	mutex_lock(&fs_info->qgroup_ioctl_lock);
1559 	if (!fs_info->quota_enabled)
1560 		goto out;
1561 
1562 	if (!quota_root) {
1563 		ret = -EINVAL;
1564 		goto out;
1565 	}
1566 
1567 	if (inherit) {
1568 		i_qgroups = (u64 *)(inherit + 1);
1569 		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
1570 		       2 * inherit->num_excl_copies;
1571 		for (i = 0; i < nums; ++i) {
1572 			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
1573 			if (!srcgroup) {
1574 				ret = -EINVAL;
1575 				goto out;
1576 			}
1577 			++i_qgroups;
1578 		}
1579 	}
1580 
1581 	/*
1582 	 * create a tracking group for the subvol itself
1583 	 */
1584 	ret = add_qgroup_item(trans, quota_root, objectid);
1585 	if (ret)
1586 		goto out;
1587 
1588 	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1589 		ret = update_qgroup_limit_item(trans, quota_root, objectid,
1590 					       inherit->lim.flags,
1591 					       inherit->lim.max_rfer,
1592 					       inherit->lim.max_excl,
1593 					       inherit->lim.rsv_rfer,
1594 					       inherit->lim.rsv_excl);
1595 		if (ret)
1596 			goto out;
1597 	}
1598 
1599 	if (srcid) {
1600 		struct btrfs_root *srcroot;
1601 		struct btrfs_key srckey;
1602 		int srcroot_level;
1603 
1604 		srckey.objectid = srcid;
1605 		srckey.type = BTRFS_ROOT_ITEM_KEY;
1606 		srckey.offset = (u64)-1;
1607 		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1608 		if (IS_ERR(srcroot)) {
1609 			ret = PTR_ERR(srcroot);
1610 			goto out;
1611 		}
1612 
1613 		rcu_read_lock();
1614 		srcroot_level = btrfs_header_level(srcroot->node);
1615 		level_size = btrfs_level_size(srcroot, srcroot_level);
1616 		rcu_read_unlock();
1617 	}
1618 
1619 	/*
1620 	 * add qgroup to all inherited groups
1621 	 */
1622 	if (inherit) {
1623 		i_qgroups = (u64 *)(inherit + 1);
1624 		for (i = 0; i < inherit->num_qgroups; ++i) {
1625 			ret = add_qgroup_relation_item(trans, quota_root,
1626 						       objectid, *i_qgroups);
1627 			if (ret)
1628 				goto out;
1629 			ret = add_qgroup_relation_item(trans, quota_root,
1630 						       *i_qgroups, objectid);
1631 			if (ret)
1632 				goto out;
1633 			++i_qgroups;
1634 		}
1635 	}
1636 
1637 
1638 	spin_lock(&fs_info->qgroup_lock);
1639 
1640 	dstgroup = add_qgroup_rb(fs_info, objectid);
1641 	if (IS_ERR(dstgroup)) {
1642 		ret = PTR_ERR(dstgroup);
1643 		goto unlock;
1644 	}
1645 
1646 	if (srcid) {
1647 		srcgroup = find_qgroup_rb(fs_info, srcid);
1648 		if (!srcgroup)
1649 			goto unlock;
1650 		dstgroup->rfer = srcgroup->rfer - level_size;
1651 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1652 		srcgroup->excl = level_size;
1653 		srcgroup->excl_cmpr = level_size;
1654 		qgroup_dirty(fs_info, dstgroup);
1655 		qgroup_dirty(fs_info, srcgroup);
1656 	}
1657 
1658 	if (!inherit)
1659 		goto unlock;
1660 
1661 	i_qgroups = (u64 *)(inherit + 1);
1662 	for (i = 0; i < inherit->num_qgroups; ++i) {
1663 		ret = add_relation_rb(quota_root->fs_info, objectid,
1664 				      *i_qgroups);
1665 		if (ret)
1666 			goto unlock;
1667 		++i_qgroups;
1668 	}
1669 
1670 	for (i = 0; i < inherit->num_ref_copies; ++i) {
1671 		struct btrfs_qgroup *src;
1672 		struct btrfs_qgroup *dst;
1673 
1674 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1675 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1676 
1677 		if (!src || !dst) {
1678 			ret = -EINVAL;
1679 			goto unlock;
1680 		}
1681 
1682 		dst->rfer = src->rfer - level_size;
1683 		dst->rfer_cmpr = src->rfer_cmpr - level_size;
1684 		i_qgroups += 2;
1685 	}
1686 	for (i = 0; i < inherit->num_excl_copies; ++i) {
1687 		struct btrfs_qgroup *src;
1688 		struct btrfs_qgroup *dst;
1689 
1690 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1691 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1692 
1693 		if (!src || !dst) {
1694 			ret = -EINVAL;
1695 			goto unlock;
1696 		}
1697 
1698 		dst->excl = src->excl + level_size;
1699 		dst->excl_cmpr = src->excl_cmpr + level_size;
1700 		i_qgroups += 2;
1701 	}
1702 
1703 unlock:
1704 	spin_unlock(&fs_info->qgroup_lock);
1705 out:
1706 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
1707 	return ret;
1708 }
1709 
1710 /*
1711  * reserve some space for a qgroup and all its parents. The reservation takes
1712  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1713  * accounting. If not enough space is available, EDQUOT is returned.
1714  * We assume that the requested space is new for all qgroups.
1715  */
1716 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1717 {
1718 	struct btrfs_root *quota_root;
1719 	struct btrfs_qgroup *qgroup;
1720 	struct btrfs_fs_info *fs_info = root->fs_info;
1721 	u64 ref_root = root->root_key.objectid;
1722 	int ret = 0;
1723 	struct ulist *ulist = NULL;
1724 	struct ulist_node *unode;
1725 	struct ulist_iterator uiter;
1726 
1727 	if (!is_fstree(ref_root))
1728 		return 0;
1729 
1730 	if (num_bytes == 0)
1731 		return 0;
1732 
1733 	spin_lock(&fs_info->qgroup_lock);
1734 	quota_root = fs_info->quota_root;
1735 	if (!quota_root)
1736 		goto out;
1737 
1738 	qgroup = find_qgroup_rb(fs_info, ref_root);
1739 	if (!qgroup)
1740 		goto out;
1741 
1742 	/*
1743 	 * in a first step, we check all affected qgroups if any limits would
1744 	 * be exceeded
1745 	 */
1746 	ulist = ulist_alloc(GFP_ATOMIC);
1747 	if (!ulist) {
1748 		ret = -ENOMEM;
1749 		goto out;
1750 	}
1751 	ret = ulist_add(ulist, qgroup->qgroupid,
1752 			(uintptr_t)qgroup, GFP_ATOMIC);
1753 	if (ret < 0)
1754 		goto out;
1755 	ULIST_ITER_INIT(&uiter);
1756 	while ((unode = ulist_next(ulist, &uiter))) {
1757 		struct btrfs_qgroup *qg;
1758 		struct btrfs_qgroup_list *glist;
1759 
1760 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1761 
1762 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1763 		    qg->reserved + (s64)qg->rfer + num_bytes >
1764 		    qg->max_rfer) {
1765 			ret = -EDQUOT;
1766 			goto out;
1767 		}
1768 
1769 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1770 		    qg->reserved + (s64)qg->excl + num_bytes >
1771 		    qg->max_excl) {
1772 			ret = -EDQUOT;
1773 			goto out;
1774 		}
1775 
1776 		list_for_each_entry(glist, &qg->groups, next_group) {
1777 			ret = ulist_add(ulist, glist->group->qgroupid,
1778 					(uintptr_t)glist->group, GFP_ATOMIC);
1779 			if (ret < 0)
1780 				goto out;
1781 		}
1782 	}
1783 	ret = 0;
1784 	/*
1785 	 * no limits exceeded, now record the reservation into all qgroups
1786 	 */
1787 	ULIST_ITER_INIT(&uiter);
1788 	while ((unode = ulist_next(ulist, &uiter))) {
1789 		struct btrfs_qgroup *qg;
1790 
1791 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1792 
1793 		qg->reserved += num_bytes;
1794 	}
1795 
1796 out:
1797 	spin_unlock(&fs_info->qgroup_lock);
1798 	ulist_free(ulist);
1799 
1800 	return ret;
1801 }
1802 
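/*
 * Release a reservation previously taken with btrfs_qgroup_reserve() from
 * the qgroup and all its parents.
 */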
1803 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1804 {
1805 	struct btrfs_root *quota_root;
1806 	struct btrfs_qgroup *qgroup;
1807 	struct btrfs_fs_info *fs_info = root->fs_info;
1808 	struct ulist *ulist = NULL;
1809 	struct ulist_node *unode;
1810 	struct ulist_iterator uiter;
1811 	u64 ref_root = root->root_key.objectid;
1812 	int ret = 0;
1813 
1814 	if (!is_fstree(ref_root))
1815 		return;
1816 
1817 	if (num_bytes == 0)
1818 		return;
1819 
1820 	spin_lock(&fs_info->qgroup_lock);
1821 
1822 	quota_root = fs_info->quota_root;
1823 	if (!quota_root)
1824 		goto out;
1825 
1826 	qgroup = find_qgroup_rb(fs_info, ref_root);
1827 	if (!qgroup)
1828 		goto out;
1829 
1830 	ulist = ulist_alloc(GFP_ATOMIC);
1831 	if (!ulist) {
1832 		btrfs_std_error(fs_info, -ENOMEM);
1833 		goto out;
1834 	}
1835 	ret = ulist_add(ulist, qgroup->qgroupid,
1836 			(uintptr_t)qgroup, GFP_ATOMIC);
1837 	if (ret < 0)
1838 		goto out;
1839 	ULIST_ITER_INIT(&uiter);
1840 	while ((unode = ulist_next(ulist, &uiter))) {
1841 		struct btrfs_qgroup *qg;
1842 		struct btrfs_qgroup_list *glist;
1843 
1844 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1845 
1846 		qg->reserved -= num_bytes;
1847 
1848 		list_for_each_entry(glist, &qg->groups, next_group) {
1849 			ret = ulist_add(ulist, glist->group->qgroupid,
1850 					(uintptr_t)glist->group, GFP_ATOMIC);
1851 			if (ret < 0)
1852 				goto out;
1853 		}
1854 	}
1855 
1856 out:
1857 	spin_unlock(&fs_info->qgroup_lock);
1858 	ulist_free(ulist);
1859 }
1860 
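/*
 * Sanity check for transaction teardown: no recorded qgroup ref updates
 * and no live tree mod log sequence may be left on the handle.
 */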
1861 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1862 {
1863 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1864 		return;
1865 	pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
1866 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1867 		(u32)(trans->delayed_ref_elem.seq >> 32),
1868 		(u32)trans->delayed_ref_elem.seq);
1869 	BUG();
1870 }
1871 
1872 /*
1873  * returns < 0 on error, 0 when more leaves are to be scanned.
1874  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
1875  */
1876 static int
1877 qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
1878 		   struct btrfs_trans_handle *trans, struct ulist *tmp,
1879 		   struct extent_buffer *scratch_leaf)
1880 {
1881 	struct btrfs_key found;
1882 	struct btrfs_fs_info *fs_info = qscan->fs_info;
1883 	struct ulist *roots = NULL;
1884 	struct ulist_node *unode;
1885 	struct ulist_iterator uiter;
1886 	struct seq_list tree_mod_seq_elem = {};
1887 	u64 seq;
1888 	int slot;
1889 	int ret;
1890 
1891 	path->leave_spinning = 1;
1892 	mutex_lock(&fs_info->qgroup_rescan_lock);
1893 	ret = btrfs_search_slot_for_read(fs_info->extent_root,
1894 					 &fs_info->qgroup_rescan_progress,
1895 					 path, 1, 0);
1896 
1897 	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
1898 		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
1899 		 fs_info->qgroup_rescan_progress.type,
1900 		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
1901 		 ret);
1902 
1903 	if (ret) {
1904 		/*
1905 		 * The rescan is about to end, we will not be scanning any
1906 		 * further blocks. We cannot unset the RESCAN flag here, because
1907 		 * we want to commit the transaction if everything went well.
1908 		 * To make the live accounting work in this phase, we set our
1909 		 * scan progress pointer such that every real extent objectid
1910 		 * will be smaller.
1911 		 */
1912 		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
1913 		btrfs_release_path(path);
1914 		mutex_unlock(&fs_info->qgroup_rescan_lock);
1915 		return ret;
1916 	}
1917 
1918 	btrfs_item_key_to_cpu(path->nodes[0], &found,
1919 			      btrfs_header_nritems(path->nodes[0]) - 1);
1920 	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
1921 
1922 	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
1923 	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
1924 	slot = path->slots[0];
1925 	btrfs_release_path(path);
1926 	mutex_unlock(&fs_info->qgroup_rescan_lock);
1927 
1928 	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
1929 		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
1930 		if (found.type != BTRFS_EXTENT_ITEM_KEY)
1931 			continue;
1932 		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
1933 					   tree_mod_seq_elem.seq, &roots);
1934 		if (ret < 0)
1935 			goto out;
1936 		spin_lock(&fs_info->qgroup_lock);
1937 		seq = fs_info->qgroup_seq;
1938 		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1939 
1940 		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
1941 		if (ret) {
1942 			spin_unlock(&fs_info->qgroup_lock);
1943 			ulist_free(roots);
1944 			goto out;
1945 		}
1946 
1947 		/*
1948 		 * step2 of btrfs_qgroup_account_ref works from a single root,
1949 		 * we're doing all at once here.
1950 		 */
1951 		ulist_reinit(tmp);
1952 		ULIST_ITER_INIT(&uiter);
1953 		while ((unode = ulist_next(roots, &uiter))) {
1954 			struct btrfs_qgroup *qg;
1955 
1956 			qg = find_qgroup_rb(fs_info, unode->val);
1957 			if (!qg)
1958 				continue;
1959 
1960 			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
1961 					GFP_ATOMIC);
1962 			if (ret < 0) {
1963 				spin_unlock(&fs_info->qgroup_lock);
1964 				ulist_free(roots);
1965 				goto out;
1966 			}
1967 		}
1968 
1969 		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
1970 		ULIST_ITER_INIT(&uiter);
1971 		while ((unode = ulist_next(tmp, &uiter))) {
1972 			struct btrfs_qgroup *qg;
1973 			struct btrfs_qgroup_list *glist;
1974 
1975 			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
1976 			qg->rfer += found.offset;
1977 			qg->rfer_cmpr += found.offset;
1978 			WARN_ON(qg->tag >= seq);
1979 			if (qg->refcnt - seq == roots->nnodes) {
1980 				qg->excl += found.offset;
1981 				qg->excl_cmpr += found.offset;
1982 			}
1983 			qgroup_dirty(fs_info, qg);
1984 
1985 			list_for_each_entry(glist, &qg->groups, next_group) {
1986 				ret = ulist_add(tmp, glist->group->qgroupid,
1987 						(uintptr_t)glist->group,
1988 						GFP_ATOMIC);
1989 				if (ret < 0) {
1990 					spin_unlock(&fs_info->qgroup_lock);
1991 					ulist_free(roots);
1992 					goto out;
1993 				}
1994 			}
1995 		}
1996 
1997 		spin_unlock(&fs_info->qgroup_lock);
1998 		ulist_free(roots);
1999 		ret = 0;
2000 	}
2001 
2002 out:
2003 	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2004 
2005 	return ret;
2006 }
2007 
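/*
 * Rescan worker: accounts one leaf of the extent tree per transaction
 * until qgroup_rescan_leaf() reports completion or an error, then updates
 * the RESCAN and INCONSISTENT status flags accordingly.
 */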
2008 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2009 {
2010 	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
2011 						   work);
2012 	struct btrfs_path *path;
2013 	struct btrfs_trans_handle *trans = NULL;
2014 	struct btrfs_fs_info *fs_info = qscan->fs_info;
2015 	struct ulist *tmp = NULL;
2016 	struct extent_buffer *scratch_leaf = NULL;
2017 	int err = -ENOMEM;
2018 
2019 	path = btrfs_alloc_path();
2020 	if (!path)
2021 		goto out;
2022 	tmp = ulist_alloc(GFP_NOFS);
2023 	if (!tmp)
2024 		goto out;
2025 	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2026 	if (!scratch_leaf)
2027 		goto out;
2028 
2029 	err = 0;
2030 	while (!err) {
2031 		trans = btrfs_start_transaction(fs_info->fs_root, 0);
2032 		if (IS_ERR(trans)) {
2033 			err = PTR_ERR(trans);
2034 			break;
2035 		}
2036 		if (!fs_info->quota_enabled) {
2037 			err = -EINTR;
2038 		} else {
2039 			err = qgroup_rescan_leaf(qscan, path, trans,
2040 						 tmp, scratch_leaf);
2041 		}
2042 		if (err > 0)
2043 			btrfs_commit_transaction(trans, fs_info->fs_root);
2044 		else
2045 			btrfs_end_transaction(trans, fs_info->fs_root);
2046 	}
2047 
2048 out:
2049 	kfree(scratch_leaf);
2050 	ulist_free(tmp);
2051 	btrfs_free_path(path);
2052 	kfree(qscan);
2053 
2054 	mutex_lock(&fs_info->qgroup_rescan_lock);
2055 	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2056 
2057 	if (err == 2 &&
2058 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2059 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2060 	} else if (err < 0) {
2061 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2062 	}
2063 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2064 
2065 	if (err >= 0) {
2066 		pr_info("btrfs: qgroup scan completed%s\n",
2067 			err == 2 ? " (inconsistency flag cleared)" : "");
2068 	} else {
2069 		pr_err("btrfs: qgroup scan failed with %d\n", err);
2070 	}
2071 }
2072 
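/* queue the prepared rescan state for execution by the worker thread */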
2073 static void
2074 qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
2075 {
2076 	memset(&qscan->work, 0, sizeof(qscan->work));
2077 	qscan->work.func = btrfs_qgroup_rescan_worker;
2078 	qscan->fs_info = fs_info;
2079 
2080 	pr_info("btrfs: qgroup scan started\n");
2081 	btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
2082 }
2083 
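/*
 * Start a full quota rescan: set the RESCAN flag, reset the progress key,
 * zero all in-memory counters and kick the worker. Returns -EINPROGRESS
 * if a rescan is already running and -EINVAL if quota is not enabled.
 */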
2084 int
2085 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2086 {
2087 	int ret = 0;
2088 	struct rb_node *n;
2089 	struct btrfs_qgroup *qgroup;
2090 	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
2091 
2092 	if (!qscan)
2093 		return -ENOMEM;
2094 
2095 	mutex_lock(&fs_info->qgroup_rescan_lock);
2096 	spin_lock(&fs_info->qgroup_lock);
2097 	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2098 		ret = -EINPROGRESS;
2099 	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2100 		ret = -EINVAL;
2101 	if (ret) {
2102 		spin_unlock(&fs_info->qgroup_lock);
2103 		mutex_unlock(&fs_info->qgroup_rescan_lock);
2104 		kfree(qscan);
2105 		return ret;
2106 	}
2107 
2108 	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2109 	memset(&fs_info->qgroup_rescan_progress, 0,
2110 		sizeof(fs_info->qgroup_rescan_progress));
2111 
2112 	/* clear all current qgroup tracking information */
2113 	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2114 		qgroup = rb_entry(n, struct btrfs_qgroup, node);
2115 		qgroup->rfer = 0;
2116 		qgroup->rfer_cmpr = 0;
2117 		qgroup->excl = 0;
2118 		qgroup->excl_cmpr = 0;
2119 	}
2120 	spin_unlock(&fs_info->qgroup_lock);
2121 	mutex_unlock(&fs_info->qgroup_rescan_lock);
2122 
2123 	qgroup_rescan_start(fs_info, qscan);
2124 
2125 	return 0;
2126 }
2127