xref: /openbmc/linux/fs/btrfs/qgroup.c (revision d0b73b48)
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 
27 #include "ctree.h"
28 #include "transaction.h"
29 #include "disk-io.h"
30 #include "locking.h"
31 #include "ulist.h"
32 #include "ioctl.h"
33 #include "backref.h"
34 
35 /* TODO XXX FIXME
36  *  - subvol delete -> delete when ref goes to 0? delete limits also?
37  *  - reorganize keys
38  *  - compressed
39  *  - sync
40  *  - rescan
41  *  - copy also limits on subvol creation
42  *  - limit
 *  - caches for ulists
44  *  - performance benchmarks
45  *  - check all ioctl parameters
46  */
47 
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 * Allocated by add_qgroup_rb(), freed by del_qgroup_rb() or
 * btrfs_free_qgroup_config().
 */
struct btrfs_qgroup {
	u64 qgroupid;	/* unique id; also the rb-tree lookup key */

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations, only valid while
	 * qgroup_lock is held (see btrfs_qgroup_account_ref)
	 */
	u64 tag;
	u64 refcnt;
};
90 
/*
 * glue structure to represent the relations between qgroups.  Each instance
 * is linked into both the member's ->groups list and the group's ->members
 * list, so it must always be removed from both (see del_relation_rb).
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* entry in member->groups */
	struct list_head next_member;	/* entry in group->members */
	struct btrfs_qgroup *group;	/* parent group */
	struct btrfs_qgroup *member;	/* child group */
};
100 
/*
 * Look up a qgroup by id in fs_info->qgroup_tree.  Returns NULL if not
 * found.  must be called with qgroup_lock held.
 */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	/*
	 * NOTE: comparisons look inverted vs. a textbook BST (smaller keys
	 * go right); this matches the ordering used by add_qgroup_rb(), so
	 * lookups are consistent with insertions.
	 */
	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}
119 
120 /* must be called with qgroup_lock held */
121 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
122 					  u64 qgroupid)
123 {
124 	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
125 	struct rb_node *parent = NULL;
126 	struct btrfs_qgroup *qgroup;
127 
128 	while (*p) {
129 		parent = *p;
130 		qgroup = rb_entry(parent, struct btrfs_qgroup, node);
131 
132 		if (qgroup->qgroupid < qgroupid)
133 			p = &(*p)->rb_left;
134 		else if (qgroup->qgroupid > qgroupid)
135 			p = &(*p)->rb_right;
136 		else
137 			return qgroup;
138 	}
139 
140 	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
141 	if (!qgroup)
142 		return ERR_PTR(-ENOMEM);
143 
144 	qgroup->qgroupid = qgroupid;
145 	INIT_LIST_HEAD(&qgroup->groups);
146 	INIT_LIST_HEAD(&qgroup->members);
147 	INIT_LIST_HEAD(&qgroup->dirty);
148 
149 	rb_link_node(&qgroup->node, parent, p);
150 	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
151 
152 	return qgroup;
153 }
154 
/*
 * Remove the qgroup with the given id from the in-memory tree, dropping all
 * relations it participates in (both as member and as group), and free it.
 * Returns -ENOENT if no such qgroup exists.  must be called with
 * qgroup_lock held.
 */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
	struct btrfs_qgroup_list *list;

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	/* safe even if not dirty: the list head is (re)initialized */
	list_del(&qgroup->dirty);

	/* drop relations where this qgroup is the member */
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	/* drop relations where this qgroup is the parent group */
	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);

	return 0;
}
186 
187 /* must be called with qgroup_lock held */
188 static int add_relation_rb(struct btrfs_fs_info *fs_info,
189 			   u64 memberid, u64 parentid)
190 {
191 	struct btrfs_qgroup *member;
192 	struct btrfs_qgroup *parent;
193 	struct btrfs_qgroup_list *list;
194 
195 	member = find_qgroup_rb(fs_info, memberid);
196 	parent = find_qgroup_rb(fs_info, parentid);
197 	if (!member || !parent)
198 		return -ENOENT;
199 
200 	list = kzalloc(sizeof(*list), GFP_ATOMIC);
201 	if (!list)
202 		return -ENOMEM;
203 
204 	list->group = parent;
205 	list->member = member;
206 	list_add_tail(&list->next_group, &member->groups);
207 	list_add_tail(&list->next_member, &parent->members);
208 
209 	return 0;
210 }
211 
212 /* must be called with qgroup_lock held */
213 static int del_relation_rb(struct btrfs_fs_info *fs_info,
214 			   u64 memberid, u64 parentid)
215 {
216 	struct btrfs_qgroup *member;
217 	struct btrfs_qgroup *parent;
218 	struct btrfs_qgroup_list *list;
219 
220 	member = find_qgroup_rb(fs_info, memberid);
221 	parent = find_qgroup_rb(fs_info, parentid);
222 	if (!member || !parent)
223 		return -ENOENT;
224 
225 	list_for_each_entry(list, &member->groups, next_group) {
226 		if (list->group == parent) {
227 			list_del(&list->next_group);
228 			list_del(&list->next_member);
229 			kfree(list);
230 			return 0;
231 		}
232 	}
233 	return -ENOENT;
234 }
235 
236 /*
237  * The full config is read in one go, only called from open_ctree()
238  * It doesn't use any locking, as at this point we're still single-threaded
239  */
240 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
241 {
242 	struct btrfs_key key;
243 	struct btrfs_key found_key;
244 	struct btrfs_root *quota_root = fs_info->quota_root;
245 	struct btrfs_path *path = NULL;
246 	struct extent_buffer *l;
247 	int slot;
248 	int ret = 0;
249 	u64 flags = 0;
250 
251 	if (!fs_info->quota_enabled)
252 		return 0;
253 
254 	path = btrfs_alloc_path();
255 	if (!path) {
256 		ret = -ENOMEM;
257 		goto out;
258 	}
259 
260 	/* default this to quota off, in case no status key is found */
261 	fs_info->qgroup_flags = 0;
262 
263 	/*
264 	 * pass 1: read status, all qgroup infos and limits
265 	 */
266 	key.objectid = 0;
267 	key.type = 0;
268 	key.offset = 0;
269 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
270 	if (ret)
271 		goto out;
272 
273 	while (1) {
274 		struct btrfs_qgroup *qgroup;
275 
276 		slot = path->slots[0];
277 		l = path->nodes[0];
278 		btrfs_item_key_to_cpu(l, &found_key, slot);
279 
280 		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
281 			struct btrfs_qgroup_status_item *ptr;
282 
283 			ptr = btrfs_item_ptr(l, slot,
284 					     struct btrfs_qgroup_status_item);
285 
286 			if (btrfs_qgroup_status_version(l, ptr) !=
287 			    BTRFS_QGROUP_STATUS_VERSION) {
288 				printk(KERN_ERR
289 				 "btrfs: old qgroup version, quota disabled\n");
290 				goto out;
291 			}
292 			if (btrfs_qgroup_status_generation(l, ptr) !=
293 			    fs_info->generation) {
294 				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
295 				printk(KERN_ERR
296 					"btrfs: qgroup generation mismatch, "
297 					"marked as inconsistent\n");
298 			}
299 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
300 									  ptr);
301 			/* FIXME read scan element */
302 			goto next1;
303 		}
304 
305 		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
306 		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
307 			goto next1;
308 
309 		qgroup = find_qgroup_rb(fs_info, found_key.offset);
310 		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
311 		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
312 			printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
313 			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
314 		}
315 		if (!qgroup) {
316 			qgroup = add_qgroup_rb(fs_info, found_key.offset);
317 			if (IS_ERR(qgroup)) {
318 				ret = PTR_ERR(qgroup);
319 				goto out;
320 			}
321 		}
322 		switch (found_key.type) {
323 		case BTRFS_QGROUP_INFO_KEY: {
324 			struct btrfs_qgroup_info_item *ptr;
325 
326 			ptr = btrfs_item_ptr(l, slot,
327 					     struct btrfs_qgroup_info_item);
328 			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
329 			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
330 			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
331 			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
332 			/* generation currently unused */
333 			break;
334 		}
335 		case BTRFS_QGROUP_LIMIT_KEY: {
336 			struct btrfs_qgroup_limit_item *ptr;
337 
338 			ptr = btrfs_item_ptr(l, slot,
339 					     struct btrfs_qgroup_limit_item);
340 			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
341 			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
342 			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
343 			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
344 			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
345 			break;
346 		}
347 		}
348 next1:
349 		ret = btrfs_next_item(quota_root, path);
350 		if (ret < 0)
351 			goto out;
352 		if (ret)
353 			break;
354 	}
355 	btrfs_release_path(path);
356 
357 	/*
358 	 * pass 2: read all qgroup relations
359 	 */
360 	key.objectid = 0;
361 	key.type = BTRFS_QGROUP_RELATION_KEY;
362 	key.offset = 0;
363 	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
364 	if (ret)
365 		goto out;
366 	while (1) {
367 		slot = path->slots[0];
368 		l = path->nodes[0];
369 		btrfs_item_key_to_cpu(l, &found_key, slot);
370 
371 		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
372 			goto next2;
373 
374 		if (found_key.objectid > found_key.offset) {
375 			/* parent <- member, not needed to build config */
376 			/* FIXME should we omit the key completely? */
377 			goto next2;
378 		}
379 
380 		ret = add_relation_rb(fs_info, found_key.objectid,
381 				      found_key.offset);
382 		if (ret == -ENOENT) {
383 			printk(KERN_WARNING
384 				"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 				(unsigned long long)found_key.objectid,
386 				(unsigned long long)found_key.offset);
387 			ret = 0;	/* ignore the error */
388 		}
389 		if (ret)
390 			goto out;
391 next2:
392 		ret = btrfs_next_item(quota_root, path);
393 		if (ret < 0)
394 			goto out;
395 		if (ret)
396 			break;
397 	}
398 out:
399 	fs_info->qgroup_flags |= flags;
400 	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
401 		fs_info->quota_enabled = 0;
402 		fs_info->pending_quota_state = 0;
403 	}
404 	btrfs_free_path(path);
405 
406 	return ret < 0 ? ret : 0;
407 }
408 
/*
 * This is only called from close_ctree() or open_ctree(), both in single-
 * threaded paths. Clean up the in-memory structures. No locking needed.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;

	/* pop qgroups off the tree one by one until it is empty */
	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);

		WARN_ON(!list_empty(&qgroup->dirty));

		/* free relations where this qgroup is the member */
		while (!list_empty(&qgroup->groups)) {
			list = list_first_entry(&qgroup->groups,
						struct btrfs_qgroup_list,
						next_group);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}

		/* free relations where this qgroup is the parent group */
		while (!list_empty(&qgroup->members)) {
			list = list_first_entry(&qgroup->members,
						struct btrfs_qgroup_list,
						next_member);
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
		}
		kfree(qgroup);
	}
}
445 
446 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
447 				    struct btrfs_root *quota_root,
448 				    u64 src, u64 dst)
449 {
450 	int ret;
451 	struct btrfs_path *path;
452 	struct btrfs_key key;
453 
454 	path = btrfs_alloc_path();
455 	if (!path)
456 		return -ENOMEM;
457 
458 	key.objectid = src;
459 	key.type = BTRFS_QGROUP_RELATION_KEY;
460 	key.offset = dst;
461 
462 	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
463 
464 	btrfs_mark_buffer_dirty(path->nodes[0]);
465 
466 	btrfs_free_path(path);
467 	return ret;
468 }
469 
470 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
471 				    struct btrfs_root *quota_root,
472 				    u64 src, u64 dst)
473 {
474 	int ret;
475 	struct btrfs_path *path;
476 	struct btrfs_key key;
477 
478 	path = btrfs_alloc_path();
479 	if (!path)
480 		return -ENOMEM;
481 
482 	key.objectid = src;
483 	key.type = BTRFS_QGROUP_RELATION_KEY;
484 	key.offset = dst;
485 
486 	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
487 	if (ret < 0)
488 		goto out;
489 
490 	if (ret > 0) {
491 		ret = -ENOENT;
492 		goto out;
493 	}
494 
495 	ret = btrfs_del_item(trans, quota_root, path);
496 out:
497 	btrfs_free_path(path);
498 	return ret;
499 }
500 
/*
 * Create the on-disk representation of a new qgroup: a zeroed info item and
 * a zeroed limit item, both keyed (0, type, qgroupid).  Returns 0 or a
 * negative errno; on failure the tree may contain only the info item.
 */
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* reuse the path (and key, only the type differs) for the limit item */
	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				  struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
559 
/*
 * Delete the on-disk info and limit items of a qgroup.  Returns 0 on
 * success, -ENOENT if either item is missing, or another negative errno.
 */
static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first the info item ... */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	/* ... then the limit item, same key except for the type */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}
605 
606 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
607 				    struct btrfs_root *root, u64 qgroupid,
608 				    u64 flags, u64 max_rfer, u64 max_excl,
609 				    u64 rsv_rfer, u64 rsv_excl)
610 {
611 	struct btrfs_path *path;
612 	struct btrfs_key key;
613 	struct extent_buffer *l;
614 	struct btrfs_qgroup_limit_item *qgroup_limit;
615 	int ret;
616 	int slot;
617 
618 	key.objectid = 0;
619 	key.type = BTRFS_QGROUP_LIMIT_KEY;
620 	key.offset = qgroupid;
621 
622 	path = btrfs_alloc_path();
623 	BUG_ON(!path);
624 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
625 	if (ret > 0)
626 		ret = -ENOENT;
627 
628 	if (ret)
629 		goto out;
630 
631 	l = path->nodes[0];
632 	slot = path->slots[0];
633 	qgroup_limit = btrfs_item_ptr(l, path->slots[0],
634 				      struct btrfs_qgroup_limit_item);
635 	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
636 	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
637 	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
638 	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
639 	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
640 
641 	btrfs_mark_buffer_dirty(l);
642 
643 out:
644 	btrfs_free_path(path);
645 	return ret;
646 }
647 
648 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
649 				   struct btrfs_root *root,
650 				   struct btrfs_qgroup *qgroup)
651 {
652 	struct btrfs_path *path;
653 	struct btrfs_key key;
654 	struct extent_buffer *l;
655 	struct btrfs_qgroup_info_item *qgroup_info;
656 	int ret;
657 	int slot;
658 
659 	key.objectid = 0;
660 	key.type = BTRFS_QGROUP_INFO_KEY;
661 	key.offset = qgroup->qgroupid;
662 
663 	path = btrfs_alloc_path();
664 	BUG_ON(!path);
665 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
666 	if (ret > 0)
667 		ret = -ENOENT;
668 
669 	if (ret)
670 		goto out;
671 
672 	l = path->nodes[0];
673 	slot = path->slots[0];
674 	qgroup_info = btrfs_item_ptr(l, path->slots[0],
675 				 struct btrfs_qgroup_info_item);
676 	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
677 	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
678 	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
679 	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
680 	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
681 
682 	btrfs_mark_buffer_dirty(l);
683 
684 out:
685 	btrfs_free_path(path);
686 	return ret;
687 }
688 
689 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
690 				     struct btrfs_fs_info *fs_info,
691 				    struct btrfs_root *root)
692 {
693 	struct btrfs_path *path;
694 	struct btrfs_key key;
695 	struct extent_buffer *l;
696 	struct btrfs_qgroup_status_item *ptr;
697 	int ret;
698 	int slot;
699 
700 	key.objectid = 0;
701 	key.type = BTRFS_QGROUP_STATUS_KEY;
702 	key.offset = 0;
703 
704 	path = btrfs_alloc_path();
705 	BUG_ON(!path);
706 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
707 	if (ret > 0)
708 		ret = -ENOENT;
709 
710 	if (ret)
711 		goto out;
712 
713 	l = path->nodes[0];
714 	slot = path->slots[0];
715 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
716 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
717 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
718 	/* XXX scan */
719 
720 	btrfs_mark_buffer_dirty(l);
721 
722 out:
723 	btrfs_free_path(path);
724 	return ret;
725 }
726 
/*
 * Delete every item from the quota tree, one leaf-tail item at a time.
 * Used when quota is being disabled.  Returns 0 or a negative errno.
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	if (!root)
		return -EINVAL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		/* always search from the smallest possible key */
		key.objectid = 0;
		key.offset = 0;
		key.type = 0;

		path->leave_spinning = 1;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0) {
			/*
			 * key (0,0,0) never exists; slot 0 means the tree
			 * is empty and we are done
			 */
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		} else if (ret < 0) {
			break;
		}

		ret = btrfs_del_item(trans, root, path);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}
	ret = 0;
out:
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}
770 
/*
 * Enable quota: create the quota tree and its initial status item, then
 * publish the new root in fs_info.  If quota is already enabled, only the
 * pending state is set.  Returns 0 or a negative errno; on failure the
 * freshly created tree is torn down again.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret = 0;

	/* already enabled? then just make sure it stays enabled */
	spin_lock(&fs_info->qgroup_lock);
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		spin_unlock(&fs_info->qgroup_lock);
		goto out;
	}
	spin_unlock(&fs_info->qgroup_lock);

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret =  PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	/* start out inconsistent until a rescan has been done */
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_scan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* publish the new root under the lock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		/* undo btrfs_create_tree() */
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	return ret;
}
841 
/*
 * Disable quota: unpublish the quota root, drop the in-memory config,
 * delete all items from the quota tree and finally delete the tree itself.
 * Returns 0 or a negative errno; -EINVAL if quota was not enabled.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	/* detach the quota root so no new accounting happens */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	btrfs_free_qgroup_config(fs_info);
	spin_unlock(&fs_info->qgroup_lock);

	if (!quota_root)
		return -EINVAL;

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* free the (now empty) tree's root block */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	return ret;
}
881 
/*
 * Rescan all extents and rebuild the qgroup counters from scratch.
 * Not implemented yet; currently a no-op that reports success.
 */
int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
{
	/* FIXME */
	return 0;
}
887 
888 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
889 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
890 {
891 	struct btrfs_root *quota_root;
892 	int ret = 0;
893 
894 	quota_root = fs_info->quota_root;
895 	if (!quota_root)
896 		return -EINVAL;
897 
898 	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
899 	if (ret)
900 		return ret;
901 
902 	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
903 	if (ret) {
904 		del_qgroup_relation_item(trans, quota_root, src, dst);
905 		return ret;
906 	}
907 
908 	spin_lock(&fs_info->qgroup_lock);
909 	ret = add_relation_rb(quota_root->fs_info, src, dst);
910 	spin_unlock(&fs_info->qgroup_lock);
911 
912 	return ret;
913 }
914 
915 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
916 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
917 {
918 	struct btrfs_root *quota_root;
919 	int ret = 0;
920 	int err;
921 
922 	quota_root = fs_info->quota_root;
923 	if (!quota_root)
924 		return -EINVAL;
925 
926 	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
927 	err = del_qgroup_relation_item(trans, quota_root, dst, src);
928 	if (err && !ret)
929 		ret = err;
930 
931 	spin_lock(&fs_info->qgroup_lock);
932 	del_relation_rb(fs_info, src, dst);
933 
934 	spin_unlock(&fs_info->qgroup_lock);
935 
936 	return ret;
937 }
938 
939 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
940 			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
941 {
942 	struct btrfs_root *quota_root;
943 	struct btrfs_qgroup *qgroup;
944 	int ret = 0;
945 
946 	quota_root = fs_info->quota_root;
947 	if (!quota_root)
948 		return -EINVAL;
949 
950 	ret = add_qgroup_item(trans, quota_root, qgroupid);
951 
952 	spin_lock(&fs_info->qgroup_lock);
953 	qgroup = add_qgroup_rb(fs_info, qgroupid);
954 	spin_unlock(&fs_info->qgroup_lock);
955 
956 	if (IS_ERR(qgroup))
957 		ret = PTR_ERR(qgroup);
958 
959 	return ret;
960 }
961 
962 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
963 			struct btrfs_fs_info *fs_info, u64 qgroupid)
964 {
965 	struct btrfs_root *quota_root;
966 	struct btrfs_qgroup *qgroup;
967 	int ret = 0;
968 
969 	quota_root = fs_info->quota_root;
970 	if (!quota_root)
971 		return -EINVAL;
972 
973 	/* check if there are no relations to this qgroup */
974 	spin_lock(&fs_info->qgroup_lock);
975 	qgroup = find_qgroup_rb(fs_info, qgroupid);
976 	if (qgroup) {
977 		if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 			spin_unlock(&fs_info->qgroup_lock);
979 			return -EBUSY;
980 		}
981 	}
982 	spin_unlock(&fs_info->qgroup_lock);
983 
984 	ret = del_qgroup_item(trans, quota_root, qgroupid);
985 
986 	spin_lock(&fs_info->qgroup_lock);
987 	del_qgroup_rb(quota_root->fs_info, qgroupid);
988 	spin_unlock(&fs_info->qgroup_lock);
989 
990 	return ret;
991 }
992 
/*
 * Apply the given limits to a qgroup, on disk and in memory.  A failure to
 * update the on-disk item only marks the status inconsistent; the
 * in-memory limits are updated regardless.  Returns 0 or a negative
 * errno; -EINVAL if quota is not enabled, -ENOENT if the qgroup does not
 * exist in memory.
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	if (!quota_root)
		return -EINVAL;

	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		/* disk now disagrees with memory; flag it, don't bail out */
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		printk(KERN_INFO "unable to update quota limit for %llu\n",
		       (unsigned long long)qgroupid);
	}

	spin_lock(&fs_info->qgroup_lock);

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto unlock;
	}
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;

unlock:
	spin_unlock(&fs_info->qgroup_lock);

	return ret;
}
1032 
1033 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1034 			 struct btrfs_qgroup *qgroup)
1035 {
1036 	if (list_empty(&qgroup->dirty))
1037 		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1038 }
1039 
/*
 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
 * the modification into a list that's later used by btrfs_end_transaction to
 * pass the recorded modifications on to btrfs_qgroup_account_ref.
 *
 * Returns 0 or -ENOMEM.  The recorded node/extent_op pointers are borrowed,
 * not copied; they must stay valid until the list is processed.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_delayed_ref_node *node,
			    struct btrfs_delayed_extent_op *extent_op)
{
	struct qgroup_update *u;

	/* recording only makes sense while a delayed-ref seq is held */
	BUG_ON(!trans->delayed_ref_elem.seq);
	u = kmalloc(sizeof(*u), GFP_NOFS);
	if (!u)
		return -ENOMEM;

	u->node = node;
	u->extent_op = extent_op;
	list_add_tail(&u->list, &trans->qgroup_ref_list);

	return 0;
}
1062 
1063 /*
1064  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1065  * from the fs. First, all roots referencing the extent are searched, and
1066  * then the space is accounted accordingly to the different roots. The
1067  * accounting algorithm works in 3 steps documented inline.
1068  */
1069 int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
1070 			     struct btrfs_fs_info *fs_info,
1071 			     struct btrfs_delayed_ref_node *node,
1072 			     struct btrfs_delayed_extent_op *extent_op)
1073 {
1074 	struct btrfs_key ins;
1075 	struct btrfs_root *quota_root;
1076 	u64 ref_root;
1077 	struct btrfs_qgroup *qgroup;
1078 	struct ulist_node *unode;
1079 	struct ulist *roots = NULL;
1080 	struct ulist *tmp = NULL;
1081 	struct ulist_iterator uiter;
1082 	u64 seq;
1083 	int ret = 0;
1084 	int sgn;
1085 
1086 	if (!fs_info->quota_enabled)
1087 		return 0;
1088 
1089 	BUG_ON(!fs_info->quota_root);
1090 
1091 	ins.objectid = node->bytenr;
1092 	ins.offset = node->num_bytes;
1093 	ins.type = BTRFS_EXTENT_ITEM_KEY;
1094 
1095 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
1096 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
1097 		struct btrfs_delayed_tree_ref *ref;
1098 		ref = btrfs_delayed_node_to_tree_ref(node);
1099 		ref_root = ref->root;
1100 	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
1101 		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
1102 		struct btrfs_delayed_data_ref *ref;
1103 		ref = btrfs_delayed_node_to_data_ref(node);
1104 		ref_root = ref->root;
1105 	} else {
1106 		BUG();
1107 	}
1108 
1109 	if (!is_fstree(ref_root)) {
1110 		/*
1111 		 * non-fs-trees are not being accounted
1112 		 */
1113 		return 0;
1114 	}
1115 
1116 	switch (node->action) {
1117 	case BTRFS_ADD_DELAYED_REF:
1118 	case BTRFS_ADD_DELAYED_EXTENT:
1119 		sgn = 1;
1120 		break;
1121 	case BTRFS_DROP_DELAYED_REF:
1122 		sgn = -1;
1123 		break;
1124 	case BTRFS_UPDATE_DELAYED_HEAD:
1125 		return 0;
1126 	default:
1127 		BUG();
1128 	}
1129 
1130 	/*
1131 	 * the delayed ref sequence number we pass depends on the direction of
1132 	 * the operation. for add operations, we pass (node->seq - 1) to skip
1133 	 * the delayed ref's current sequence number, because we need the state
1134 	 * of the tree before the add operation. for delete operations, we pass
1135 	 * (node->seq) to include the delayed ref's current sequence number,
1136 	 * because we need the state of the tree after the delete operation.
1137 	 */
1138 	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
1139 				   sgn > 0 ? node->seq - 1 : node->seq, &roots);
1140 	if (ret < 0)
1141 		goto out;
1142 
1143 	spin_lock(&fs_info->qgroup_lock);
1144 	quota_root = fs_info->quota_root;
1145 	if (!quota_root)
1146 		goto unlock;
1147 
1148 	qgroup = find_qgroup_rb(fs_info, ref_root);
1149 	if (!qgroup)
1150 		goto unlock;
1151 
1152 	/*
1153 	 * step 1: for each old ref, visit all nodes once and inc refcnt
1154 	 */
1155 	tmp = ulist_alloc(GFP_ATOMIC);
1156 	if (!tmp) {
1157 		ret = -ENOMEM;
1158 		goto unlock;
1159 	}
1160 	seq = fs_info->qgroup_seq;
1161 	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
1162 
1163 	ULIST_ITER_INIT(&uiter);
1164 	while ((unode = ulist_next(roots, &uiter))) {
1165 		struct ulist_node *tmp_unode;
1166 		struct ulist_iterator tmp_uiter;
1167 		struct btrfs_qgroup *qg;
1168 
1169 		qg = find_qgroup_rb(fs_info, unode->val);
1170 		if (!qg)
1171 			continue;
1172 
1173 		ulist_reinit(tmp);
1174 						/* XXX id not needed */
1175 		ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
1176 		ULIST_ITER_INIT(&tmp_uiter);
1177 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1178 			struct btrfs_qgroup_list *glist;
1179 
1180 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1181 			if (qg->refcnt < seq)
1182 				qg->refcnt = seq + 1;
1183 			else
1184 				++qg->refcnt;
1185 
1186 			list_for_each_entry(glist, &qg->groups, next_group) {
1187 				ulist_add(tmp, glist->group->qgroupid,
1188 					  (u64)(uintptr_t)glist->group,
1189 					  GFP_ATOMIC);
1190 			}
1191 		}
1192 	}
1193 
1194 	/*
1195 	 * step 2: walk from the new root
1196 	 */
1197 	ulist_reinit(tmp);
1198 	ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1199 	ULIST_ITER_INIT(&uiter);
1200 	while ((unode = ulist_next(tmp, &uiter))) {
1201 		struct btrfs_qgroup *qg;
1202 		struct btrfs_qgroup_list *glist;
1203 
1204 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1205 		if (qg->refcnt < seq) {
1206 			/* not visited by step 1 */
1207 			qg->rfer += sgn * node->num_bytes;
1208 			qg->rfer_cmpr += sgn * node->num_bytes;
1209 			if (roots->nnodes == 0) {
1210 				qg->excl += sgn * node->num_bytes;
1211 				qg->excl_cmpr += sgn * node->num_bytes;
1212 			}
1213 			qgroup_dirty(fs_info, qg);
1214 		}
1215 		WARN_ON(qg->tag >= seq);
1216 		qg->tag = seq;
1217 
1218 		list_for_each_entry(glist, &qg->groups, next_group) {
1219 			ulist_add(tmp, glist->group->qgroupid,
1220 				  (uintptr_t)glist->group, GFP_ATOMIC);
1221 		}
1222 	}
1223 
1224 	/*
1225 	 * step 3: walk again from old refs
1226 	 */
1227 	ULIST_ITER_INIT(&uiter);
1228 	while ((unode = ulist_next(roots, &uiter))) {
1229 		struct btrfs_qgroup *qg;
1230 		struct ulist_node *tmp_unode;
1231 		struct ulist_iterator tmp_uiter;
1232 
1233 		qg = find_qgroup_rb(fs_info, unode->val);
1234 		if (!qg)
1235 			continue;
1236 
1237 		ulist_reinit(tmp);
1238 		ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
1239 		ULIST_ITER_INIT(&tmp_uiter);
1240 		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1241 			struct btrfs_qgroup_list *glist;
1242 
1243 			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
1244 			if (qg->tag == seq)
1245 				continue;
1246 
1247 			if (qg->refcnt - seq == roots->nnodes) {
1248 				qg->excl -= sgn * node->num_bytes;
1249 				qg->excl_cmpr -= sgn * node->num_bytes;
1250 				qgroup_dirty(fs_info, qg);
1251 			}
1252 
1253 			list_for_each_entry(glist, &qg->groups, next_group) {
1254 				ulist_add(tmp, glist->group->qgroupid,
1255 					  (uintptr_t)glist->group,
1256 					  GFP_ATOMIC);
1257 			}
1258 		}
1259 	}
1260 	ret = 0;
1261 unlock:
1262 	spin_unlock(&fs_info->qgroup_lock);
1263 out:
1264 	ulist_free(roots);
1265 	ulist_free(tmp);
1266 
1267 	return ret;
1268 }
1269 
1270 /*
1271  * called from commit_transaction. Writes all changed qgroups to disk.
1272  */
1273 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1274 		      struct btrfs_fs_info *fs_info)
1275 {
1276 	struct btrfs_root *quota_root = fs_info->quota_root;
1277 	int ret = 0;
1278 
1279 	if (!quota_root)
1280 		goto out;
1281 
1282 	fs_info->quota_enabled = fs_info->pending_quota_state;
1283 
1284 	spin_lock(&fs_info->qgroup_lock);
1285 	while (!list_empty(&fs_info->dirty_qgroups)) {
1286 		struct btrfs_qgroup *qgroup;
1287 		qgroup = list_first_entry(&fs_info->dirty_qgroups,
1288 					  struct btrfs_qgroup, dirty);
1289 		list_del_init(&qgroup->dirty);
1290 		spin_unlock(&fs_info->qgroup_lock);
1291 		ret = update_qgroup_info_item(trans, quota_root, qgroup);
1292 		if (ret)
1293 			fs_info->qgroup_flags |=
1294 					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1295 		spin_lock(&fs_info->qgroup_lock);
1296 	}
1297 	if (fs_info->quota_enabled)
1298 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1299 	else
1300 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1301 	spin_unlock(&fs_info->qgroup_lock);
1302 
1303 	ret = update_qgroup_status_item(trans, fs_info, quota_root);
1304 	if (ret)
1305 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1306 
1307 out:
1308 
1309 	return ret;
1310 }
1311 
1312 /*
1313  * copy the acounting information between qgroups. This is necessary when a
1314  * snapshot or a subvolume is created
1315  */
1316 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
1317 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
1318 			 struct btrfs_qgroup_inherit *inherit)
1319 {
1320 	int ret = 0;
1321 	int i;
1322 	u64 *i_qgroups;
1323 	struct btrfs_root *quota_root = fs_info->quota_root;
1324 	struct btrfs_qgroup *srcgroup;
1325 	struct btrfs_qgroup *dstgroup;
1326 	u32 level_size = 0;
1327 
1328 	if (!fs_info->quota_enabled)
1329 		return 0;
1330 
1331 	if (!quota_root)
1332 		return -EINVAL;
1333 
1334 	/*
1335 	 * create a tracking group for the subvol itself
1336 	 */
1337 	ret = add_qgroup_item(trans, quota_root, objectid);
1338 	if (ret)
1339 		goto out;
1340 
1341 	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
1342 		ret = update_qgroup_limit_item(trans, quota_root, objectid,
1343 					       inherit->lim.flags,
1344 					       inherit->lim.max_rfer,
1345 					       inherit->lim.max_excl,
1346 					       inherit->lim.rsv_rfer,
1347 					       inherit->lim.rsv_excl);
1348 		if (ret)
1349 			goto out;
1350 	}
1351 
1352 	if (srcid) {
1353 		struct btrfs_root *srcroot;
1354 		struct btrfs_key srckey;
1355 		int srcroot_level;
1356 
1357 		srckey.objectid = srcid;
1358 		srckey.type = BTRFS_ROOT_ITEM_KEY;
1359 		srckey.offset = (u64)-1;
1360 		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
1361 		if (IS_ERR(srcroot)) {
1362 			ret = PTR_ERR(srcroot);
1363 			goto out;
1364 		}
1365 
1366 		rcu_read_lock();
1367 		srcroot_level = btrfs_header_level(srcroot->node);
1368 		level_size = btrfs_level_size(srcroot, srcroot_level);
1369 		rcu_read_unlock();
1370 	}
1371 
1372 	/*
1373 	 * add qgroup to all inherited groups
1374 	 */
1375 	if (inherit) {
1376 		i_qgroups = (u64 *)(inherit + 1);
1377 		for (i = 0; i < inherit->num_qgroups; ++i) {
1378 			ret = add_qgroup_relation_item(trans, quota_root,
1379 						       objectid, *i_qgroups);
1380 			if (ret)
1381 				goto out;
1382 			ret = add_qgroup_relation_item(trans, quota_root,
1383 						       *i_qgroups, objectid);
1384 			if (ret)
1385 				goto out;
1386 			++i_qgroups;
1387 		}
1388 	}
1389 
1390 
1391 	spin_lock(&fs_info->qgroup_lock);
1392 
1393 	dstgroup = add_qgroup_rb(fs_info, objectid);
1394 	if (IS_ERR(dstgroup)) {
1395 		ret = PTR_ERR(dstgroup);
1396 		goto unlock;
1397 	}
1398 
1399 	if (srcid) {
1400 		srcgroup = find_qgroup_rb(fs_info, srcid);
1401 		if (!srcgroup)
1402 			goto unlock;
1403 		dstgroup->rfer = srcgroup->rfer - level_size;
1404 		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
1405 		srcgroup->excl = level_size;
1406 		srcgroup->excl_cmpr = level_size;
1407 		qgroup_dirty(fs_info, dstgroup);
1408 		qgroup_dirty(fs_info, srcgroup);
1409 	}
1410 
1411 	if (!inherit)
1412 		goto unlock;
1413 
1414 	i_qgroups = (u64 *)(inherit + 1);
1415 	for (i = 0; i < inherit->num_qgroups; ++i) {
1416 		ret = add_relation_rb(quota_root->fs_info, objectid,
1417 				      *i_qgroups);
1418 		if (ret)
1419 			goto unlock;
1420 		++i_qgroups;
1421 	}
1422 
1423 	for (i = 0; i <  inherit->num_ref_copies; ++i) {
1424 		struct btrfs_qgroup *src;
1425 		struct btrfs_qgroup *dst;
1426 
1427 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1428 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1429 
1430 		if (!src || !dst) {
1431 			ret = -EINVAL;
1432 			goto unlock;
1433 		}
1434 
1435 		dst->rfer = src->rfer - level_size;
1436 		dst->rfer_cmpr = src->rfer_cmpr - level_size;
1437 		i_qgroups += 2;
1438 	}
1439 	for (i = 0; i <  inherit->num_excl_copies; ++i) {
1440 		struct btrfs_qgroup *src;
1441 		struct btrfs_qgroup *dst;
1442 
1443 		src = find_qgroup_rb(fs_info, i_qgroups[0]);
1444 		dst = find_qgroup_rb(fs_info, i_qgroups[1]);
1445 
1446 		if (!src || !dst) {
1447 			ret = -EINVAL;
1448 			goto unlock;
1449 		}
1450 
1451 		dst->excl = src->excl + level_size;
1452 		dst->excl_cmpr = src->excl_cmpr + level_size;
1453 		i_qgroups += 2;
1454 	}
1455 
1456 unlock:
1457 	spin_unlock(&fs_info->qgroup_lock);
1458 out:
1459 	return ret;
1460 }
1461 
1462 /*
1463  * reserve some space for a qgroup and all its parents. The reservation takes
1464  * place with start_transaction or dealloc_reserve, similar to ENOSPC
1465  * accounting. If not enough space is available, EDQUOT is returned.
1466  * We assume that the requested space is new for all qgroups.
1467  */
1468 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
1469 {
1470 	struct btrfs_root *quota_root;
1471 	struct btrfs_qgroup *qgroup;
1472 	struct btrfs_fs_info *fs_info = root->fs_info;
1473 	u64 ref_root = root->root_key.objectid;
1474 	int ret = 0;
1475 	struct ulist *ulist = NULL;
1476 	struct ulist_node *unode;
1477 	struct ulist_iterator uiter;
1478 
1479 	if (!is_fstree(ref_root))
1480 		return 0;
1481 
1482 	if (num_bytes == 0)
1483 		return 0;
1484 
1485 	spin_lock(&fs_info->qgroup_lock);
1486 	quota_root = fs_info->quota_root;
1487 	if (!quota_root)
1488 		goto out;
1489 
1490 	qgroup = find_qgroup_rb(fs_info, ref_root);
1491 	if (!qgroup)
1492 		goto out;
1493 
1494 	/*
1495 	 * in a first step, we check all affected qgroups if any limits would
1496 	 * be exceeded
1497 	 */
1498 	ulist = ulist_alloc(GFP_ATOMIC);
1499 	if (!ulist) {
1500 		ret = -ENOMEM;
1501 		goto out;
1502 	}
1503 	ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1504 	ULIST_ITER_INIT(&uiter);
1505 	while ((unode = ulist_next(ulist, &uiter))) {
1506 		struct btrfs_qgroup *qg;
1507 		struct btrfs_qgroup_list *glist;
1508 
1509 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1510 
1511 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
1512 		    qg->reserved + qg->rfer + num_bytes >
1513 		    qg->max_rfer)
1514 			ret = -EDQUOT;
1515 
1516 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
1517 		    qg->reserved + qg->excl + num_bytes >
1518 		    qg->max_excl)
1519 			ret = -EDQUOT;
1520 
1521 		list_for_each_entry(glist, &qg->groups, next_group) {
1522 			ulist_add(ulist, glist->group->qgroupid,
1523 				  (uintptr_t)glist->group, GFP_ATOMIC);
1524 		}
1525 	}
1526 	if (ret)
1527 		goto out;
1528 
1529 	/*
1530 	 * no limits exceeded, now record the reservation into all qgroups
1531 	 */
1532 	ULIST_ITER_INIT(&uiter);
1533 	while ((unode = ulist_next(ulist, &uiter))) {
1534 		struct btrfs_qgroup *qg;
1535 
1536 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1537 
1538 		qg->reserved += num_bytes;
1539 	}
1540 
1541 out:
1542 	spin_unlock(&fs_info->qgroup_lock);
1543 	ulist_free(ulist);
1544 
1545 	return ret;
1546 }
1547 
1548 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
1549 {
1550 	struct btrfs_root *quota_root;
1551 	struct btrfs_qgroup *qgroup;
1552 	struct btrfs_fs_info *fs_info = root->fs_info;
1553 	struct ulist *ulist = NULL;
1554 	struct ulist_node *unode;
1555 	struct ulist_iterator uiter;
1556 	u64 ref_root = root->root_key.objectid;
1557 
1558 	if (!is_fstree(ref_root))
1559 		return;
1560 
1561 	if (num_bytes == 0)
1562 		return;
1563 
1564 	spin_lock(&fs_info->qgroup_lock);
1565 
1566 	quota_root = fs_info->quota_root;
1567 	if (!quota_root)
1568 		goto out;
1569 
1570 	qgroup = find_qgroup_rb(fs_info, ref_root);
1571 	if (!qgroup)
1572 		goto out;
1573 
1574 	ulist = ulist_alloc(GFP_ATOMIC);
1575 	if (!ulist) {
1576 		btrfs_std_error(fs_info, -ENOMEM);
1577 		goto out;
1578 	}
1579 	ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
1580 	ULIST_ITER_INIT(&uiter);
1581 	while ((unode = ulist_next(ulist, &uiter))) {
1582 		struct btrfs_qgroup *qg;
1583 		struct btrfs_qgroup_list *glist;
1584 
1585 		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
1586 
1587 		qg->reserved -= num_bytes;
1588 
1589 		list_for_each_entry(glist, &qg->groups, next_group) {
1590 			ulist_add(ulist, glist->group->qgroupid,
1591 				  (uintptr_t)glist->group, GFP_ATOMIC);
1592 		}
1593 	}
1594 
1595 out:
1596 	spin_unlock(&fs_info->qgroup_lock);
1597 	ulist_free(ulist);
1598 }
1599 
1600 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1601 {
1602 	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1603 		return;
1604 	printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
1605 		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
1606 		trans->delayed_ref_elem.seq);
1607 	BUG();
1608 }
1609