/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 old_refcnt;
	u64 new_refcnt;
};

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;
	struct list_head next_member;
	struct btrfs_qgroup *group;
	struct btrfs_qgroup *member;
};

#define ptr_to_u64(x) ((u64)(uintptr_t)x)
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *qgroup;

	while (*p) {
		parent = *p;
		qgroup = rb_entry(parent, struct btrfs_qgroup, node);

		if (qgroup->qgroupid < qgroupid)
			p = &(*p)->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			p = &(*p)->rb_right;
		else
			return qgroup;
	}

	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
	if (!qgroup)
		return ERR_PTR(-ENOMEM);

	qgroup->qgroupid = qgroupid;
	INIT_LIST_HEAD(&qgroup->groups);
	INIT_LIST_HEAD(&qgroup->members);
	INIT_LIST_HEAD(&qgroup->dirty);

	rb_link_node(&qgroup->node, parent, p);
	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

	return qgroup;
}

static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
	struct btrfs_qgroup_list *list;

	list_del(&qgroup->dirty);
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(qgroup);
	return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list = kzalloc(sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	list->group = parent;
	list->member = member;
	list_add_tail(&list->next_group, &member->groups);
	list_add_tail(&list->next_member, &parent->members);

	return 0;
}

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup)
		return -EINVAL;
	if (qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif

/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!fs_info->quota_enabled)
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
					  "old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					  "qgroup generation mismatch, "
					  "marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				   "orphan qgroup relation 0x%llx->0x%llx",
				   found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
		fs_info->quota_enabled = 0;
		fs_info->pending_quota_state = 0;
	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		   ret >= 0) {
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	}
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree(), open_ctree() or btrfs_quota_disable();
 * the first two are single-threaded paths. For the third, quota_root has
 * already been set to NULL with qgroup_lock held, so it is safe to clean up
 * the in-memory structures without holding qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when umounting the
	 * filesystem and when disabling quota, so we set qgroup_ulist
	 * to be null here to avoid double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
}

static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state)))
		return 0;
#endif
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root, u64 qgroupid,
				    u64 flags, u64 max_rfer, u64 max_excl,
				    u64 rsv_rfer, u64 rsv_excl)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
	if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
		return 0;
#endif
	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static void qgroup_dirty(struct btrfs_fs_info *fs_info,
			 struct btrfs_qgroup *qgroup)
{
	if (list_empty(&qgroup->dirty))
		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists already */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;
	int err;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists already */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent)
			goto exist;
	}
	ret = -ENOENT;
	goto out;
exist:
	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
	err = del_qgroup_relation_item(trans, quota_root, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	} else {
		/* check if there are no relations to this qgroup */
		if (!list_empty(&qgroup->groups) ||
		    !list_empty(&qgroup->members)) {
			ret = -EBUSY;
			goto out;
		}
	}
	ret = del_qgroup_item(trans, quota_root, qgroupid);

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(quota_root->fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
			   qgroupid);
	}

	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
			   struct btrfs_qgroup_operation *oper2)
{
	/*
	 * Ignore seq and type here, we're looking for any operation
	 * at all related to this extent on that root.
	 */
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node *n;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	n = fs_info->qgroup_op_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct btrfs_qgroup_operation, n);
		cmp = comp_oper_exist(cur, oper);
		if (cmp < 0) {
			n = n->rb_right;
		} else if (cmp) {
			n = n->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}

static int comp_oper(struct btrfs_qgroup_operation *oper1,
		     struct btrfs_qgroup_operation *oper2)
{
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->seq < oper2->seq)
		return -1;
	if (oper1->seq > oper2->seq)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	if (oper1->type < oper2->type)
		return -1;
	if (oper1->type > oper2->type)
		return 1;
	return 0;
}

static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	p = &fs_info->qgroup_op_tree.rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
		cmp = comp_oper(cur, oper);
		if (cmp < 0) {
			p = &(*p)->rb_right;
		} else if (cmp) {
			p = &(*p)->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	rb_link_node(&oper->n, parent, p);
	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}

/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on,
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point.  If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info, u64 ref_root,
			    u64 bytenr, u64 num_bytes,
			    enum btrfs_qgroup_operation_type type, int mod_seq)
{
	struct btrfs_qgroup_operation *oper;
	int ret;

	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
		return 0;

	oper = kmalloc(sizeof(*oper), GFP_NOFS);
	if (!oper)
		return -ENOMEM;

	oper->ref_root = ref_root;
	oper->bytenr = bytenr;
	oper->num_bytes = num_bytes;
	oper->type = type;
	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
	INIT_LIST_HEAD(&oper->elem.list);
	oper->elem.seq = 0;

	if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
		/*
		 * If any operation for this bytenr/ref_root combo
		 * exists, then we know it's not exclusively owned and
		 * shouldn't be queued up.
		 *
		 * This also catches the case where we have a cloned
		 * extent that gets queued up multiple times during
		 * drop snapshot.
		 */
		if (qgroup_oper_exists(fs_info, oper)) {
			kfree(oper);
			return 0;
		}
	}

	ret = insert_qgroup_oper(fs_info, oper);
	if (ret) {
		/* Shouldn't happen so have an assert for developers */
		ASSERT(0);
		kfree(oper);
		return ret;
	}
	list_add_tail(&oper->list, &trans->qgroup_ref_list);

	if (mod_seq)
		btrfs_get_tree_mod_seq(fs_info, &oper->elem);

	return 0;
}

/*
 * The easy accounting, if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their reference and
 * exclusive counts adjusted.
 */
static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper)
{
	struct btrfs_qgroup *qgroup;
	struct ulist *tmp;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int sign = 0;
	int ret = 0;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	spin_lock(&fs_info->qgroup_lock);
	if (!fs_info->quota_root)
		goto out;
	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
	if (!qgroup)
		goto out;
	switch (oper->type) {
	case BTRFS_QGROUP_OPER_ADD_EXCL:
		sign = 1;
		break;
	case BTRFS_QGROUP_OPER_SUB_EXCL:
		sign = -1;
		break;
	default:
		ASSERT(0);
	}
	qgroup->rfer += sign * oper->num_bytes;
	qgroup->rfer_cmpr += sign * oper->num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
	qgroup->excl += sign * oper->num_bytes;
	qgroup->excl_cmpr += sign * oper->num_bytes;

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				ptr_to_u64(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = u64_to_ptr(unode->aux);
		qgroup->rfer += sign * oper->num_bytes;
		qgroup->rfer_cmpr += sign * oper->num_bytes;
		qgroup->excl += sign * oper->num_bytes;
		if (sign < 0)
			WARN_ON(qgroup->excl < oper->num_bytes);
		qgroup->excl_cmpr += sign * oper->num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(tmp);
	return ret;
}

/*
 * Walk all of the roots that pointed to our bytenr and adjust their refcnts
 * properly.
 */
static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
				  u64 root_to_skip, struct ulist *tmp,
				  struct ulist *roots, struct ulist *qgroups,
				  u64 seq, int *old_roots, int rescan)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* We don't count our current root here */
		if (unode->val == root_to_skip)
			continue;
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;
		/*
		 * We could have a pending removal of this same ref so we may
		 * not have actually found our ref root when doing
		 * btrfs_find_all_roots, so we need to keep track of how many
		 * old roots we find in case we removed ours and added a
		 * different one at the same time.  I don't think this could
		 * happen in practice but that sort of thinking leads to pain
		 * and suffering and to the dark side.
		 */
		(*old_roots)++;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = u64_to_ptr(tmp_unode->aux);
			/*
			 * We use this sequence number to keep from having to
			 * run the whole list and 0 out the refcnt every time.
			 * We basically use sequence as the known 0 count and
			 * then add 1 every time we see a qgroup.  This is how
			 * we get how many of the roots actually point up to
			 * the upper level qgroups in order to determine
			 * exclusive counts.
			 *
			 * For rescan we want to set old_refcnt to seq so our
			 * exclusive calculations end up correct.
			 */
			if (rescan)
				qg->old_refcnt = seq;
			else if (qg->old_refcnt < seq)
				qg->old_refcnt = seq + 1;
			else
				qg->old_refcnt++;

			if (qg->new_refcnt < seq)
				qg->new_refcnt = seq + 1;
			else
				qg->new_refcnt++;
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * We need to walk forward in our operation tree and account for any roots that
 * were deleted after we made this operation.
 */
static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
				       struct btrfs_qgroup_operation *oper,
				       struct ulist *tmp,
				       struct ulist *qgroups, u64 seq,
				       int *old_roots)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_operation *tmp_oper;
	struct rb_node *n;
	int ret;

	ulist_reinit(tmp);

	/*
	 * We only walk forward in the tree since we're only interested in
	 * removals that happened _after_ our operation.
	 */
	spin_lock(&fs_info->qgroup_op_lock);
	n = rb_next(&oper->n);
	spin_unlock(&fs_info->qgroup_op_lock);
	if (!n)
		return 0;
	tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
	while (tmp_oper->bytenr == oper->bytenr) {
		/*
		 * If it's not a removal we don't care, additions work out
		 * properly with our refcnt tracking.
		 */
		if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
		    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
			goto next;
		qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
		if (!qg)
			goto next;
		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
				GFP_ATOMIC);
		if (ret) {
			if (ret < 0)
				return ret;
			/*
			 * We only want to increase old_roots if this qgroup is
			 * not already in the list of qgroups.  If it is already
			 * there then that means it must have been re-added or
			 * the delete will be discarded because we had an
			 * existing ref that we haven't looked up yet.  In this
			 * case we don't want to increase old_roots.  So if ret
			 * == 1 then we know that this is the first time we've
			 * seen this qgroup and we can bump the old_roots.
			 */
			(*old_roots)++;
			ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
					GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
next:
		spin_lock(&fs_info->qgroup_op_lock);
		n = rb_next(&tmp_oper->n);
		spin_unlock(&fs_info->qgroup_op_lock);
		if (!n)
			break;
		tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
	}

	/* Ok now process the qgroups we found */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		struct btrfs_qgroup_list *glist;

		qg = u64_to_ptr(unode->aux);
		if (qg->old_refcnt < seq)
			qg->old_refcnt = seq + 1;
		else
			qg->old_refcnt++;
		if (qg->new_refcnt < seq)
			qg->new_refcnt = seq + 1;
		else
			qg->new_refcnt++;
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(qgroups, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

/* Add refcnt for the newly added reference. */
static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper,
				  struct btrfs_qgroup *qgroup,
				  struct ulist *tmp, struct ulist *qgroups,
				  u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (ret < 0)
		return ret;
	ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (ret < 0)
		return ret;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		struct btrfs_qgroup_list *glist;

		qg = u64_to_ptr(unode->aux);
		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
			if (qg->new_refcnt < seq)
				qg->new_refcnt = seq + 1;
			else
				qg->new_refcnt++;
		} else {
			if (qg->old_refcnt < seq)
				qg->old_refcnt = seq + 1;
			else
				qg->old_refcnt++;
		}
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
			ret = ulist_add(qgroups, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

/*
 * This adjusts the counters for all referenced qgroups if need be.
 */
static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
				  u64 root_to_skip, u64 num_bytes,
				  struct ulist *qgroups, u64 seq,
				  int old_roots, int new_roots, int rescan)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	u64 cur_new_count, cur_old_count;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(qgroups, &uiter))) {
		bool dirty = false;

		qg = u64_to_ptr(unode->aux);
		/*
		 * Wasn't referenced before but is now, add to the reference
		 * counters.
		 */
		if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		}

		/*
		 * Was referenced before but isn't now, subtract from the
		 * reference counters.
		 */
		if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		if (qg->old_refcnt < seq)
			cur_old_count = 0;
		else
			cur_old_count = qg->old_refcnt - seq;
		if (qg->new_refcnt < seq)
			cur_new_count = 0;
		else
			cur_new_count = qg->new_refcnt - seq;

		/*
		 * If our refcount was the same as the roots previously but our
		 * new count isn't the same as the number of roots now then we
		 * went from having an exclusive reference on this range to not.
		 */
		if (old_roots && cur_old_count == old_roots &&
		    (cur_new_count != new_roots || new_roots == 0)) {
			WARN_ON(cur_new_count != new_roots && new_roots == 0);
			qg->excl -= num_bytes;
			qg->excl_cmpr -= num_bytes;
			dirty = true;
		}

		/*
		 * If we didn't reference all the roots before but now we do we
		 * have an exclusive reference to this range.
		 */
		if ((!old_roots || (old_roots && cur_old_count != old_roots))
		    && cur_new_count == new_roots) {
			qg->excl += num_bytes;
			qg->excl_cmpr += num_bytes;
			dirty = true;
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}
	return 0;
}

/*
 * If we removed a data extent and there were other references for that bytenr
 * then we need to lookup all referenced roots to make sure we still don't
 * reference this bytenr.  If we do then we can just discard this operation.
 */
static int check_existing_refs(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *fs_info,
			       struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
				   oper->elem.seq, &roots);
	if (ret < 0)
		return ret;
	ret = 0;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		if (unode->val == oper->ref_root) {
			ret = 1;
			break;
		}
	}
	ulist_free(roots);
	btrfs_put_tree_mod_seq(fs_info, &oper->elem);

	return ret;
}

/*
 * If we share a reference across multiple roots then we may need to adjust
 * various qgroups referenced and exclusive counters.  The basic premise is
 * this
 *
 * 1) We have seq to represent a 0 count.  Instead of looping through all of
 * the qgroups and resetting their refcount to 0 we just constantly bump this
 * sequence number to act as the base reference count.  This means that if
 * anybody is equal to or below this sequence they were never referenced.  We
 * jack this sequence up by the number of roots we found each time in order to
 * make sure we don't have any overlap.
 *
 * 2) We first search all the roots that reference the area _except_ the root
 * we're acting on currently.  This makes up the old_refcnt of all the qgroups
 * before.
 *
 * 3) We walk all of the qgroups referenced by the root we are currently acting
 * on, and will either adjust old_refcnt in the case of a removal or the
 * new_refcnt in the case of an addition.
 *
 * 4) Finally we walk all the qgroups that are referenced by this range
 * including the root we are acting on currently.  We will adjust the counters
 * based on the number of roots we had and will have after this operation.
 *
 * Take this example as an illustration
 *
 *                      [qgroup 1/0]
 *                     /      |      \
 *              [qg 0/0]  [qg 0/1]  [qg 0/2]
 *                     \      |      /
 *                      [   extent  ]
 *
 * Say we are adding a reference that is covered by qg 0/0.  The first step
 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
 * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
 * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
 * reference and thus must add the size to the referenced bytes.  Everything
 * else is the same so nothing else changes.
 */
static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_info *fs_info,
				    struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist *qgroups, *tmp;
	struct btrfs_qgroup *qgroup;
	struct seq_list elem = {};
	u64 seq;
	int old_roots = 0;
	int new_roots = 0;
	int ret = 0;

	if (oper->elem.seq) {
		ret = check_existing_refs(trans, fs_info, oper);
		if (ret < 0)
			return ret;
		if (ret)
			return 0;
	}

	qgroups = ulist_alloc(GFP_NOFS);
	if (!qgroups)
		return -ENOMEM;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp) {
		ulist_free(qgroups);
		return -ENOMEM;
	}

	btrfs_get_tree_mod_seq(fs_info, &elem);
	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
				   &roots);
	btrfs_put_tree_mod_seq(fs_info, &elem);
	if (ret < 0) {
		ulist_free(qgroups);
		ulist_free(tmp);
		return ret;
	}
	spin_lock(&fs_info->qgroup_lock);
	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
	if (!qgroup)
		goto out;
	seq = fs_info->qgroup_seq;

	/*
	 * So roots is the list of all the roots currently pointing at the
	 * bytenr, including the ref we are adding if we are adding, or not if
	 * we are removing a ref.  So we pass in the ref_root to skip that root
	 * in our calculations.  We set old_refcnt and new_refcnt cause who the
	 * hell knows what everything looked like before, and it doesn't matter
	 * except...
	 */
	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
				     seq, &old_roots, 0);
	if (ret < 0)
		goto out;

	/*
	 * Now adjust the refcounts of the qgroups that care about this
	 * reference, either the old_count in the case of removal or new_count
	 * in the case of an addition.
	 */
	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
				     seq);
	if (ret < 0)
		goto out;

	/*
	 * ...in the case of removals.  If we had a removal before we got around
	 * to processing this operation then we need to find that guy and count
	 * his references as if they really existed so we don't end up screwing
	 * up the exclusive counts.  Then whenever we go to process the delete
	 * everything will be grand and we can account for whatever exclusive
	 * changes need to be made there.  We also have to pass in old_roots so
	 * we have an accurate count of the roots as it pertains to this
	 * operation's view of the world.
	 */
	ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
					  &old_roots);
	if (ret < 0)
		goto out;

	/*
	 * We are adding our root, need to adjust up the number of roots,
	 * otherwise old_roots is the number of roots we want.
	 */
	if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
		new_roots = old_roots + 1;
	} else {
		new_roots = old_roots;
		old_roots++;
	}
	fs_info->qgroup_seq += old_roots + 1;


	/*
	 * And now the magic happens, bless Arne for having a pretty elegant
	 * solution for this.
	 */
	qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
			       qgroups, seq, old_roots, new_roots, 0);
out:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(qgroups);
	ulist_free(roots);
	ulist_free(tmp);
	return ret;
}

/*
 * Process a reference to a shared subtree. This type of operation is
 * queued during snapshot removal when we encounter extents which are
 * shared between more than one root.
 */
static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup_list *glist;
	struct ulist *parents;
	int ret = 0;
	int err;
	struct btrfs_qgroup *qg;
	u64 root_obj = 0;
	struct seq_list elem = {};

	parents = ulist_alloc(GFP_NOFS);
	if (!parents)
		return -ENOMEM;

	btrfs_get_tree_mod_seq(fs_info, &elem);
	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
				   elem.seq, &roots);
	btrfs_put_tree_mod_seq(fs_info, &elem);
	if (ret < 0)
		goto out;

	if (roots->nnodes != 1)
		goto out;

	ULIST_ITER_INIT(&uiter);
	unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
	/*
	 * If we find our ref root then that means all refs
	 * this extent has to the root have not yet been
	 * deleted. In that case, we do nothing and let the
	 * last ref for this bytenr drive our update.
	 *
	 * This can happen for example if an extent is
	 * referenced multiple times in a snapshot (clone,
	 * etc). If we are in the middle of snapshot removal,
	 * queued updates for such an extent will find the
	 * root if we have not yet finished removing the
	 * snapshot.
	 */
	if (unode->val == oper->ref_root)
		goto out;

	root_obj = unode->val;
	BUG_ON(!root_obj);

	spin_lock(&fs_info->qgroup_lock);
	qg = find_qgroup_rb(fs_info, root_obj);
	if (!qg)
		goto out_unlock;

	qg->excl += oper->num_bytes;
	qg->excl_cmpr += oper->num_bytes;
	qgroup_dirty(fs_info, qg);

	/*
	 * Adjust counts for parent groups. First we find all
	 * parents, then in the 2nd loop we do the adjustment
	 * while adding parents of the parents to our ulist.
         */
        list_for_each_entry(glist, &qg->groups, next_group) {
                err = ulist_add(parents, glist->group->qgroupid,
                                ptr_to_u64(glist->group), GFP_ATOMIC);
                if (err < 0) {
                        ret = err;
                        goto out_unlock;
                }
        }

        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(parents, &uiter))) {
                qg = u64_to_ptr(unode->aux);
                qg->excl += oper->num_bytes;
                qg->excl_cmpr += oper->num_bytes;
                qgroup_dirty(fs_info, qg);

                /* Add any parents of the parents */
                list_for_each_entry(glist, &qg->groups, next_group) {
                        err = ulist_add(parents, glist->group->qgroupid,
                                        ptr_to_u64(glist->group), GFP_ATOMIC);
                        if (err < 0) {
                                ret = err;
                                goto out_unlock;
                        }
                }
        }

out_unlock:
        spin_unlock(&fs_info->qgroup_lock);

out:
        ulist_free(roots);
        ulist_free(parents);
        return ret;
}

/*
 * btrfs_qgroup_account is called for every ref that is added to or deleted
 * from the fs.  First, all roots referencing the extent are searched, and
 * then the space is accounted to the different roots accordingly.  The
 * accounting algorithm works in 3 steps documented inline.
 */
static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
                                struct btrfs_fs_info *fs_info,
                                struct btrfs_qgroup_operation *oper)
{
        int ret = 0;

        if (!fs_info->quota_enabled)
                return 0;

        BUG_ON(!fs_info->quota_root);

        mutex_lock(&fs_info->qgroup_rescan_lock);
        if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
                if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
                        mutex_unlock(&fs_info->qgroup_rescan_lock);
                        return 0;
                }
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);

        ASSERT(is_fstree(oper->ref_root));

        switch (oper->type) {
        case BTRFS_QGROUP_OPER_ADD_EXCL:
        case BTRFS_QGROUP_OPER_SUB_EXCL:
                ret = qgroup_excl_accounting(fs_info, oper);
                break;
        case BTRFS_QGROUP_OPER_ADD_SHARED:
        case BTRFS_QGROUP_OPER_SUB_SHARED:
                ret = qgroup_shared_accounting(trans, fs_info, oper);
                break;
        case BTRFS_QGROUP_OPER_SUB_SUBTREE:
                ret = qgroup_subtree_accounting(trans, fs_info, oper);
                break;
        default:
                ASSERT(0);
        }
        return ret;
}

/*
 * Needs to be called every time we run delayed refs, even if there is an
 * error, in order to clean up outstanding operations.
 */
int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
                                    struct btrfs_fs_info *fs_info)
{
        struct btrfs_qgroup_operation *oper;
        int ret = 0;

        while (!list_empty(&trans->qgroup_ref_list)) {
                oper = list_first_entry(&trans->qgroup_ref_list,
                                        struct btrfs_qgroup_operation, list);
                list_del_init(&oper->list);
                /* only account if nothing failed so far and we're not aborted */
                if (!ret && !trans->aborted)
                        ret = btrfs_qgroup_account(trans, fs_info, oper);
                spin_lock(&fs_info->qgroup_op_lock);
                rb_erase(&oper->n, &fs_info->qgroup_op_tree);
                spin_unlock(&fs_info->qgroup_op_lock);
                btrfs_put_tree_mod_seq(fs_info, &oper->elem);
                kfree(oper);
        }
        return ret;
}

/*
 * called from commit_transaction.  Writes all changed qgroups to disk.
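 * If updating an info item fails, the status is flagged
 * BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT rather than failing the commit, and
 * when quotas have just been switched on (pending_quota_state set while they
 * were still off) an initial rescan worker is queued.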
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                      struct btrfs_fs_info *fs_info)
{
        struct btrfs_root *quota_root = fs_info->quota_root;
        int ret = 0;
        int start_rescan_worker = 0;

        if (!quota_root)
                goto out;

        if (!fs_info->quota_enabled && fs_info->pending_quota_state)
                start_rescan_worker = 1;

        fs_info->quota_enabled = fs_info->pending_quota_state;

        spin_lock(&fs_info->qgroup_lock);
        while (!list_empty(&fs_info->dirty_qgroups)) {
                struct btrfs_qgroup *qgroup;
                qgroup = list_first_entry(&fs_info->dirty_qgroups,
                                          struct btrfs_qgroup, dirty);
                list_del_init(&qgroup->dirty);
                spin_unlock(&fs_info->qgroup_lock);
                ret = update_qgroup_info_item(trans, quota_root, qgroup);
                if (ret)
                        fs_info->qgroup_flags |=
                                        BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                spin_lock(&fs_info->qgroup_lock);
        }
        if (fs_info->quota_enabled)
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
        else
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
        spin_unlock(&fs_info->qgroup_lock);

        ret = update_qgroup_status_item(trans, fs_info, quota_root);
        if (ret)
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

        if (!ret && start_rescan_worker) {
                ret = qgroup_rescan_init(fs_info, 0, 1);
                if (!ret) {
                        qgroup_rescan_zero_tracking(fs_info);
                        btrfs_queue_work(fs_info->qgroup_rescan_workers,
                                         &fs_info->qgroup_rescan_work);
                }
                ret = 0;
        }

out:

        return ret;
}

/*
 * copy the accounting information between qgroups.  This is necessary when a
 * snapshot or a subvolume is created.
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
                         struct btrfs_qgroup_inherit *inherit)
{
        int ret = 0;
        int i;
        u64 *i_qgroups;
        struct btrfs_root *quota_root = fs_info->quota_root;
        struct btrfs_qgroup *srcgroup;
        struct btrfs_qgroup *dstgroup;
        u32 level_size = 0;
        u64 nums;

        mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_enabled)
                goto out;

        if (!quota_root) {
                ret = -EINVAL;
                goto out;
        }

        if (inherit) {
                i_qgroups = (u64 *)(inherit + 1);
                nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
                       2 * inherit->num_excl_copies;
                for (i = 0; i < nums; ++i) {
                        srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
                        if (!srcgroup) {
                                ret = -EINVAL;
                                goto out;
                        }
                        ++i_qgroups;
                }
        }

        /*
         * create a tracking group for the subvol itself
         */
        ret = add_qgroup_item(trans, quota_root, objectid);
        if (ret)
                goto out;

        if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
                ret = update_qgroup_limit_item(trans, quota_root, objectid,
                                               inherit->lim.flags,
                                               inherit->lim.max_rfer,
                                               inherit->lim.max_excl,
                                               inherit->lim.rsv_rfer,
                                               inherit->lim.rsv_excl);
                if (ret)
                        goto out;
        }

        if (srcid) {
                struct btrfs_root *srcroot;
                struct btrfs_key srckey;
                int srcroot_level;

                srckey.objectid = srcid;
                srckey.type = BTRFS_ROOT_ITEM_KEY;
                srckey.offset = (u64)-1;
                srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
                if (IS_ERR(srcroot)) {
                        ret = PTR_ERR(srcroot);
                        goto out;
                }

                rcu_read_lock();
                srcroot_level =
btrfs_header_level(srcroot->node); 2253 level_size = btrfs_level_size(srcroot, srcroot_level); 2254 rcu_read_unlock(); 2255 } 2256 2257 /* 2258 * add qgroup to all inherited groups 2259 */ 2260 if (inherit) { 2261 i_qgroups = (u64 *)(inherit + 1); 2262 for (i = 0; i < inherit->num_qgroups; ++i) { 2263 ret = add_qgroup_relation_item(trans, quota_root, 2264 objectid, *i_qgroups); 2265 if (ret) 2266 goto out; 2267 ret = add_qgroup_relation_item(trans, quota_root, 2268 *i_qgroups, objectid); 2269 if (ret) 2270 goto out; 2271 ++i_qgroups; 2272 } 2273 } 2274 2275 2276 spin_lock(&fs_info->qgroup_lock); 2277 2278 dstgroup = add_qgroup_rb(fs_info, objectid); 2279 if (IS_ERR(dstgroup)) { 2280 ret = PTR_ERR(dstgroup); 2281 goto unlock; 2282 } 2283 2284 if (srcid) { 2285 srcgroup = find_qgroup_rb(fs_info, srcid); 2286 if (!srcgroup) 2287 goto unlock; 2288 2289 /* 2290 * We call inherit after we clone the root in order to make sure 2291 * our counts don't go crazy, so at this point the only 2292 * difference between the two roots should be the root node. 2293 */ 2294 dstgroup->rfer = srcgroup->rfer; 2295 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2296 dstgroup->excl = level_size; 2297 dstgroup->excl_cmpr = level_size; 2298 srcgroup->excl = level_size; 2299 srcgroup->excl_cmpr = level_size; 2300 qgroup_dirty(fs_info, dstgroup); 2301 qgroup_dirty(fs_info, srcgroup); 2302 } 2303 2304 if (!inherit) 2305 goto unlock; 2306 2307 i_qgroups = (u64 *)(inherit + 1); 2308 for (i = 0; i < inherit->num_qgroups; ++i) { 2309 ret = add_relation_rb(quota_root->fs_info, objectid, 2310 *i_qgroups); 2311 if (ret) 2312 goto unlock; 2313 ++i_qgroups; 2314 } 2315 2316 for (i = 0; i < inherit->num_ref_copies; ++i) { 2317 struct btrfs_qgroup *src; 2318 struct btrfs_qgroup *dst; 2319 2320 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2321 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2322 2323 if (!src || !dst) { 2324 ret = -EINVAL; 2325 goto unlock; 2326 } 2327 2328 dst->rfer = src->rfer - level_size; 2329 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2330 i_qgroups += 2; 2331 } 2332 for (i = 0; i < inherit->num_excl_copies; ++i) { 2333 struct btrfs_qgroup *src; 2334 struct btrfs_qgroup *dst; 2335 2336 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2337 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2338 2339 if (!src || !dst) { 2340 ret = -EINVAL; 2341 goto unlock; 2342 } 2343 2344 dst->excl = src->excl + level_size; 2345 dst->excl_cmpr = src->excl_cmpr + level_size; 2346 i_qgroups += 2; 2347 } 2348 2349 unlock: 2350 spin_unlock(&fs_info->qgroup_lock); 2351 out: 2352 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2353 return ret; 2354 } 2355 2356 /* 2357 * reserve some space for a qgroup and all its parents. The reservation takes 2358 * place with start_transaction or dealloc_reserve, similar to ENOSPC 2359 * accounting. If not enough space is available, EDQUOT is returned. 2360 * We assume that the requested space is new for all qgroups. 
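 *
 * A minimal usage sketch (illustrative only, not an actual call site):
 *
 *	ret = btrfs_qgroup_reserve(root, num_bytes);
 *	if (ret)
 *		return ret;	(-EDQUOT if a limit would be exceeded)
 *	...
 *	btrfs_qgroup_free(root, num_bytes);
 *
 * btrfs_qgroup_free() below undoes such a reservation on the qgroup and all
 * of its parents.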
2361 */ 2362 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2363 { 2364 struct btrfs_root *quota_root; 2365 struct btrfs_qgroup *qgroup; 2366 struct btrfs_fs_info *fs_info = root->fs_info; 2367 u64 ref_root = root->root_key.objectid; 2368 int ret = 0; 2369 struct ulist_node *unode; 2370 struct ulist_iterator uiter; 2371 2372 if (!is_fstree(ref_root)) 2373 return 0; 2374 2375 if (num_bytes == 0) 2376 return 0; 2377 2378 spin_lock(&fs_info->qgroup_lock); 2379 quota_root = fs_info->quota_root; 2380 if (!quota_root) 2381 goto out; 2382 2383 qgroup = find_qgroup_rb(fs_info, ref_root); 2384 if (!qgroup) 2385 goto out; 2386 2387 /* 2388 * in a first step, we check all affected qgroups if any limits would 2389 * be exceeded 2390 */ 2391 ulist_reinit(fs_info->qgroup_ulist); 2392 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2393 (uintptr_t)qgroup, GFP_ATOMIC); 2394 if (ret < 0) 2395 goto out; 2396 ULIST_ITER_INIT(&uiter); 2397 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2398 struct btrfs_qgroup *qg; 2399 struct btrfs_qgroup_list *glist; 2400 2401 qg = u64_to_ptr(unode->aux); 2402 2403 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2404 qg->reserved + (s64)qg->rfer + num_bytes > 2405 qg->max_rfer) { 2406 ret = -EDQUOT; 2407 goto out; 2408 } 2409 2410 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2411 qg->reserved + (s64)qg->excl + num_bytes > 2412 qg->max_excl) { 2413 ret = -EDQUOT; 2414 goto out; 2415 } 2416 2417 list_for_each_entry(glist, &qg->groups, next_group) { 2418 ret = ulist_add(fs_info->qgroup_ulist, 2419 glist->group->qgroupid, 2420 (uintptr_t)glist->group, GFP_ATOMIC); 2421 if (ret < 0) 2422 goto out; 2423 } 2424 } 2425 ret = 0; 2426 /* 2427 * no limits exceeded, now record the reservation into all qgroups 2428 */ 2429 ULIST_ITER_INIT(&uiter); 2430 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2431 struct btrfs_qgroup *qg; 2432 2433 qg = u64_to_ptr(unode->aux); 2434 2435 qg->reserved += num_bytes; 2436 } 2437 2438 out: 2439 spin_unlock(&fs_info->qgroup_lock); 2440 return ret; 2441 } 2442 2443 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) 2444 { 2445 struct btrfs_root *quota_root; 2446 struct btrfs_qgroup *qgroup; 2447 struct btrfs_fs_info *fs_info = root->fs_info; 2448 struct ulist_node *unode; 2449 struct ulist_iterator uiter; 2450 u64 ref_root = root->root_key.objectid; 2451 int ret = 0; 2452 2453 if (!is_fstree(ref_root)) 2454 return; 2455 2456 if (num_bytes == 0) 2457 return; 2458 2459 spin_lock(&fs_info->qgroup_lock); 2460 2461 quota_root = fs_info->quota_root; 2462 if (!quota_root) 2463 goto out; 2464 2465 qgroup = find_qgroup_rb(fs_info, ref_root); 2466 if (!qgroup) 2467 goto out; 2468 2469 ulist_reinit(fs_info->qgroup_ulist); 2470 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2471 (uintptr_t)qgroup, GFP_ATOMIC); 2472 if (ret < 0) 2473 goto out; 2474 ULIST_ITER_INIT(&uiter); 2475 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2476 struct btrfs_qgroup *qg; 2477 struct btrfs_qgroup_list *glist; 2478 2479 qg = u64_to_ptr(unode->aux); 2480 2481 qg->reserved -= num_bytes; 2482 2483 list_for_each_entry(glist, &qg->groups, next_group) { 2484 ret = ulist_add(fs_info->qgroup_ulist, 2485 glist->group->qgroupid, 2486 (uintptr_t)glist->group, GFP_ATOMIC); 2487 if (ret < 0) 2488 goto out; 2489 } 2490 } 2491 2492 out: 2493 spin_unlock(&fs_info->qgroup_lock); 2494 } 2495 2496 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) 2497 { 2498 if 
(list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2499 return; 2500 btrfs_err(trans->root->fs_info, 2501 "qgroups not uptodate in trans handle %p: list is%s empty, " 2502 "seq is %#x.%x", 2503 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2504 (u32)(trans->delayed_ref_elem.seq >> 32), 2505 (u32)trans->delayed_ref_elem.seq); 2506 BUG(); 2507 } 2508 2509 /* 2510 * returns < 0 on error, 0 when more leafs are to be scanned. 2511 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. 2512 */ 2513 static int 2514 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2515 struct btrfs_trans_handle *trans, struct ulist *qgroups, 2516 struct ulist *tmp, struct extent_buffer *scratch_leaf) 2517 { 2518 struct btrfs_key found; 2519 struct ulist *roots = NULL; 2520 struct seq_list tree_mod_seq_elem = {}; 2521 u64 num_bytes; 2522 u64 seq; 2523 int new_roots; 2524 int slot; 2525 int ret; 2526 2527 path->leave_spinning = 1; 2528 mutex_lock(&fs_info->qgroup_rescan_lock); 2529 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2530 &fs_info->qgroup_rescan_progress, 2531 path, 1, 0); 2532 2533 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 2534 fs_info->qgroup_rescan_progress.objectid, 2535 fs_info->qgroup_rescan_progress.type, 2536 fs_info->qgroup_rescan_progress.offset, ret); 2537 2538 if (ret) { 2539 /* 2540 * The rescan is about to end, we will not be scanning any 2541 * further blocks. We cannot unset the RESCAN flag here, because 2542 * we want to commit the transaction if everything went well. 2543 * To make the live accounting work in this phase, we set our 2544 * scan progress pointer such that every real extent objectid 2545 * will be smaller. 2546 */ 2547 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2548 btrfs_release_path(path); 2549 mutex_unlock(&fs_info->qgroup_rescan_lock); 2550 return ret; 2551 } 2552 2553 btrfs_item_key_to_cpu(path->nodes[0], &found, 2554 btrfs_header_nritems(path->nodes[0]) - 1); 2555 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2556 2557 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2558 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); 2559 slot = path->slots[0]; 2560 btrfs_release_path(path); 2561 mutex_unlock(&fs_info->qgroup_rescan_lock); 2562 2563 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2564 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2565 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2566 found.type != BTRFS_METADATA_ITEM_KEY) 2567 continue; 2568 if (found.type == BTRFS_METADATA_ITEM_KEY) 2569 num_bytes = fs_info->extent_root->leafsize; 2570 else 2571 num_bytes = found.offset; 2572 2573 ulist_reinit(qgroups); 2574 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2575 &roots); 2576 if (ret < 0) 2577 goto out; 2578 spin_lock(&fs_info->qgroup_lock); 2579 seq = fs_info->qgroup_seq; 2580 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2581 2582 new_roots = 0; 2583 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, 2584 seq, &new_roots, 1); 2585 if (ret < 0) { 2586 spin_unlock(&fs_info->qgroup_lock); 2587 ulist_free(roots); 2588 goto out; 2589 } 2590 2591 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, 2592 seq, 0, new_roots, 1); 2593 if (ret < 0) { 2594 spin_unlock(&fs_info->qgroup_lock); 2595 ulist_free(roots); 2596 goto out; 2597 } 2598 spin_unlock(&fs_info->qgroup_lock); 2599 ulist_free(roots); 2600 } 2601 out: 2602 btrfs_put_tree_mod_seq(fs_info, 
&tree_mod_seq_elem); 2603 2604 return ret; 2605 } 2606 2607 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2608 { 2609 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2610 qgroup_rescan_work); 2611 struct btrfs_path *path; 2612 struct btrfs_trans_handle *trans = NULL; 2613 struct ulist *tmp = NULL, *qgroups = NULL; 2614 struct extent_buffer *scratch_leaf = NULL; 2615 int err = -ENOMEM; 2616 2617 path = btrfs_alloc_path(); 2618 if (!path) 2619 goto out; 2620 qgroups = ulist_alloc(GFP_NOFS); 2621 if (!qgroups) 2622 goto out; 2623 tmp = ulist_alloc(GFP_NOFS); 2624 if (!tmp) 2625 goto out; 2626 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); 2627 if (!scratch_leaf) 2628 goto out; 2629 2630 err = 0; 2631 while (!err) { 2632 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2633 if (IS_ERR(trans)) { 2634 err = PTR_ERR(trans); 2635 break; 2636 } 2637 if (!fs_info->quota_enabled) { 2638 err = -EINTR; 2639 } else { 2640 err = qgroup_rescan_leaf(fs_info, path, trans, 2641 qgroups, tmp, scratch_leaf); 2642 } 2643 if (err > 0) 2644 btrfs_commit_transaction(trans, fs_info->fs_root); 2645 else 2646 btrfs_end_transaction(trans, fs_info->fs_root); 2647 } 2648 2649 out: 2650 kfree(scratch_leaf); 2651 ulist_free(qgroups); 2652 ulist_free(tmp); 2653 btrfs_free_path(path); 2654 2655 mutex_lock(&fs_info->qgroup_rescan_lock); 2656 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2657 2658 if (err == 2 && 2659 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2660 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2661 } else if (err < 0) { 2662 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2663 } 2664 mutex_unlock(&fs_info->qgroup_rescan_lock); 2665 2666 if (err >= 0) { 2667 btrfs_info(fs_info, "qgroup scan completed%s", 2668 err == 2 ? " (inconsistency flag cleared)" : ""); 2669 } else { 2670 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2671 } 2672 2673 complete_all(&fs_info->qgroup_rescan_completion); 2674 } 2675 2676 /* 2677 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2678 * memory required for the rescan context. 
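 *
 * With a non-zero init_flags (a freshly requested rescan) it also verifies
 * that no rescan is already in progress (-EINPROGRESS otherwise) and sets
 * BTRFS_QGROUP_STATUS_FLAG_RESCAN itself; with init_flags == 0 (resuming an
 * interrupted rescan) both the RESCAN and ON status flags must already be
 * set, or -EINVAL is returned.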
2679 */ 2680 static int 2681 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2682 int init_flags) 2683 { 2684 int ret = 0; 2685 2686 if (!init_flags && 2687 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2688 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2689 ret = -EINVAL; 2690 goto err; 2691 } 2692 2693 mutex_lock(&fs_info->qgroup_rescan_lock); 2694 spin_lock(&fs_info->qgroup_lock); 2695 2696 if (init_flags) { 2697 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2698 ret = -EINPROGRESS; 2699 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2700 ret = -EINVAL; 2701 2702 if (ret) { 2703 spin_unlock(&fs_info->qgroup_lock); 2704 mutex_unlock(&fs_info->qgroup_rescan_lock); 2705 goto err; 2706 } 2707 2708 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2709 } 2710 2711 memset(&fs_info->qgroup_rescan_progress, 0, 2712 sizeof(fs_info->qgroup_rescan_progress)); 2713 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2714 2715 spin_unlock(&fs_info->qgroup_lock); 2716 mutex_unlock(&fs_info->qgroup_rescan_lock); 2717 2718 init_completion(&fs_info->qgroup_rescan_completion); 2719 2720 memset(&fs_info->qgroup_rescan_work, 0, 2721 sizeof(fs_info->qgroup_rescan_work)); 2722 btrfs_init_work(&fs_info->qgroup_rescan_work, 2723 btrfs_qgroup_rescan_helper, 2724 btrfs_qgroup_rescan_worker, NULL, NULL); 2725 2726 if (ret) { 2727 err: 2728 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2729 return ret; 2730 } 2731 2732 return 0; 2733 } 2734 2735 static void 2736 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2737 { 2738 struct rb_node *n; 2739 struct btrfs_qgroup *qgroup; 2740 2741 spin_lock(&fs_info->qgroup_lock); 2742 /* clear all current qgroup tracking information */ 2743 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2744 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2745 qgroup->rfer = 0; 2746 qgroup->rfer_cmpr = 0; 2747 qgroup->excl = 0; 2748 qgroup->excl_cmpr = 0; 2749 } 2750 spin_unlock(&fs_info->qgroup_lock); 2751 } 2752 2753 int 2754 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2755 { 2756 int ret = 0; 2757 struct btrfs_trans_handle *trans; 2758 2759 ret = qgroup_rescan_init(fs_info, 0, 1); 2760 if (ret) 2761 return ret; 2762 2763 /* 2764 * We have set the rescan_progress to 0, which means no more 2765 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2766 * However, btrfs_qgroup_account_ref may be right after its call 2767 * to btrfs_find_all_roots, in which case it would still do the 2768 * accounting. 2769 * To solve this, we're committing the transaction, which will 2770 * ensure we run all delayed refs and only after that, we are 2771 * going to clear all tracking information for a clean start. 
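         *
         * Note that with rescan_progress.objectid set to 0 every extent
         * bytenr compares >= the progress pointer, so btrfs_qgroup_account()
         * skips live accounting entirely until the rescan worker advances the
         * pointer past the extents it has already covered.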
2772 */ 2773 2774 trans = btrfs_join_transaction(fs_info->fs_root); 2775 if (IS_ERR(trans)) { 2776 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2777 return PTR_ERR(trans); 2778 } 2779 ret = btrfs_commit_transaction(trans, fs_info->fs_root); 2780 if (ret) { 2781 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2782 return ret; 2783 } 2784 2785 qgroup_rescan_zero_tracking(fs_info); 2786 2787 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2788 &fs_info->qgroup_rescan_work); 2789 2790 return 0; 2791 } 2792 2793 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2794 { 2795 int running; 2796 int ret = 0; 2797 2798 mutex_lock(&fs_info->qgroup_rescan_lock); 2799 spin_lock(&fs_info->qgroup_lock); 2800 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2801 spin_unlock(&fs_info->qgroup_lock); 2802 mutex_unlock(&fs_info->qgroup_rescan_lock); 2803 2804 if (running) 2805 ret = wait_for_completion_interruptible( 2806 &fs_info->qgroup_rescan_completion); 2807 2808 return ret; 2809 } 2810 2811 /* 2812 * this is only called from open_ctree where we're still single threaded, thus 2813 * locking is omitted here. 2814 */ 2815 void 2816 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2817 { 2818 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2819 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2820 &fs_info->qgroup_rescan_work); 2821 } 2822
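/*
 * Summary of the rescan entry points above (a rough usage sketch, not an
 * actual call site):
 *
 *	btrfs_qgroup_rescan(fs_info);			start a new rescan
 *	btrfs_qgroup_wait_for_completion(fs_info);	optionally wait for it
 *	btrfs_qgroup_rescan_resume(fs_info);		from open_ctree, restart
 *							an interrupted rescan
 */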