/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
51 */ 52 struct btrfs_qgroup { 53 u64 qgroupid; 54 55 /* 56 * state 57 */ 58 u64 rfer; /* referenced */ 59 u64 rfer_cmpr; /* referenced compressed */ 60 u64 excl; /* exclusive */ 61 u64 excl_cmpr; /* exclusive compressed */ 62 63 /* 64 * limits 65 */ 66 u64 lim_flags; /* which limits are set */ 67 u64 max_rfer; 68 u64 max_excl; 69 u64 rsv_rfer; 70 u64 rsv_excl; 71 72 /* 73 * reservation tracking 74 */ 75 u64 reserved; 76 77 /* 78 * lists 79 */ 80 struct list_head groups; /* groups this group is member of */ 81 struct list_head members; /* groups that are members of this group */ 82 struct list_head dirty; /* dirty groups */ 83 struct rb_node node; /* tree of qgroups */ 84 85 /* 86 * temp variables for accounting operations 87 */ 88 u64 old_refcnt; 89 u64 new_refcnt; 90 }; 91 92 /* 93 * glue structure to represent the relations between qgroups. 94 */ 95 struct btrfs_qgroup_list { 96 struct list_head next_group; 97 struct list_head next_member; 98 struct btrfs_qgroup *group; 99 struct btrfs_qgroup *member; 100 }; 101 102 #define ptr_to_u64(x) ((u64)(uintptr_t)x) 103 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x) 104 105 static int 106 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 107 int init_flags); 108 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 109 110 /* must be called with qgroup_ioctl_lock held */ 111 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 112 u64 qgroupid) 113 { 114 struct rb_node *n = fs_info->qgroup_tree.rb_node; 115 struct btrfs_qgroup *qgroup; 116 117 while (n) { 118 qgroup = rb_entry(n, struct btrfs_qgroup, node); 119 if (qgroup->qgroupid < qgroupid) 120 n = n->rb_left; 121 else if (qgroup->qgroupid > qgroupid) 122 n = n->rb_right; 123 else 124 return qgroup; 125 } 126 return NULL; 127 } 128 129 /* must be called with qgroup_lock held */ 130 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 131 u64 qgroupid) 132 { 133 struct 
rb_node **p = &fs_info->qgroup_tree.rb_node; 134 struct rb_node *parent = NULL; 135 struct btrfs_qgroup *qgroup; 136 137 while (*p) { 138 parent = *p; 139 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 140 141 if (qgroup->qgroupid < qgroupid) 142 p = &(*p)->rb_left; 143 else if (qgroup->qgroupid > qgroupid) 144 p = &(*p)->rb_right; 145 else 146 return qgroup; 147 } 148 149 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 150 if (!qgroup) 151 return ERR_PTR(-ENOMEM); 152 153 qgroup->qgroupid = qgroupid; 154 INIT_LIST_HEAD(&qgroup->groups); 155 INIT_LIST_HEAD(&qgroup->members); 156 INIT_LIST_HEAD(&qgroup->dirty); 157 158 rb_link_node(&qgroup->node, parent, p); 159 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 160 161 return qgroup; 162 } 163 164 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 165 { 166 struct btrfs_qgroup_list *list; 167 168 list_del(&qgroup->dirty); 169 while (!list_empty(&qgroup->groups)) { 170 list = list_first_entry(&qgroup->groups, 171 struct btrfs_qgroup_list, next_group); 172 list_del(&list->next_group); 173 list_del(&list->next_member); 174 kfree(list); 175 } 176 177 while (!list_empty(&qgroup->members)) { 178 list = list_first_entry(&qgroup->members, 179 struct btrfs_qgroup_list, next_member); 180 list_del(&list->next_group); 181 list_del(&list->next_member); 182 kfree(list); 183 } 184 kfree(qgroup); 185 } 186 187 /* must be called with qgroup_lock held */ 188 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 189 { 190 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 191 192 if (!qgroup) 193 return -ENOENT; 194 195 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 196 __del_qgroup_rb(qgroup); 197 return 0; 198 } 199 200 /* must be called with qgroup_lock held */ 201 static int add_relation_rb(struct btrfs_fs_info *fs_info, 202 u64 memberid, u64 parentid) 203 { 204 struct btrfs_qgroup *member; 205 struct btrfs_qgroup *parent; 206 struct btrfs_qgroup_list *list; 207 208 member = 
find_qgroup_rb(fs_info, memberid); 209 parent = find_qgroup_rb(fs_info, parentid); 210 if (!member || !parent) 211 return -ENOENT; 212 213 list = kzalloc(sizeof(*list), GFP_ATOMIC); 214 if (!list) 215 return -ENOMEM; 216 217 list->group = parent; 218 list->member = member; 219 list_add_tail(&list->next_group, &member->groups); 220 list_add_tail(&list->next_member, &parent->members); 221 222 return 0; 223 } 224 225 /* must be called with qgroup_lock held */ 226 static int del_relation_rb(struct btrfs_fs_info *fs_info, 227 u64 memberid, u64 parentid) 228 { 229 struct btrfs_qgroup *member; 230 struct btrfs_qgroup *parent; 231 struct btrfs_qgroup_list *list; 232 233 member = find_qgroup_rb(fs_info, memberid); 234 parent = find_qgroup_rb(fs_info, parentid); 235 if (!member || !parent) 236 return -ENOENT; 237 238 list_for_each_entry(list, &member->groups, next_group) { 239 if (list->group == parent) { 240 list_del(&list->next_group); 241 list_del(&list->next_member); 242 kfree(list); 243 return 0; 244 } 245 } 246 return -ENOENT; 247 } 248 249 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 250 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 251 u64 rfer, u64 excl) 252 { 253 struct btrfs_qgroup *qgroup; 254 255 qgroup = find_qgroup_rb(fs_info, qgroupid); 256 if (!qgroup) 257 return -EINVAL; 258 if (qgroup->rfer != rfer || qgroup->excl != excl) 259 return -EINVAL; 260 return 0; 261 } 262 #endif 263 264 /* 265 * The full config is read in one go, only called from open_ctree() 266 * It doesn't use any locking, as at this point we're still single-threaded 267 */ 268 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 269 { 270 struct btrfs_key key; 271 struct btrfs_key found_key; 272 struct btrfs_root *quota_root = fs_info->quota_root; 273 struct btrfs_path *path = NULL; 274 struct extent_buffer *l; 275 int slot; 276 int ret = 0; 277 u64 flags = 0; 278 u64 rescan_progress = 0; 279 280 if (!fs_info->quota_enabled) 281 return 0; 282 283 
fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); 284 if (!fs_info->qgroup_ulist) { 285 ret = -ENOMEM; 286 goto out; 287 } 288 289 path = btrfs_alloc_path(); 290 if (!path) { 291 ret = -ENOMEM; 292 goto out; 293 } 294 295 /* default this to quota off, in case no status key is found */ 296 fs_info->qgroup_flags = 0; 297 298 /* 299 * pass 1: read status, all qgroup infos and limits 300 */ 301 key.objectid = 0; 302 key.type = 0; 303 key.offset = 0; 304 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 305 if (ret) 306 goto out; 307 308 while (1) { 309 struct btrfs_qgroup *qgroup; 310 311 slot = path->slots[0]; 312 l = path->nodes[0]; 313 btrfs_item_key_to_cpu(l, &found_key, slot); 314 315 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 316 struct btrfs_qgroup_status_item *ptr; 317 318 ptr = btrfs_item_ptr(l, slot, 319 struct btrfs_qgroup_status_item); 320 321 if (btrfs_qgroup_status_version(l, ptr) != 322 BTRFS_QGROUP_STATUS_VERSION) { 323 btrfs_err(fs_info, 324 "old qgroup version, quota disabled"); 325 goto out; 326 } 327 if (btrfs_qgroup_status_generation(l, ptr) != 328 fs_info->generation) { 329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 330 btrfs_err(fs_info, 331 "qgroup generation mismatch, " 332 "marked as inconsistent"); 333 } 334 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 335 ptr); 336 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 337 goto next1; 338 } 339 340 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 341 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 342 goto next1; 343 344 qgroup = find_qgroup_rb(fs_info, found_key.offset); 345 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 346 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 347 btrfs_err(fs_info, "inconsitent qgroup config"); 348 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 349 } 350 if (!qgroup) { 351 qgroup = add_qgroup_rb(fs_info, found_key.offset); 352 if (IS_ERR(qgroup)) { 353 ret = PTR_ERR(qgroup); 354 goto out; 355 } 356 } 357 switch 
(found_key.type) { 358 case BTRFS_QGROUP_INFO_KEY: { 359 struct btrfs_qgroup_info_item *ptr; 360 361 ptr = btrfs_item_ptr(l, slot, 362 struct btrfs_qgroup_info_item); 363 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 364 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 365 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 366 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 367 /* generation currently unused */ 368 break; 369 } 370 case BTRFS_QGROUP_LIMIT_KEY: { 371 struct btrfs_qgroup_limit_item *ptr; 372 373 ptr = btrfs_item_ptr(l, slot, 374 struct btrfs_qgroup_limit_item); 375 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 376 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 377 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 378 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 379 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 380 break; 381 } 382 } 383 next1: 384 ret = btrfs_next_item(quota_root, path); 385 if (ret < 0) 386 goto out; 387 if (ret) 388 break; 389 } 390 btrfs_release_path(path); 391 392 /* 393 * pass 2: read all qgroup relations 394 */ 395 key.objectid = 0; 396 key.type = BTRFS_QGROUP_RELATION_KEY; 397 key.offset = 0; 398 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 399 if (ret) 400 goto out; 401 while (1) { 402 slot = path->slots[0]; 403 l = path->nodes[0]; 404 btrfs_item_key_to_cpu(l, &found_key, slot); 405 406 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 407 goto next2; 408 409 if (found_key.objectid > found_key.offset) { 410 /* parent <- member, not needed to build config */ 411 /* FIXME should we omit the key completely? 
*/ 412 goto next2; 413 } 414 415 ret = add_relation_rb(fs_info, found_key.objectid, 416 found_key.offset); 417 if (ret == -ENOENT) { 418 btrfs_warn(fs_info, 419 "orphan qgroup relation 0x%llx->0x%llx", 420 found_key.objectid, found_key.offset); 421 ret = 0; /* ignore the error */ 422 } 423 if (ret) 424 goto out; 425 next2: 426 ret = btrfs_next_item(quota_root, path); 427 if (ret < 0) 428 goto out; 429 if (ret) 430 break; 431 } 432 out: 433 fs_info->qgroup_flags |= flags; 434 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { 435 fs_info->quota_enabled = 0; 436 fs_info->pending_quota_state = 0; 437 } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 438 ret >= 0) { 439 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 440 } 441 btrfs_free_path(path); 442 443 if (ret < 0) { 444 ulist_free(fs_info->qgroup_ulist); 445 fs_info->qgroup_ulist = NULL; 446 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 447 } 448 449 return ret < 0 ? ret : 0; 450 } 451 452 /* 453 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 454 * first two are in single-threaded paths.And for the third one, we have set 455 * quota_root to be null with qgroup_lock held before, so it is safe to clean 456 * up the in-memory structures without qgroup_lock held. 457 */ 458 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 459 { 460 struct rb_node *n; 461 struct btrfs_qgroup *qgroup; 462 463 while ((n = rb_first(&fs_info->qgroup_tree))) { 464 qgroup = rb_entry(n, struct btrfs_qgroup, node); 465 rb_erase(n, &fs_info->qgroup_tree); 466 __del_qgroup_rb(qgroup); 467 } 468 /* 469 * we call btrfs_free_qgroup_config() when umounting 470 * filesystem and disabling quota, so we set qgroup_ulit 471 * to be null here to avoid double free. 
472 */ 473 ulist_free(fs_info->qgroup_ulist); 474 fs_info->qgroup_ulist = NULL; 475 } 476 477 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 478 struct btrfs_root *quota_root, 479 u64 src, u64 dst) 480 { 481 int ret; 482 struct btrfs_path *path; 483 struct btrfs_key key; 484 485 path = btrfs_alloc_path(); 486 if (!path) 487 return -ENOMEM; 488 489 key.objectid = src; 490 key.type = BTRFS_QGROUP_RELATION_KEY; 491 key.offset = dst; 492 493 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 494 495 btrfs_mark_buffer_dirty(path->nodes[0]); 496 497 btrfs_free_path(path); 498 return ret; 499 } 500 501 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 502 struct btrfs_root *quota_root, 503 u64 src, u64 dst) 504 { 505 int ret; 506 struct btrfs_path *path; 507 struct btrfs_key key; 508 509 path = btrfs_alloc_path(); 510 if (!path) 511 return -ENOMEM; 512 513 key.objectid = src; 514 key.type = BTRFS_QGROUP_RELATION_KEY; 515 key.offset = dst; 516 517 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 518 if (ret < 0) 519 goto out; 520 521 if (ret > 0) { 522 ret = -ENOENT; 523 goto out; 524 } 525 526 ret = btrfs_del_item(trans, quota_root, path); 527 out: 528 btrfs_free_path(path); 529 return ret; 530 } 531 532 static int add_qgroup_item(struct btrfs_trans_handle *trans, 533 struct btrfs_root *quota_root, u64 qgroupid) 534 { 535 int ret; 536 struct btrfs_path *path; 537 struct btrfs_qgroup_info_item *qgroup_info; 538 struct btrfs_qgroup_limit_item *qgroup_limit; 539 struct extent_buffer *leaf; 540 struct btrfs_key key; 541 542 if (btrfs_test_is_dummy_root(quota_root)) 543 return 0; 544 545 path = btrfs_alloc_path(); 546 if (!path) 547 return -ENOMEM; 548 549 key.objectid = 0; 550 key.type = BTRFS_QGROUP_INFO_KEY; 551 key.offset = qgroupid; 552 553 /* 554 * Avoid a transaction abort by catching -EEXIST here. In that 555 * case, we proceed by re-initializing the existing structure 556 * on disk. 
557 */ 558 559 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 560 sizeof(*qgroup_info)); 561 if (ret && ret != -EEXIST) 562 goto out; 563 564 leaf = path->nodes[0]; 565 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 566 struct btrfs_qgroup_info_item); 567 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 568 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 569 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 570 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 571 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 572 573 btrfs_mark_buffer_dirty(leaf); 574 575 btrfs_release_path(path); 576 577 key.type = BTRFS_QGROUP_LIMIT_KEY; 578 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 579 sizeof(*qgroup_limit)); 580 if (ret && ret != -EEXIST) 581 goto out; 582 583 leaf = path->nodes[0]; 584 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 585 struct btrfs_qgroup_limit_item); 586 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 587 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 588 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 589 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 590 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 591 592 btrfs_mark_buffer_dirty(leaf); 593 594 ret = 0; 595 out: 596 btrfs_free_path(path); 597 return ret; 598 } 599 600 static int del_qgroup_item(struct btrfs_trans_handle *trans, 601 struct btrfs_root *quota_root, u64 qgroupid) 602 { 603 int ret; 604 struct btrfs_path *path; 605 struct btrfs_key key; 606 607 path = btrfs_alloc_path(); 608 if (!path) 609 return -ENOMEM; 610 611 key.objectid = 0; 612 key.type = BTRFS_QGROUP_INFO_KEY; 613 key.offset = qgroupid; 614 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 615 if (ret < 0) 616 goto out; 617 618 if (ret > 0) { 619 ret = -ENOENT; 620 goto out; 621 } 622 623 ret = btrfs_del_item(trans, quota_root, path); 624 if (ret) 625 goto out; 626 627 btrfs_release_path(path); 628 629 
key.type = BTRFS_QGROUP_LIMIT_KEY; 630 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 631 if (ret < 0) 632 goto out; 633 634 if (ret > 0) { 635 ret = -ENOENT; 636 goto out; 637 } 638 639 ret = btrfs_del_item(trans, quota_root, path); 640 641 out: 642 btrfs_free_path(path); 643 return ret; 644 } 645 646 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 647 struct btrfs_root *root, u64 qgroupid, 648 u64 flags, u64 max_rfer, u64 max_excl, 649 u64 rsv_rfer, u64 rsv_excl) 650 { 651 struct btrfs_path *path; 652 struct btrfs_key key; 653 struct extent_buffer *l; 654 struct btrfs_qgroup_limit_item *qgroup_limit; 655 int ret; 656 int slot; 657 658 key.objectid = 0; 659 key.type = BTRFS_QGROUP_LIMIT_KEY; 660 key.offset = qgroupid; 661 662 path = btrfs_alloc_path(); 663 if (!path) 664 return -ENOMEM; 665 666 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 667 if (ret > 0) 668 ret = -ENOENT; 669 670 if (ret) 671 goto out; 672 673 l = path->nodes[0]; 674 slot = path->slots[0]; 675 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 676 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags); 677 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer); 678 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl); 679 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer); 680 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl); 681 682 btrfs_mark_buffer_dirty(l); 683 684 out: 685 btrfs_free_path(path); 686 return ret; 687 } 688 689 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 690 struct btrfs_root *root, 691 struct btrfs_qgroup *qgroup) 692 { 693 struct btrfs_path *path; 694 struct btrfs_key key; 695 struct extent_buffer *l; 696 struct btrfs_qgroup_info_item *qgroup_info; 697 int ret; 698 int slot; 699 700 if (btrfs_test_is_dummy_root(root)) 701 return 0; 702 703 key.objectid = 0; 704 key.type = BTRFS_QGROUP_INFO_KEY; 705 key.offset = qgroup->qgroupid; 706 707 path = 
btrfs_alloc_path(); 708 if (!path) 709 return -ENOMEM; 710 711 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 712 if (ret > 0) 713 ret = -ENOENT; 714 715 if (ret) 716 goto out; 717 718 l = path->nodes[0]; 719 slot = path->slots[0]; 720 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 721 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 722 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 723 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 724 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 725 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 726 727 btrfs_mark_buffer_dirty(l); 728 729 out: 730 btrfs_free_path(path); 731 return ret; 732 } 733 734 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 735 struct btrfs_fs_info *fs_info, 736 struct btrfs_root *root) 737 { 738 struct btrfs_path *path; 739 struct btrfs_key key; 740 struct extent_buffer *l; 741 struct btrfs_qgroup_status_item *ptr; 742 int ret; 743 int slot; 744 745 key.objectid = 0; 746 key.type = BTRFS_QGROUP_STATUS_KEY; 747 key.offset = 0; 748 749 path = btrfs_alloc_path(); 750 if (!path) 751 return -ENOMEM; 752 753 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 754 if (ret > 0) 755 ret = -ENOENT; 756 757 if (ret) 758 goto out; 759 760 l = path->nodes[0]; 761 slot = path->slots[0]; 762 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 763 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 764 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 765 btrfs_set_qgroup_status_rescan(l, ptr, 766 fs_info->qgroup_rescan_progress.objectid); 767 768 btrfs_mark_buffer_dirty(l); 769 770 out: 771 btrfs_free_path(path); 772 return ret; 773 } 774 775 /* 776 * called with qgroup_lock held 777 */ 778 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 779 struct btrfs_root *root) 780 { 781 struct btrfs_path *path; 782 struct btrfs_key key; 783 
struct extent_buffer *leaf = NULL; 784 int ret; 785 int nr = 0; 786 787 path = btrfs_alloc_path(); 788 if (!path) 789 return -ENOMEM; 790 791 path->leave_spinning = 1; 792 793 key.objectid = 0; 794 key.offset = 0; 795 key.type = 0; 796 797 while (1) { 798 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 799 if (ret < 0) 800 goto out; 801 leaf = path->nodes[0]; 802 nr = btrfs_header_nritems(leaf); 803 if (!nr) 804 break; 805 /* 806 * delete the leaf one by one 807 * since the whole tree is going 808 * to be deleted. 809 */ 810 path->slots[0] = 0; 811 ret = btrfs_del_items(trans, root, path, 0, nr); 812 if (ret) 813 goto out; 814 815 btrfs_release_path(path); 816 } 817 ret = 0; 818 out: 819 root->fs_info->pending_quota_state = 0; 820 btrfs_free_path(path); 821 return ret; 822 } 823 824 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 825 struct btrfs_fs_info *fs_info) 826 { 827 struct btrfs_root *quota_root; 828 struct btrfs_root *tree_root = fs_info->tree_root; 829 struct btrfs_path *path = NULL; 830 struct btrfs_qgroup_status_item *ptr; 831 struct extent_buffer *leaf; 832 struct btrfs_key key; 833 struct btrfs_key found_key; 834 struct btrfs_qgroup *qgroup = NULL; 835 int ret = 0; 836 int slot; 837 838 mutex_lock(&fs_info->qgroup_ioctl_lock); 839 if (fs_info->quota_root) { 840 fs_info->pending_quota_state = 1; 841 goto out; 842 } 843 844 fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); 845 if (!fs_info->qgroup_ulist) { 846 ret = -ENOMEM; 847 goto out; 848 } 849 850 /* 851 * initially create the quota tree 852 */ 853 quota_root = btrfs_create_tree(trans, fs_info, 854 BTRFS_QUOTA_TREE_OBJECTID); 855 if (IS_ERR(quota_root)) { 856 ret = PTR_ERR(quota_root); 857 goto out; 858 } 859 860 path = btrfs_alloc_path(); 861 if (!path) { 862 ret = -ENOMEM; 863 goto out_free_root; 864 } 865 866 key.objectid = 0; 867 key.type = BTRFS_QGROUP_STATUS_KEY; 868 key.offset = 0; 869 870 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 871 sizeof(*ptr)); 872 if 
(ret) 873 goto out_free_path; 874 875 leaf = path->nodes[0]; 876 ptr = btrfs_item_ptr(leaf, path->slots[0], 877 struct btrfs_qgroup_status_item); 878 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 879 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 880 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 881 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 882 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 883 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 884 885 btrfs_mark_buffer_dirty(leaf); 886 887 key.objectid = 0; 888 key.type = BTRFS_ROOT_REF_KEY; 889 key.offset = 0; 890 891 btrfs_release_path(path); 892 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 893 if (ret > 0) 894 goto out_add_root; 895 if (ret < 0) 896 goto out_free_path; 897 898 899 while (1) { 900 slot = path->slots[0]; 901 leaf = path->nodes[0]; 902 btrfs_item_key_to_cpu(leaf, &found_key, slot); 903 904 if (found_key.type == BTRFS_ROOT_REF_KEY) { 905 ret = add_qgroup_item(trans, quota_root, 906 found_key.offset); 907 if (ret) 908 goto out_free_path; 909 910 qgroup = add_qgroup_rb(fs_info, found_key.offset); 911 if (IS_ERR(qgroup)) { 912 ret = PTR_ERR(qgroup); 913 goto out_free_path; 914 } 915 } 916 ret = btrfs_next_item(tree_root, path); 917 if (ret < 0) 918 goto out_free_path; 919 if (ret) 920 break; 921 } 922 923 out_add_root: 924 btrfs_release_path(path); 925 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 926 if (ret) 927 goto out_free_path; 928 929 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 930 if (IS_ERR(qgroup)) { 931 ret = PTR_ERR(qgroup); 932 goto out_free_path; 933 } 934 spin_lock(&fs_info->qgroup_lock); 935 fs_info->quota_root = quota_root; 936 fs_info->pending_quota_state = 1; 937 spin_unlock(&fs_info->qgroup_lock); 938 out_free_path: 939 btrfs_free_path(path); 940 out_free_root: 941 if (ret) { 942 free_extent_buffer(quota_root->node); 943 free_extent_buffer(quota_root->commit_root); 944 
kfree(quota_root); 945 } 946 out: 947 if (ret) { 948 ulist_free(fs_info->qgroup_ulist); 949 fs_info->qgroup_ulist = NULL; 950 } 951 mutex_unlock(&fs_info->qgroup_ioctl_lock); 952 return ret; 953 } 954 955 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 956 struct btrfs_fs_info *fs_info) 957 { 958 struct btrfs_root *tree_root = fs_info->tree_root; 959 struct btrfs_root *quota_root; 960 int ret = 0; 961 962 mutex_lock(&fs_info->qgroup_ioctl_lock); 963 if (!fs_info->quota_root) 964 goto out; 965 spin_lock(&fs_info->qgroup_lock); 966 fs_info->quota_enabled = 0; 967 fs_info->pending_quota_state = 0; 968 quota_root = fs_info->quota_root; 969 fs_info->quota_root = NULL; 970 spin_unlock(&fs_info->qgroup_lock); 971 972 btrfs_free_qgroup_config(fs_info); 973 974 ret = btrfs_clean_quota_tree(trans, quota_root); 975 if (ret) 976 goto out; 977 978 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 979 if (ret) 980 goto out; 981 982 list_del("a_root->dirty_list); 983 984 btrfs_tree_lock(quota_root->node); 985 clean_tree_block(trans, tree_root, quota_root->node); 986 btrfs_tree_unlock(quota_root->node); 987 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 988 989 free_extent_buffer(quota_root->node); 990 free_extent_buffer(quota_root->commit_root); 991 kfree(quota_root); 992 out: 993 mutex_unlock(&fs_info->qgroup_ioctl_lock); 994 return ret; 995 } 996 997 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 998 struct btrfs_qgroup *qgroup) 999 { 1000 if (list_empty(&qgroup->dirty)) 1001 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1002 } 1003 1004 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1005 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1006 { 1007 struct btrfs_root *quota_root; 1008 struct btrfs_qgroup *parent; 1009 struct btrfs_qgroup *member; 1010 struct btrfs_qgroup_list *list; 1011 int ret = 0; 1012 1013 mutex_lock(&fs_info->qgroup_ioctl_lock); 1014 quota_root = fs_info->quota_root; 1015 if (!quota_root) 
{ 1016 ret = -EINVAL; 1017 goto out; 1018 } 1019 member = find_qgroup_rb(fs_info, src); 1020 parent = find_qgroup_rb(fs_info, dst); 1021 if (!member || !parent) { 1022 ret = -EINVAL; 1023 goto out; 1024 } 1025 1026 /* check if such qgroup relation exist firstly */ 1027 list_for_each_entry(list, &member->groups, next_group) { 1028 if (list->group == parent) { 1029 ret = -EEXIST; 1030 goto out; 1031 } 1032 } 1033 1034 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1035 if (ret) 1036 goto out; 1037 1038 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1039 if (ret) { 1040 del_qgroup_relation_item(trans, quota_root, src, dst); 1041 goto out; 1042 } 1043 1044 spin_lock(&fs_info->qgroup_lock); 1045 ret = add_relation_rb(quota_root->fs_info, src, dst); 1046 spin_unlock(&fs_info->qgroup_lock); 1047 out: 1048 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1049 return ret; 1050 } 1051 1052 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1053 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1054 { 1055 struct btrfs_root *quota_root; 1056 struct btrfs_qgroup *parent; 1057 struct btrfs_qgroup *member; 1058 struct btrfs_qgroup_list *list; 1059 int ret = 0; 1060 int err; 1061 1062 mutex_lock(&fs_info->qgroup_ioctl_lock); 1063 quota_root = fs_info->quota_root; 1064 if (!quota_root) { 1065 ret = -EINVAL; 1066 goto out; 1067 } 1068 1069 member = find_qgroup_rb(fs_info, src); 1070 parent = find_qgroup_rb(fs_info, dst); 1071 if (!member || !parent) { 1072 ret = -EINVAL; 1073 goto out; 1074 } 1075 1076 /* check if such qgroup relation exist firstly */ 1077 list_for_each_entry(list, &member->groups, next_group) { 1078 if (list->group == parent) 1079 goto exist; 1080 } 1081 ret = -ENOENT; 1082 goto out; 1083 exist: 1084 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1085 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1086 if (err && !ret) 1087 ret = err; 1088 1089 spin_lock(&fs_info->qgroup_lock); 1090 
del_relation_rb(fs_info, src, dst); 1091 spin_unlock(&fs_info->qgroup_lock); 1092 out: 1093 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1094 return ret; 1095 } 1096 1097 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1098 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name) 1099 { 1100 struct btrfs_root *quota_root; 1101 struct btrfs_qgroup *qgroup; 1102 int ret = 0; 1103 1104 mutex_lock(&fs_info->qgroup_ioctl_lock); 1105 quota_root = fs_info->quota_root; 1106 if (!quota_root) { 1107 ret = -EINVAL; 1108 goto out; 1109 } 1110 qgroup = find_qgroup_rb(fs_info, qgroupid); 1111 if (qgroup) { 1112 ret = -EEXIST; 1113 goto out; 1114 } 1115 1116 ret = add_qgroup_item(trans, quota_root, qgroupid); 1117 if (ret) 1118 goto out; 1119 1120 spin_lock(&fs_info->qgroup_lock); 1121 qgroup = add_qgroup_rb(fs_info, qgroupid); 1122 spin_unlock(&fs_info->qgroup_lock); 1123 1124 if (IS_ERR(qgroup)) 1125 ret = PTR_ERR(qgroup); 1126 out: 1127 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1128 return ret; 1129 } 1130 1131 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1132 struct btrfs_fs_info *fs_info, u64 qgroupid) 1133 { 1134 struct btrfs_root *quota_root; 1135 struct btrfs_qgroup *qgroup; 1136 int ret = 0; 1137 1138 mutex_lock(&fs_info->qgroup_ioctl_lock); 1139 quota_root = fs_info->quota_root; 1140 if (!quota_root) { 1141 ret = -EINVAL; 1142 goto out; 1143 } 1144 1145 qgroup = find_qgroup_rb(fs_info, qgroupid); 1146 if (!qgroup) { 1147 ret = -ENOENT; 1148 goto out; 1149 } else { 1150 /* check if there are no relations to this qgroup */ 1151 if (!list_empty(&qgroup->groups) || 1152 !list_empty(&qgroup->members)) { 1153 ret = -EBUSY; 1154 goto out; 1155 } 1156 } 1157 ret = del_qgroup_item(trans, quota_root, qgroupid); 1158 1159 spin_lock(&fs_info->qgroup_lock); 1160 del_qgroup_rb(quota_root->fs_info, qgroupid); 1161 spin_unlock(&fs_info->qgroup_lock); 1162 out: 1163 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1164 return ret; 1165 } 1166 1167 int 
btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1168 struct btrfs_fs_info *fs_info, u64 qgroupid, 1169 struct btrfs_qgroup_limit *limit) 1170 { 1171 struct btrfs_root *quota_root; 1172 struct btrfs_qgroup *qgroup; 1173 int ret = 0; 1174 1175 mutex_lock(&fs_info->qgroup_ioctl_lock); 1176 quota_root = fs_info->quota_root; 1177 if (!quota_root) { 1178 ret = -EINVAL; 1179 goto out; 1180 } 1181 1182 qgroup = find_qgroup_rb(fs_info, qgroupid); 1183 if (!qgroup) { 1184 ret = -ENOENT; 1185 goto out; 1186 } 1187 ret = update_qgroup_limit_item(trans, quota_root, qgroupid, 1188 limit->flags, limit->max_rfer, 1189 limit->max_excl, limit->rsv_rfer, 1190 limit->rsv_excl); 1191 if (ret) { 1192 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1193 btrfs_info(fs_info, "unable to update quota limit for %llu", 1194 qgroupid); 1195 } 1196 1197 spin_lock(&fs_info->qgroup_lock); 1198 qgroup->lim_flags = limit->flags; 1199 qgroup->max_rfer = limit->max_rfer; 1200 qgroup->max_excl = limit->max_excl; 1201 qgroup->rsv_rfer = limit->rsv_rfer; 1202 qgroup->rsv_excl = limit->rsv_excl; 1203 spin_unlock(&fs_info->qgroup_lock); 1204 out: 1205 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1206 return ret; 1207 } 1208 1209 static int comp_oper_exist(struct btrfs_qgroup_operation *oper1, 1210 struct btrfs_qgroup_operation *oper2) 1211 { 1212 /* 1213 * Ignore seq and type here, we're looking for any operation 1214 * at all related to this extent on that root. 
1215 */ 1216 if (oper1->bytenr < oper2->bytenr) 1217 return -1; 1218 if (oper1->bytenr > oper2->bytenr) 1219 return 1; 1220 if (oper1->ref_root < oper2->ref_root) 1221 return -1; 1222 if (oper1->ref_root > oper2->ref_root) 1223 return 1; 1224 return 0; 1225 } 1226 1227 static int qgroup_oper_exists(struct btrfs_fs_info *fs_info, 1228 struct btrfs_qgroup_operation *oper) 1229 { 1230 struct rb_node *n; 1231 struct btrfs_qgroup_operation *cur; 1232 int cmp; 1233 1234 spin_lock(&fs_info->qgroup_op_lock); 1235 n = fs_info->qgroup_op_tree.rb_node; 1236 while (n) { 1237 cur = rb_entry(n, struct btrfs_qgroup_operation, n); 1238 cmp = comp_oper_exist(cur, oper); 1239 if (cmp < 0) { 1240 n = n->rb_right; 1241 } else if (cmp) { 1242 n = n->rb_left; 1243 } else { 1244 spin_unlock(&fs_info->qgroup_op_lock); 1245 return -EEXIST; 1246 } 1247 } 1248 spin_unlock(&fs_info->qgroup_op_lock); 1249 return 0; 1250 } 1251 1252 static int comp_oper(struct btrfs_qgroup_operation *oper1, 1253 struct btrfs_qgroup_operation *oper2) 1254 { 1255 if (oper1->bytenr < oper2->bytenr) 1256 return -1; 1257 if (oper1->bytenr > oper2->bytenr) 1258 return 1; 1259 if (oper1->seq < oper2->seq) 1260 return -1; 1261 if (oper1->seq > oper2->seq) 1262 return -1; 1263 if (oper1->ref_root < oper2->ref_root) 1264 return -1; 1265 if (oper1->ref_root > oper2->ref_root) 1266 return 1; 1267 if (oper1->type < oper2->type) 1268 return -1; 1269 if (oper1->type > oper2->type) 1270 return 1; 1271 return 0; 1272 } 1273 1274 static int insert_qgroup_oper(struct btrfs_fs_info *fs_info, 1275 struct btrfs_qgroup_operation *oper) 1276 { 1277 struct rb_node **p; 1278 struct rb_node *parent = NULL; 1279 struct btrfs_qgroup_operation *cur; 1280 int cmp; 1281 1282 spin_lock(&fs_info->qgroup_op_lock); 1283 p = &fs_info->qgroup_op_tree.rb_node; 1284 while (*p) { 1285 parent = *p; 1286 cur = rb_entry(parent, struct btrfs_qgroup_operation, n); 1287 cmp = comp_oper(cur, oper); 1288 if (cmp < 0) { 1289 p = &(*p)->rb_right; 1290 } else if 
(cmp) { 1291 p = &(*p)->rb_left; 1292 } else { 1293 spin_unlock(&fs_info->qgroup_op_lock); 1294 return -EEXIST; 1295 } 1296 } 1297 rb_link_node(&oper->n, parent, p); 1298 rb_insert_color(&oper->n, &fs_info->qgroup_op_tree); 1299 spin_unlock(&fs_info->qgroup_op_lock); 1300 return 0; 1301 } 1302 1303 /* 1304 * Record a quota operation for processing later on. 1305 * @trans: the transaction we are adding the delayed op to. 1306 * @fs_info: the fs_info for this fs. 1307 * @ref_root: the root of the reference we are acting on, 1308 * @bytenr: the bytenr we are acting on. 1309 * @num_bytes: the number of bytes in the reference. 1310 * @type: the type of operation this is. 1311 * @mod_seq: do we need to get a sequence number for looking up roots. 1312 * 1313 * We just add it to our trans qgroup_ref_list and carry on and process these 1314 * operations in order at some later point. If the reference root isn't a fs 1315 * root then we don't bother with doing anything. 1316 * 1317 * MUST BE HOLDING THE REF LOCK. 1318 */ 1319 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, 1320 struct btrfs_fs_info *fs_info, u64 ref_root, 1321 u64 bytenr, u64 num_bytes, 1322 enum btrfs_qgroup_operation_type type, int mod_seq) 1323 { 1324 struct btrfs_qgroup_operation *oper; 1325 int ret; 1326 1327 if (!is_fstree(ref_root) || !fs_info->quota_enabled) 1328 return 0; 1329 1330 oper = kmalloc(sizeof(*oper), GFP_NOFS); 1331 if (!oper) 1332 return -ENOMEM; 1333 1334 oper->ref_root = ref_root; 1335 oper->bytenr = bytenr; 1336 oper->num_bytes = num_bytes; 1337 oper->type = type; 1338 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1339 INIT_LIST_HEAD(&oper->elem.list); 1340 oper->elem.seq = 0; 1341 1342 trace_btrfs_qgroup_record_ref(oper); 1343 1344 if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) { 1345 /* 1346 * If any operation for this bytenr/ref_root combo 1347 * exists, then we know it's not exclusively owned and 1348 * shouldn't be queued up. 
1349 * 1350 * This also catches the case where we have a cloned 1351 * extent that gets queued up multiple times during 1352 * drop snapshot. 1353 */ 1354 if (qgroup_oper_exists(fs_info, oper)) { 1355 kfree(oper); 1356 return 0; 1357 } 1358 } 1359 1360 ret = insert_qgroup_oper(fs_info, oper); 1361 if (ret) { 1362 /* Shouldn't happen so have an assert for developers */ 1363 ASSERT(0); 1364 kfree(oper); 1365 return ret; 1366 } 1367 list_add_tail(&oper->list, &trans->qgroup_ref_list); 1368 1369 if (mod_seq) 1370 btrfs_get_tree_mod_seq(fs_info, &oper->elem); 1371 1372 return 0; 1373 } 1374 1375 /* 1376 * The easy accounting, if we are adding/removing the only ref for an extent 1377 * then this qgroup and all of the parent qgroups get their refrence and 1378 * exclusive counts adjusted. 1379 */ 1380 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1381 struct btrfs_qgroup_operation *oper) 1382 { 1383 struct btrfs_qgroup *qgroup; 1384 struct ulist *tmp; 1385 struct btrfs_qgroup_list *glist; 1386 struct ulist_node *unode; 1387 struct ulist_iterator uiter; 1388 int sign = 0; 1389 int ret = 0; 1390 1391 tmp = ulist_alloc(GFP_NOFS); 1392 if (!tmp) 1393 return -ENOMEM; 1394 1395 spin_lock(&fs_info->qgroup_lock); 1396 if (!fs_info->quota_root) 1397 goto out; 1398 qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1399 if (!qgroup) 1400 goto out; 1401 switch (oper->type) { 1402 case BTRFS_QGROUP_OPER_ADD_EXCL: 1403 sign = 1; 1404 break; 1405 case BTRFS_QGROUP_OPER_SUB_EXCL: 1406 sign = -1; 1407 break; 1408 default: 1409 ASSERT(0); 1410 } 1411 qgroup->rfer += sign * oper->num_bytes; 1412 qgroup->rfer_cmpr += sign * oper->num_bytes; 1413 1414 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); 1415 qgroup->excl += sign * oper->num_bytes; 1416 qgroup->excl_cmpr += sign * oper->num_bytes; 1417 1418 qgroup_dirty(fs_info, qgroup); 1419 1420 /* Get all of the parent groups that contain this qgroup */ 1421 list_for_each_entry(glist, &qgroup->groups, next_group) { 1422 
ret = ulist_add(tmp, glist->group->qgroupid, 1423 ptr_to_u64(glist->group), GFP_ATOMIC); 1424 if (ret < 0) 1425 goto out; 1426 } 1427 1428 /* Iterate all of the parents and adjust their reference counts */ 1429 ULIST_ITER_INIT(&uiter); 1430 while ((unode = ulist_next(tmp, &uiter))) { 1431 qgroup = u64_to_ptr(unode->aux); 1432 qgroup->rfer += sign * oper->num_bytes; 1433 qgroup->rfer_cmpr += sign * oper->num_bytes; 1434 WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes); 1435 qgroup->excl += sign * oper->num_bytes; 1436 qgroup->excl_cmpr += sign * oper->num_bytes; 1437 qgroup_dirty(fs_info, qgroup); 1438 1439 /* Add any parents of the parents */ 1440 list_for_each_entry(glist, &qgroup->groups, next_group) { 1441 ret = ulist_add(tmp, glist->group->qgroupid, 1442 ptr_to_u64(glist->group), GFP_ATOMIC); 1443 if (ret < 0) 1444 goto out; 1445 } 1446 } 1447 ret = 0; 1448 out: 1449 spin_unlock(&fs_info->qgroup_lock); 1450 ulist_free(tmp); 1451 return ret; 1452 } 1453 1454 /* 1455 * Walk all of the roots that pointed to our bytenr and adjust their refcnts as 1456 * properly. 
1457 */ 1458 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info, 1459 u64 root_to_skip, struct ulist *tmp, 1460 struct ulist *roots, struct ulist *qgroups, 1461 u64 seq, int *old_roots, int rescan) 1462 { 1463 struct ulist_node *unode; 1464 struct ulist_iterator uiter; 1465 struct ulist_node *tmp_unode; 1466 struct ulist_iterator tmp_uiter; 1467 struct btrfs_qgroup *qg; 1468 int ret; 1469 1470 ULIST_ITER_INIT(&uiter); 1471 while ((unode = ulist_next(roots, &uiter))) { 1472 /* We don't count our current root here */ 1473 if (unode->val == root_to_skip) 1474 continue; 1475 qg = find_qgroup_rb(fs_info, unode->val); 1476 if (!qg) 1477 continue; 1478 /* 1479 * We could have a pending removal of this same ref so we may 1480 * not have actually found our ref root when doing 1481 * btrfs_find_all_roots, so we need to keep track of how many 1482 * old roots we find in case we removed ours and added a 1483 * different one at the same time. I don't think this could 1484 * happen in practice but that sort of thinking leads to pain 1485 * and suffering and to the dark side. 1486 */ 1487 (*old_roots)++; 1488 1489 ulist_reinit(tmp); 1490 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1491 GFP_ATOMIC); 1492 if (ret < 0) 1493 return ret; 1494 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC); 1495 if (ret < 0) 1496 return ret; 1497 ULIST_ITER_INIT(&tmp_uiter); 1498 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1499 struct btrfs_qgroup_list *glist; 1500 1501 qg = u64_to_ptr(tmp_unode->aux); 1502 /* 1503 * We use this sequence number to keep from having to 1504 * run the whole list and 0 out the refcnt every time. 1505 * We basically use sequnce as the known 0 count and 1506 * then add 1 everytime we see a qgroup. This is how we 1507 * get how many of the roots actually point up to the 1508 * upper level qgroups in order to determine exclusive 1509 * counts. 
1510 * 1511 * For rescan we want to set old_refcnt to seq so our 1512 * exclusive calculations end up correct. 1513 */ 1514 if (rescan) 1515 qg->old_refcnt = seq; 1516 else if (qg->old_refcnt < seq) 1517 qg->old_refcnt = seq + 1; 1518 else 1519 qg->old_refcnt++; 1520 1521 if (qg->new_refcnt < seq) 1522 qg->new_refcnt = seq + 1; 1523 else 1524 qg->new_refcnt++; 1525 list_for_each_entry(glist, &qg->groups, next_group) { 1526 ret = ulist_add(qgroups, glist->group->qgroupid, 1527 ptr_to_u64(glist->group), 1528 GFP_ATOMIC); 1529 if (ret < 0) 1530 return ret; 1531 ret = ulist_add(tmp, glist->group->qgroupid, 1532 ptr_to_u64(glist->group), 1533 GFP_ATOMIC); 1534 if (ret < 0) 1535 return ret; 1536 } 1537 } 1538 } 1539 return 0; 1540 } 1541 1542 /* 1543 * We need to walk forward in our operation tree and account for any roots that 1544 * were deleted after we made this operation. 1545 */ 1546 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info, 1547 struct btrfs_qgroup_operation *oper, 1548 struct ulist *tmp, 1549 struct ulist *qgroups, u64 seq, 1550 int *old_roots) 1551 { 1552 struct ulist_node *unode; 1553 struct ulist_iterator uiter; 1554 struct btrfs_qgroup *qg; 1555 struct btrfs_qgroup_operation *tmp_oper; 1556 struct rb_node *n; 1557 int ret; 1558 1559 ulist_reinit(tmp); 1560 1561 /* 1562 * We only walk forward in the tree since we're only interested in 1563 * removals that happened _after_ our operation. 1564 */ 1565 spin_lock(&fs_info->qgroup_op_lock); 1566 n = rb_next(&oper->n); 1567 spin_unlock(&fs_info->qgroup_op_lock); 1568 if (!n) 1569 return 0; 1570 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1571 while (tmp_oper->bytenr == oper->bytenr) { 1572 /* 1573 * If it's not a removal we don't care, additions work out 1574 * properly with our refcnt tracking. 
1575 */ 1576 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED && 1577 tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL) 1578 goto next; 1579 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root); 1580 if (!qg) 1581 goto next; 1582 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg), 1583 GFP_ATOMIC); 1584 if (ret) { 1585 if (ret < 0) 1586 return ret; 1587 /* 1588 * We only want to increase old_roots if this qgroup is 1589 * not already in the list of qgroups. If it is already 1590 * there then that means it must have been re-added or 1591 * the delete will be discarded because we had an 1592 * existing ref that we haven't looked up yet. In this 1593 * case we don't want to increase old_roots. So if ret 1594 * == 1 then we know that this is the first time we've 1595 * seen this qgroup and we can bump the old_roots. 1596 */ 1597 (*old_roots)++; 1598 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), 1599 GFP_ATOMIC); 1600 if (ret < 0) 1601 return ret; 1602 } 1603 next: 1604 spin_lock(&fs_info->qgroup_op_lock); 1605 n = rb_next(&tmp_oper->n); 1606 spin_unlock(&fs_info->qgroup_op_lock); 1607 if (!n) 1608 break; 1609 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n); 1610 } 1611 1612 /* Ok now process the qgroups we found */ 1613 ULIST_ITER_INIT(&uiter); 1614 while ((unode = ulist_next(tmp, &uiter))) { 1615 struct btrfs_qgroup_list *glist; 1616 1617 qg = u64_to_ptr(unode->aux); 1618 if (qg->old_refcnt < seq) 1619 qg->old_refcnt = seq + 1; 1620 else 1621 qg->old_refcnt++; 1622 if (qg->new_refcnt < seq) 1623 qg->new_refcnt = seq + 1; 1624 else 1625 qg->new_refcnt++; 1626 list_for_each_entry(glist, &qg->groups, next_group) { 1627 ret = ulist_add(qgroups, glist->group->qgroupid, 1628 ptr_to_u64(glist->group), GFP_ATOMIC); 1629 if (ret < 0) 1630 return ret; 1631 ret = ulist_add(tmp, glist->group->qgroupid, 1632 ptr_to_u64(glist->group), GFP_ATOMIC); 1633 if (ret < 0) 1634 return ret; 1635 } 1636 } 1637 return 0; 1638 } 1639 1640 /* Add refcnt for the newly added reference. 
*/
static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper,
				  struct btrfs_qgroup *qgroup,
				  struct ulist *tmp, struct ulist *qgroups,
				  u64 seq)
{
	struct ulist_iterator iter;
	struct ulist_node *node;
	int err;

	ulist_reinit(tmp);
	err = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (err < 0)
		return err;
	err = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (err < 0)
		return err;

	ULIST_ITER_INIT(&iter);
	for (node = ulist_next(tmp, &iter); node;
	     node = ulist_next(tmp, &iter)) {
		struct btrfs_qgroup *qg = u64_to_ptr(node->aux);
		struct btrfs_qgroup_list *rel;
		u64 *refcnt;

		/*
		 * A shared addition counts towards the new refcnt; every
		 * other operation type counts towards the old one.
		 */
		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED)
			refcnt = &qg->new_refcnt;
		else
			refcnt = &qg->old_refcnt;

		/* anything below seq is treated as a zero count */
		if (*refcnt < seq)
			*refcnt = seq + 1;
		else
			(*refcnt)++;

		list_for_each_entry(rel, &qg->groups, next_group) {
			err = ulist_add(tmp, rel->group->qgroupid,
					ptr_to_u64(rel->group), GFP_ATOMIC);
			if (err < 0)
				return err;
			err = ulist_add(qgroups, rel->group->qgroupid,
					ptr_to_u64(rel->group), GFP_ATOMIC);
			if (err < 0)
				return err;
		}
	}
	return 0;
}

/*
 * This adjusts the counters for all referenced qgroups if need be.
1693 */ 1694 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info, 1695 u64 root_to_skip, u64 num_bytes, 1696 struct ulist *qgroups, u64 seq, 1697 int old_roots, int new_roots, int rescan) 1698 { 1699 struct ulist_node *unode; 1700 struct ulist_iterator uiter; 1701 struct btrfs_qgroup *qg; 1702 u64 cur_new_count, cur_old_count; 1703 1704 ULIST_ITER_INIT(&uiter); 1705 while ((unode = ulist_next(qgroups, &uiter))) { 1706 bool dirty = false; 1707 1708 qg = u64_to_ptr(unode->aux); 1709 /* 1710 * Wasn't referenced before but is now, add to the reference 1711 * counters. 1712 */ 1713 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) { 1714 qg->rfer += num_bytes; 1715 qg->rfer_cmpr += num_bytes; 1716 dirty = true; 1717 } 1718 1719 /* 1720 * Was referenced before but isn't now, subtract from the 1721 * reference counters. 1722 */ 1723 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) { 1724 qg->rfer -= num_bytes; 1725 qg->rfer_cmpr -= num_bytes; 1726 dirty = true; 1727 } 1728 1729 if (qg->old_refcnt < seq) 1730 cur_old_count = 0; 1731 else 1732 cur_old_count = qg->old_refcnt - seq; 1733 if (qg->new_refcnt < seq) 1734 cur_new_count = 0; 1735 else 1736 cur_new_count = qg->new_refcnt - seq; 1737 1738 /* 1739 * If our refcount was the same as the roots previously but our 1740 * new count isn't the same as the number of roots now then we 1741 * went from having a exclusive reference on this range to not. 1742 */ 1743 if (old_roots && cur_old_count == old_roots && 1744 (cur_new_count != new_roots || new_roots == 0)) { 1745 WARN_ON(cur_new_count != new_roots && new_roots == 0); 1746 qg->excl -= num_bytes; 1747 qg->excl_cmpr -= num_bytes; 1748 dirty = true; 1749 } 1750 1751 /* 1752 * If we didn't reference all the roots before but now we do we 1753 * have an exclusive reference to this range. 
1754 */ 1755 if ((!old_roots || (old_roots && cur_old_count != old_roots)) 1756 && cur_new_count == new_roots) { 1757 qg->excl += num_bytes; 1758 qg->excl_cmpr += num_bytes; 1759 dirty = true; 1760 } 1761 1762 if (dirty) 1763 qgroup_dirty(fs_info, qg); 1764 } 1765 return 0; 1766 } 1767 1768 /* 1769 * If we removed a data extent and there were other references for that bytenr 1770 * then we need to lookup all referenced roots to make sure we still don't 1771 * reference this bytenr. If we do then we can just discard this operation. 1772 */ 1773 static int check_existing_refs(struct btrfs_trans_handle *trans, 1774 struct btrfs_fs_info *fs_info, 1775 struct btrfs_qgroup_operation *oper) 1776 { 1777 struct ulist *roots = NULL; 1778 struct ulist_node *unode; 1779 struct ulist_iterator uiter; 1780 int ret = 0; 1781 1782 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1783 oper->elem.seq, &roots); 1784 if (ret < 0) 1785 return ret; 1786 ret = 0; 1787 1788 ULIST_ITER_INIT(&uiter); 1789 while ((unode = ulist_next(roots, &uiter))) { 1790 if (unode->val == oper->ref_root) { 1791 ret = 1; 1792 break; 1793 } 1794 } 1795 ulist_free(roots); 1796 btrfs_put_tree_mod_seq(fs_info, &oper->elem); 1797 1798 return ret; 1799 } 1800 1801 /* 1802 * If we share a reference across multiple roots then we may need to adjust 1803 * various qgroups referenced and exclusive counters. The basic premise is this 1804 * 1805 * 1) We have seq to represent a 0 count. Instead of looping through all of the 1806 * qgroups and resetting their refcount to 0 we just constantly bump this 1807 * sequence number to act as the base reference count. This means that if 1808 * anybody is equal to or below this sequence they were never referenced. We 1809 * jack this sequence up by the number of roots we found each time in order to 1810 * make sure we don't have any overlap. 1811 * 1812 * 2) We first search all the roots that reference the area _except_ the root 1813 * we're acting on currently. 
This makes up the old_refcnt of all the qgroups 1814 * before. 1815 * 1816 * 3) We walk all of the qgroups referenced by the root we are currently acting 1817 * on, and will either adjust old_refcnt in the case of a removal or the 1818 * new_refcnt in the case of an addition. 1819 * 1820 * 4) Finally we walk all the qgroups that are referenced by this range 1821 * including the root we are acting on currently. We will adjust the counters 1822 * based on the number of roots we had and will have after this operation. 1823 * 1824 * Take this example as an illustration 1825 * 1826 * [qgroup 1/0] 1827 * / | \ 1828 * [qg 0/0] [qg 0/1] [qg 0/2] 1829 * \ | / 1830 * [ extent ] 1831 * 1832 * Say we are adding a reference that is covered by qg 0/0. The first step 1833 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with 1834 * old_roots being 2. Because it is adding new_roots will be 1. We then go 1835 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's 1836 * new_refcnt, bringing it to 3. We then walk through all of the qgroups, we 1837 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a 1838 * reference and thus must add the size to the referenced bytes. Everything 1839 * else is the same so nothing else changes. 
1840 */ 1841 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, 1842 struct btrfs_fs_info *fs_info, 1843 struct btrfs_qgroup_operation *oper) 1844 { 1845 struct ulist *roots = NULL; 1846 struct ulist *qgroups, *tmp; 1847 struct btrfs_qgroup *qgroup; 1848 struct seq_list elem = {}; 1849 u64 seq; 1850 int old_roots = 0; 1851 int new_roots = 0; 1852 int ret = 0; 1853 1854 if (oper->elem.seq) { 1855 ret = check_existing_refs(trans, fs_info, oper); 1856 if (ret < 0) 1857 return ret; 1858 if (ret) 1859 return 0; 1860 } 1861 1862 qgroups = ulist_alloc(GFP_NOFS); 1863 if (!qgroups) 1864 return -ENOMEM; 1865 1866 tmp = ulist_alloc(GFP_NOFS); 1867 if (!tmp) { 1868 ulist_free(qgroups); 1869 return -ENOMEM; 1870 } 1871 1872 btrfs_get_tree_mod_seq(fs_info, &elem); 1873 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, 1874 &roots); 1875 btrfs_put_tree_mod_seq(fs_info, &elem); 1876 if (ret < 0) { 1877 ulist_free(qgroups); 1878 ulist_free(tmp); 1879 return ret; 1880 } 1881 spin_lock(&fs_info->qgroup_lock); 1882 qgroup = find_qgroup_rb(fs_info, oper->ref_root); 1883 if (!qgroup) 1884 goto out; 1885 seq = fs_info->qgroup_seq; 1886 1887 /* 1888 * So roots is the list of all the roots currently pointing at the 1889 * bytenr, including the ref we are adding if we are adding, or not if 1890 * we are removing a ref. So we pass in the ref_root to skip that root 1891 * in our calculations. We set old_refnct and new_refcnt cause who the 1892 * hell knows what everything looked like before, and it doesn't matter 1893 * except... 1894 */ 1895 ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups, 1896 seq, &old_roots, 0); 1897 if (ret < 0) 1898 goto out; 1899 1900 /* 1901 * Now adjust the refcounts of the qgroups that care about this 1902 * reference, either the old_count in the case of removal or new_count 1903 * in the case of an addition. 
1904 */ 1905 ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups, 1906 seq); 1907 if (ret < 0) 1908 goto out; 1909 1910 /* 1911 * ...in the case of removals. If we had a removal before we got around 1912 * to processing this operation then we need to find that guy and count 1913 * his references as if they really existed so we don't end up screwing 1914 * up the exclusive counts. Then whenever we go to process the delete 1915 * everything will be grand and we can account for whatever exclusive 1916 * changes need to be made there. We also have to pass in old_roots so 1917 * we have an accurate count of the roots as it pertains to this 1918 * operations view of the world. 1919 */ 1920 ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, 1921 &old_roots); 1922 if (ret < 0) 1923 goto out; 1924 1925 /* 1926 * We are adding our root, need to adjust up the number of roots, 1927 * otherwise old_roots is the number of roots we want. 1928 */ 1929 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 1930 new_roots = old_roots + 1; 1931 } else { 1932 new_roots = old_roots; 1933 old_roots++; 1934 } 1935 fs_info->qgroup_seq += old_roots + 1; 1936 1937 1938 /* 1939 * And now the magic happens, bless Arne for having a pretty elegant 1940 * solution for this. 1941 */ 1942 qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, 1943 qgroups, seq, old_roots, new_roots, 0); 1944 out: 1945 spin_unlock(&fs_info->qgroup_lock); 1946 ulist_free(qgroups); 1947 ulist_free(roots); 1948 ulist_free(tmp); 1949 return ret; 1950 } 1951 1952 /* 1953 * Process a reference to a shared subtree. This type of operation is 1954 * queued during snapshot removal when we encounter extents which are 1955 * shared between more than one root. 
1956 */ 1957 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, 1958 struct btrfs_fs_info *fs_info, 1959 struct btrfs_qgroup_operation *oper) 1960 { 1961 struct ulist *roots = NULL; 1962 struct ulist_node *unode; 1963 struct ulist_iterator uiter; 1964 struct btrfs_qgroup_list *glist; 1965 struct ulist *parents; 1966 int ret = 0; 1967 int err; 1968 struct btrfs_qgroup *qg; 1969 u64 root_obj = 0; 1970 struct seq_list elem = {}; 1971 1972 parents = ulist_alloc(GFP_NOFS); 1973 if (!parents) 1974 return -ENOMEM; 1975 1976 btrfs_get_tree_mod_seq(fs_info, &elem); 1977 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 1978 elem.seq, &roots); 1979 btrfs_put_tree_mod_seq(fs_info, &elem); 1980 if (ret < 0) 1981 goto out; 1982 1983 if (roots->nnodes != 1) 1984 goto out; 1985 1986 ULIST_ITER_INIT(&uiter); 1987 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ 1988 /* 1989 * If we find our ref root then that means all refs 1990 * this extent has to the root have not yet been 1991 * deleted. In that case, we do nothing and let the 1992 * last ref for this bytenr drive our update. 1993 * 1994 * This can happen for example if an extent is 1995 * referenced multiple times in a snapshot (clone, 1996 * etc). If we are in the middle of snapshot removal, 1997 * queued updates for such an extent will find the 1998 * root if we have not yet finished removing the 1999 * snapshot. 2000 */ 2001 if (unode->val == oper->ref_root) 2002 goto out; 2003 2004 root_obj = unode->val; 2005 BUG_ON(!root_obj); 2006 2007 spin_lock(&fs_info->qgroup_lock); 2008 qg = find_qgroup_rb(fs_info, root_obj); 2009 if (!qg) 2010 goto out_unlock; 2011 2012 qg->excl += oper->num_bytes; 2013 qg->excl_cmpr += oper->num_bytes; 2014 qgroup_dirty(fs_info, qg); 2015 2016 /* 2017 * Adjust counts for parent groups. First we find all 2018 * parents, then in the 2nd loop we do the adjustment 2019 * while adding parents of the parents to our ulist. 
2020 */ 2021 list_for_each_entry(glist, &qg->groups, next_group) { 2022 err = ulist_add(parents, glist->group->qgroupid, 2023 ptr_to_u64(glist->group), GFP_ATOMIC); 2024 if (err < 0) { 2025 ret = err; 2026 goto out_unlock; 2027 } 2028 } 2029 2030 ULIST_ITER_INIT(&uiter); 2031 while ((unode = ulist_next(parents, &uiter))) { 2032 qg = u64_to_ptr(unode->aux); 2033 qg->excl += oper->num_bytes; 2034 qg->excl_cmpr += oper->num_bytes; 2035 qgroup_dirty(fs_info, qg); 2036 2037 /* Add any parents of the parents */ 2038 list_for_each_entry(glist, &qg->groups, next_group) { 2039 err = ulist_add(parents, glist->group->qgroupid, 2040 ptr_to_u64(glist->group), GFP_ATOMIC); 2041 if (err < 0) { 2042 ret = err; 2043 goto out_unlock; 2044 } 2045 } 2046 } 2047 2048 out_unlock: 2049 spin_unlock(&fs_info->qgroup_lock); 2050 2051 out: 2052 ulist_free(roots); 2053 ulist_free(parents); 2054 return ret; 2055 } 2056 2057 /* 2058 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 2059 * from the fs. First, all roots referencing the extent are searched, and 2060 * then the space is accounted accordingly to the different roots. The 2061 * accounting algorithm works in 3 steps documented inline. 
2062 */ 2063 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, 2064 struct btrfs_fs_info *fs_info, 2065 struct btrfs_qgroup_operation *oper) 2066 { 2067 int ret = 0; 2068 2069 if (!fs_info->quota_enabled) 2070 return 0; 2071 2072 BUG_ON(!fs_info->quota_root); 2073 2074 mutex_lock(&fs_info->qgroup_rescan_lock); 2075 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2076 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { 2077 mutex_unlock(&fs_info->qgroup_rescan_lock); 2078 return 0; 2079 } 2080 } 2081 mutex_unlock(&fs_info->qgroup_rescan_lock); 2082 2083 ASSERT(is_fstree(oper->ref_root)); 2084 2085 trace_btrfs_qgroup_account(oper); 2086 2087 switch (oper->type) { 2088 case BTRFS_QGROUP_OPER_ADD_EXCL: 2089 case BTRFS_QGROUP_OPER_SUB_EXCL: 2090 ret = qgroup_excl_accounting(fs_info, oper); 2091 break; 2092 case BTRFS_QGROUP_OPER_ADD_SHARED: 2093 case BTRFS_QGROUP_OPER_SUB_SHARED: 2094 ret = qgroup_shared_accounting(trans, fs_info, oper); 2095 break; 2096 case BTRFS_QGROUP_OPER_SUB_SUBTREE: 2097 ret = qgroup_subtree_accounting(trans, fs_info, oper); 2098 break; 2099 default: 2100 ASSERT(0); 2101 } 2102 return ret; 2103 } 2104 2105 /* 2106 * Needs to be called everytime we run delayed refs, even if there is an error 2107 * in order to cleanup outstanding operations. 
2108 */ 2109 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 2110 struct btrfs_fs_info *fs_info) 2111 { 2112 struct btrfs_qgroup_operation *oper; 2113 int ret = 0; 2114 2115 while (!list_empty(&trans->qgroup_ref_list)) { 2116 oper = list_first_entry(&trans->qgroup_ref_list, 2117 struct btrfs_qgroup_operation, list); 2118 list_del_init(&oper->list); 2119 if (!ret || !trans->aborted) 2120 ret = btrfs_qgroup_account(trans, fs_info, oper); 2121 spin_lock(&fs_info->qgroup_op_lock); 2122 rb_erase(&oper->n, &fs_info->qgroup_op_tree); 2123 spin_unlock(&fs_info->qgroup_op_lock); 2124 btrfs_put_tree_mod_seq(fs_info, &oper->elem); 2125 kfree(oper); 2126 } 2127 return ret; 2128 } 2129 2130 /* 2131 * called from commit_transaction. Writes all changed qgroups to disk. 2132 */ 2133 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2134 struct btrfs_fs_info *fs_info) 2135 { 2136 struct btrfs_root *quota_root = fs_info->quota_root; 2137 int ret = 0; 2138 int start_rescan_worker = 0; 2139 2140 if (!quota_root) 2141 goto out; 2142 2143 if (!fs_info->quota_enabled && fs_info->pending_quota_state) 2144 start_rescan_worker = 1; 2145 2146 fs_info->quota_enabled = fs_info->pending_quota_state; 2147 2148 spin_lock(&fs_info->qgroup_lock); 2149 while (!list_empty(&fs_info->dirty_qgroups)) { 2150 struct btrfs_qgroup *qgroup; 2151 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2152 struct btrfs_qgroup, dirty); 2153 list_del_init(&qgroup->dirty); 2154 spin_unlock(&fs_info->qgroup_lock); 2155 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2156 if (ret) 2157 fs_info->qgroup_flags |= 2158 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2159 spin_lock(&fs_info->qgroup_lock); 2160 } 2161 if (fs_info->quota_enabled) 2162 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2163 else 2164 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2165 spin_unlock(&fs_info->qgroup_lock); 2166 2167 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2168 if (ret) 
2169 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2170 2171 if (!ret && start_rescan_worker) { 2172 ret = qgroup_rescan_init(fs_info, 0, 1); 2173 if (!ret) { 2174 qgroup_rescan_zero_tracking(fs_info); 2175 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2176 &fs_info->qgroup_rescan_work); 2177 } 2178 ret = 0; 2179 } 2180 2181 out: 2182 2183 return ret; 2184 } 2185 2186 /* 2187 * copy the acounting information between qgroups. This is necessary when a 2188 * snapshot or a subvolume is created 2189 */ 2190 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2191 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2192 struct btrfs_qgroup_inherit *inherit) 2193 { 2194 int ret = 0; 2195 int i; 2196 u64 *i_qgroups; 2197 struct btrfs_root *quota_root = fs_info->quota_root; 2198 struct btrfs_qgroup *srcgroup; 2199 struct btrfs_qgroup *dstgroup; 2200 u32 level_size = 0; 2201 u64 nums; 2202 2203 mutex_lock(&fs_info->qgroup_ioctl_lock); 2204 if (!fs_info->quota_enabled) 2205 goto out; 2206 2207 if (!quota_root) { 2208 ret = -EINVAL; 2209 goto out; 2210 } 2211 2212 if (inherit) { 2213 i_qgroups = (u64 *)(inherit + 1); 2214 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2215 2 * inherit->num_excl_copies; 2216 for (i = 0; i < nums; ++i) { 2217 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2218 if (!srcgroup) { 2219 ret = -EINVAL; 2220 goto out; 2221 } 2222 ++i_qgroups; 2223 } 2224 } 2225 2226 /* 2227 * create a tracking group for the subvol itself 2228 */ 2229 ret = add_qgroup_item(trans, quota_root, objectid); 2230 if (ret) 2231 goto out; 2232 2233 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2234 ret = update_qgroup_limit_item(trans, quota_root, objectid, 2235 inherit->lim.flags, 2236 inherit->lim.max_rfer, 2237 inherit->lim.max_excl, 2238 inherit->lim.rsv_rfer, 2239 inherit->lim.rsv_excl); 2240 if (ret) 2241 goto out; 2242 } 2243 2244 if (srcid) { 2245 struct btrfs_root *srcroot; 2246 struct btrfs_key 
srckey; 2247 2248 srckey.objectid = srcid; 2249 srckey.type = BTRFS_ROOT_ITEM_KEY; 2250 srckey.offset = (u64)-1; 2251 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2252 if (IS_ERR(srcroot)) { 2253 ret = PTR_ERR(srcroot); 2254 goto out; 2255 } 2256 2257 rcu_read_lock(); 2258 level_size = srcroot->nodesize; 2259 rcu_read_unlock(); 2260 } 2261 2262 /* 2263 * add qgroup to all inherited groups 2264 */ 2265 if (inherit) { 2266 i_qgroups = (u64 *)(inherit + 1); 2267 for (i = 0; i < inherit->num_qgroups; ++i) { 2268 ret = add_qgroup_relation_item(trans, quota_root, 2269 objectid, *i_qgroups); 2270 if (ret) 2271 goto out; 2272 ret = add_qgroup_relation_item(trans, quota_root, 2273 *i_qgroups, objectid); 2274 if (ret) 2275 goto out; 2276 ++i_qgroups; 2277 } 2278 } 2279 2280 2281 spin_lock(&fs_info->qgroup_lock); 2282 2283 dstgroup = add_qgroup_rb(fs_info, objectid); 2284 if (IS_ERR(dstgroup)) { 2285 ret = PTR_ERR(dstgroup); 2286 goto unlock; 2287 } 2288 2289 if (srcid) { 2290 srcgroup = find_qgroup_rb(fs_info, srcid); 2291 if (!srcgroup) 2292 goto unlock; 2293 2294 /* 2295 * We call inherit after we clone the root in order to make sure 2296 * our counts don't go crazy, so at this point the only 2297 * difference between the two roots should be the root node. 
2298 */ 2299 dstgroup->rfer = srcgroup->rfer; 2300 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2301 dstgroup->excl = level_size; 2302 dstgroup->excl_cmpr = level_size; 2303 srcgroup->excl = level_size; 2304 srcgroup->excl_cmpr = level_size; 2305 qgroup_dirty(fs_info, dstgroup); 2306 qgroup_dirty(fs_info, srcgroup); 2307 } 2308 2309 if (!inherit) 2310 goto unlock; 2311 2312 i_qgroups = (u64 *)(inherit + 1); 2313 for (i = 0; i < inherit->num_qgroups; ++i) { 2314 ret = add_relation_rb(quota_root->fs_info, objectid, 2315 *i_qgroups); 2316 if (ret) 2317 goto unlock; 2318 ++i_qgroups; 2319 } 2320 2321 for (i = 0; i < inherit->num_ref_copies; ++i) { 2322 struct btrfs_qgroup *src; 2323 struct btrfs_qgroup *dst; 2324 2325 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2326 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2327 2328 if (!src || !dst) { 2329 ret = -EINVAL; 2330 goto unlock; 2331 } 2332 2333 dst->rfer = src->rfer - level_size; 2334 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2335 i_qgroups += 2; 2336 } 2337 for (i = 0; i < inherit->num_excl_copies; ++i) { 2338 struct btrfs_qgroup *src; 2339 struct btrfs_qgroup *dst; 2340 2341 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2342 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2343 2344 if (!src || !dst) { 2345 ret = -EINVAL; 2346 goto unlock; 2347 } 2348 2349 dst->excl = src->excl + level_size; 2350 dst->excl_cmpr = src->excl_cmpr + level_size; 2351 i_qgroups += 2; 2352 } 2353 2354 unlock: 2355 spin_unlock(&fs_info->qgroup_lock); 2356 out: 2357 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2358 return ret; 2359 } 2360 2361 /* 2362 * reserve some space for a qgroup and all its parents. The reservation takes 2363 * place with start_transaction or dealloc_reserve, similar to ENOSPC 2364 * accounting. If not enough space is available, EDQUOT is returned. 2365 * We assume that the requested space is new for all qgroups. 
2366 */ 2367 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2368 { 2369 struct btrfs_root *quota_root; 2370 struct btrfs_qgroup *qgroup; 2371 struct btrfs_fs_info *fs_info = root->fs_info; 2372 u64 ref_root = root->root_key.objectid; 2373 int ret = 0; 2374 struct ulist_node *unode; 2375 struct ulist_iterator uiter; 2376 2377 if (!is_fstree(ref_root)) 2378 return 0; 2379 2380 if (num_bytes == 0) 2381 return 0; 2382 2383 spin_lock(&fs_info->qgroup_lock); 2384 quota_root = fs_info->quota_root; 2385 if (!quota_root) 2386 goto out; 2387 2388 qgroup = find_qgroup_rb(fs_info, ref_root); 2389 if (!qgroup) 2390 goto out; 2391 2392 /* 2393 * in a first step, we check all affected qgroups if any limits would 2394 * be exceeded 2395 */ 2396 ulist_reinit(fs_info->qgroup_ulist); 2397 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2398 (uintptr_t)qgroup, GFP_ATOMIC); 2399 if (ret < 0) 2400 goto out; 2401 ULIST_ITER_INIT(&uiter); 2402 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2403 struct btrfs_qgroup *qg; 2404 struct btrfs_qgroup_list *glist; 2405 2406 qg = u64_to_ptr(unode->aux); 2407 2408 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2409 qg->reserved + (s64)qg->rfer + num_bytes > 2410 qg->max_rfer) { 2411 ret = -EDQUOT; 2412 goto out; 2413 } 2414 2415 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2416 qg->reserved + (s64)qg->excl + num_bytes > 2417 qg->max_excl) { 2418 ret = -EDQUOT; 2419 goto out; 2420 } 2421 2422 list_for_each_entry(glist, &qg->groups, next_group) { 2423 ret = ulist_add(fs_info->qgroup_ulist, 2424 glist->group->qgroupid, 2425 (uintptr_t)glist->group, GFP_ATOMIC); 2426 if (ret < 0) 2427 goto out; 2428 } 2429 } 2430 ret = 0; 2431 /* 2432 * no limits exceeded, now record the reservation into all qgroups 2433 */ 2434 ULIST_ITER_INIT(&uiter); 2435 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2436 struct btrfs_qgroup *qg; 2437 2438 qg = u64_to_ptr(unode->aux); 2439 2440 qg->reserved += 
num_bytes; 2441 } 2442 2443 out: 2444 spin_unlock(&fs_info->qgroup_lock); 2445 return ret; 2446 } 2447 2448 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) 2449 { 2450 struct btrfs_root *quota_root; 2451 struct btrfs_qgroup *qgroup; 2452 struct btrfs_fs_info *fs_info = root->fs_info; 2453 struct ulist_node *unode; 2454 struct ulist_iterator uiter; 2455 u64 ref_root = root->root_key.objectid; 2456 int ret = 0; 2457 2458 if (!is_fstree(ref_root)) 2459 return; 2460 2461 if (num_bytes == 0) 2462 return; 2463 2464 spin_lock(&fs_info->qgroup_lock); 2465 2466 quota_root = fs_info->quota_root; 2467 if (!quota_root) 2468 goto out; 2469 2470 qgroup = find_qgroup_rb(fs_info, ref_root); 2471 if (!qgroup) 2472 goto out; 2473 2474 ulist_reinit(fs_info->qgroup_ulist); 2475 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2476 (uintptr_t)qgroup, GFP_ATOMIC); 2477 if (ret < 0) 2478 goto out; 2479 ULIST_ITER_INIT(&uiter); 2480 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2481 struct btrfs_qgroup *qg; 2482 struct btrfs_qgroup_list *glist; 2483 2484 qg = u64_to_ptr(unode->aux); 2485 2486 qg->reserved -= num_bytes; 2487 2488 list_for_each_entry(glist, &qg->groups, next_group) { 2489 ret = ulist_add(fs_info->qgroup_ulist, 2490 glist->group->qgroupid, 2491 (uintptr_t)glist->group, GFP_ATOMIC); 2492 if (ret < 0) 2493 goto out; 2494 } 2495 } 2496 2497 out: 2498 spin_unlock(&fs_info->qgroup_lock); 2499 } 2500 2501 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) 2502 { 2503 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2504 return; 2505 btrfs_err(trans->root->fs_info, 2506 "qgroups not uptodate in trans handle %p: list is%s empty, " 2507 "seq is %#x.%x", 2508 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2509 (u32)(trans->delayed_ref_elem.seq >> 32), 2510 (u32)trans->delayed_ref_elem.seq); 2511 BUG(); 2512 } 2513 2514 /* 2515 * returns < 0 on error, 0 when more leafs are to be scanned. 
2516 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared. 2517 */ 2518 static int 2519 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2520 struct btrfs_trans_handle *trans, struct ulist *qgroups, 2521 struct ulist *tmp, struct extent_buffer *scratch_leaf) 2522 { 2523 struct btrfs_key found; 2524 struct ulist *roots = NULL; 2525 struct seq_list tree_mod_seq_elem = {}; 2526 u64 num_bytes; 2527 u64 seq; 2528 int new_roots; 2529 int slot; 2530 int ret; 2531 2532 path->leave_spinning = 1; 2533 mutex_lock(&fs_info->qgroup_rescan_lock); 2534 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2535 &fs_info->qgroup_rescan_progress, 2536 path, 1, 0); 2537 2538 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 2539 fs_info->qgroup_rescan_progress.objectid, 2540 fs_info->qgroup_rescan_progress.type, 2541 fs_info->qgroup_rescan_progress.offset, ret); 2542 2543 if (ret) { 2544 /* 2545 * The rescan is about to end, we will not be scanning any 2546 * further blocks. We cannot unset the RESCAN flag here, because 2547 * we want to commit the transaction if everything went well. 2548 * To make the live accounting work in this phase, we set our 2549 * scan progress pointer such that every real extent objectid 2550 * will be smaller. 
2551 */ 2552 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2553 btrfs_release_path(path); 2554 mutex_unlock(&fs_info->qgroup_rescan_lock); 2555 return ret; 2556 } 2557 2558 btrfs_item_key_to_cpu(path->nodes[0], &found, 2559 btrfs_header_nritems(path->nodes[0]) - 1); 2560 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2561 2562 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2563 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); 2564 slot = path->slots[0]; 2565 btrfs_release_path(path); 2566 mutex_unlock(&fs_info->qgroup_rescan_lock); 2567 2568 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2569 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2570 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2571 found.type != BTRFS_METADATA_ITEM_KEY) 2572 continue; 2573 if (found.type == BTRFS_METADATA_ITEM_KEY) 2574 num_bytes = fs_info->extent_root->nodesize; 2575 else 2576 num_bytes = found.offset; 2577 2578 ulist_reinit(qgroups); 2579 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2580 &roots); 2581 if (ret < 0) 2582 goto out; 2583 spin_lock(&fs_info->qgroup_lock); 2584 seq = fs_info->qgroup_seq; 2585 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2586 2587 new_roots = 0; 2588 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, 2589 seq, &new_roots, 1); 2590 if (ret < 0) { 2591 spin_unlock(&fs_info->qgroup_lock); 2592 ulist_free(roots); 2593 goto out; 2594 } 2595 2596 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, 2597 seq, 0, new_roots, 1); 2598 if (ret < 0) { 2599 spin_unlock(&fs_info->qgroup_lock); 2600 ulist_free(roots); 2601 goto out; 2602 } 2603 spin_unlock(&fs_info->qgroup_lock); 2604 ulist_free(roots); 2605 } 2606 out: 2607 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2608 2609 return ret; 2610 } 2611 2612 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2613 { 2614 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2615 
qgroup_rescan_work); 2616 struct btrfs_path *path; 2617 struct btrfs_trans_handle *trans = NULL; 2618 struct ulist *tmp = NULL, *qgroups = NULL; 2619 struct extent_buffer *scratch_leaf = NULL; 2620 int err = -ENOMEM; 2621 2622 path = btrfs_alloc_path(); 2623 if (!path) 2624 goto out; 2625 qgroups = ulist_alloc(GFP_NOFS); 2626 if (!qgroups) 2627 goto out; 2628 tmp = ulist_alloc(GFP_NOFS); 2629 if (!tmp) 2630 goto out; 2631 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); 2632 if (!scratch_leaf) 2633 goto out; 2634 2635 err = 0; 2636 while (!err) { 2637 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2638 if (IS_ERR(trans)) { 2639 err = PTR_ERR(trans); 2640 break; 2641 } 2642 if (!fs_info->quota_enabled) { 2643 err = -EINTR; 2644 } else { 2645 err = qgroup_rescan_leaf(fs_info, path, trans, 2646 qgroups, tmp, scratch_leaf); 2647 } 2648 if (err > 0) 2649 btrfs_commit_transaction(trans, fs_info->fs_root); 2650 else 2651 btrfs_end_transaction(trans, fs_info->fs_root); 2652 } 2653 2654 out: 2655 kfree(scratch_leaf); 2656 ulist_free(qgroups); 2657 ulist_free(tmp); 2658 btrfs_free_path(path); 2659 2660 mutex_lock(&fs_info->qgroup_rescan_lock); 2661 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2662 2663 if (err == 2 && 2664 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2665 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2666 } else if (err < 0) { 2667 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2668 } 2669 mutex_unlock(&fs_info->qgroup_rescan_lock); 2670 2671 if (err >= 0) { 2672 btrfs_info(fs_info, "qgroup scan completed%s", 2673 err == 2 ? " (inconsistency flag cleared)" : ""); 2674 } else { 2675 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2676 } 2677 2678 complete_all(&fs_info->qgroup_rescan_completion); 2679 } 2680 2681 /* 2682 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2683 * memory required for the rescan context. 
2684 */ 2685 static int 2686 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2687 int init_flags) 2688 { 2689 int ret = 0; 2690 2691 if (!init_flags && 2692 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2693 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2694 ret = -EINVAL; 2695 goto err; 2696 } 2697 2698 mutex_lock(&fs_info->qgroup_rescan_lock); 2699 spin_lock(&fs_info->qgroup_lock); 2700 2701 if (init_flags) { 2702 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2703 ret = -EINPROGRESS; 2704 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2705 ret = -EINVAL; 2706 2707 if (ret) { 2708 spin_unlock(&fs_info->qgroup_lock); 2709 mutex_unlock(&fs_info->qgroup_rescan_lock); 2710 goto err; 2711 } 2712 2713 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2714 } 2715 2716 memset(&fs_info->qgroup_rescan_progress, 0, 2717 sizeof(fs_info->qgroup_rescan_progress)); 2718 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2719 2720 spin_unlock(&fs_info->qgroup_lock); 2721 mutex_unlock(&fs_info->qgroup_rescan_lock); 2722 2723 init_completion(&fs_info->qgroup_rescan_completion); 2724 2725 memset(&fs_info->qgroup_rescan_work, 0, 2726 sizeof(fs_info->qgroup_rescan_work)); 2727 btrfs_init_work(&fs_info->qgroup_rescan_work, 2728 btrfs_qgroup_rescan_helper, 2729 btrfs_qgroup_rescan_worker, NULL, NULL); 2730 2731 if (ret) { 2732 err: 2733 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2734 return ret; 2735 } 2736 2737 return 0; 2738 } 2739 2740 static void 2741 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2742 { 2743 struct rb_node *n; 2744 struct btrfs_qgroup *qgroup; 2745 2746 spin_lock(&fs_info->qgroup_lock); 2747 /* clear all current qgroup tracking information */ 2748 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2749 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2750 qgroup->rfer = 0; 2751 qgroup->rfer_cmpr = 0; 2752 
qgroup->excl = 0; 2753 qgroup->excl_cmpr = 0; 2754 } 2755 spin_unlock(&fs_info->qgroup_lock); 2756 } 2757 2758 int 2759 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2760 { 2761 int ret = 0; 2762 struct btrfs_trans_handle *trans; 2763 2764 ret = qgroup_rescan_init(fs_info, 0, 1); 2765 if (ret) 2766 return ret; 2767 2768 /* 2769 * We have set the rescan_progress to 0, which means no more 2770 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2771 * However, btrfs_qgroup_account_ref may be right after its call 2772 * to btrfs_find_all_roots, in which case it would still do the 2773 * accounting. 2774 * To solve this, we're committing the transaction, which will 2775 * ensure we run all delayed refs and only after that, we are 2776 * going to clear all tracking information for a clean start. 2777 */ 2778 2779 trans = btrfs_join_transaction(fs_info->fs_root); 2780 if (IS_ERR(trans)) { 2781 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2782 return PTR_ERR(trans); 2783 } 2784 ret = btrfs_commit_transaction(trans, fs_info->fs_root); 2785 if (ret) { 2786 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2787 return ret; 2788 } 2789 2790 qgroup_rescan_zero_tracking(fs_info); 2791 2792 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2793 &fs_info->qgroup_rescan_work); 2794 2795 return 0; 2796 } 2797 2798 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2799 { 2800 int running; 2801 int ret = 0; 2802 2803 mutex_lock(&fs_info->qgroup_rescan_lock); 2804 spin_lock(&fs_info->qgroup_lock); 2805 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2806 spin_unlock(&fs_info->qgroup_lock); 2807 mutex_unlock(&fs_info->qgroup_rescan_lock); 2808 2809 if (running) 2810 ret = wait_for_completion_interruptible( 2811 &fs_info->qgroup_rescan_completion); 2812 2813 return ret; 2814 } 2815 2816 /* 2817 * this is only called from open_ctree where we're still single threaded, thus 2818 * locking is omitted here. 
2819 */ 2820 void 2821 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2822 { 2823 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2824 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2825 &fs_info->qgroup_rescan_work); 2826 } 2827