/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"

/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 old_refcnt;
	u64 new_refcnt;
};

/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;
	struct list_head next_member;
	struct btrfs_qgroup *group;
	struct btrfs_qgroup *member;
};

#define ptr_to_u64(x) ((u64)(uintptr_t)x)
#define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)

static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);

/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}

/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *qgroup;

	while (*p) {
		parent = *p;
		qgroup = rb_entry(parent, struct btrfs_qgroup, node);

		if (qgroup->qgroupid < qgroupid)
			p = &(*p)->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			p = &(*p)->rb_right;
		else
			return qgroup;
	}

	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
	if (!qgroup)
		return ERR_PTR(-ENOMEM);

	qgroup->qgroupid = qgroupid;
	INIT_LIST_HEAD(&qgroup->groups);
	INIT_LIST_HEAD(&qgroup->members);
	INIT_LIST_HEAD(&qgroup->dirty);

	rb_link_node(&qgroup->node, parent, p);
	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

	return qgroup;
}

static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
	struct btrfs_qgroup_list *list;

	list_del(&qgroup->dirty);
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);
}

/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(qgroup);
	return 0;
}

/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list = kzalloc(sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	list->group = parent;
	list->member = member;
	list_add_tail(&list->next_group, &member->groups);
	list_add_tail(&list->next_member, &parent->members);

	return 0;
}

/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl)
{
	struct btrfs_qgroup *qgroup;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup)
		return -EINVAL;
	if (qgroup->rfer != rfer || qgroup->excl != excl)
		return -EINVAL;
	return 0;
}
#endif

/*
 * The full config is read in one go, only called from open_ctree()
 * It doesn't use any locking, as at this point we're still single-threaded
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!fs_info->quota_enabled)
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
					"old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					"qgroup generation mismatch, "
					"marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				"orphan qgroup relation 0x%llx->0x%llx",
				found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
		fs_info->quota_enabled = 0;
		fs_info->pending_quota_state = 0;
	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		   ret >= 0) {
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	}
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable().
 * The first two are single-threaded paths. For the third one, quota_root has
 * already been set to NULL with qgroup_lock held, so it is safe to clean up
 * the in-memory structures without holding qgroup_lock.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when unmounting the
	 * filesystem and when disabling quota, so we set qgroup_ulist
	 * to be null here to avoid a double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
}

static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}

static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	if (btrfs_test_is_dummy_root(quota_root))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/*
	 * Avoid a transaction abort by catching -EEXIST here. In that
	 * case, we proceed by re-initializing the existing structure
	 * on disk.
	 */

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret && ret != -EEXIST)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

	if (btrfs_test_is_dummy_root(root))
		return 0;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaves one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root->fs_info, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static void qgroup_dirty(struct btrfs_fs_info *fs_info,
			 struct btrfs_qgroup *qgroup)
{
	if (list_empty(&qgroup->dirty))
		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}

/*
 * The easy accounting, if we are adding/removing the only ref for an extent
 * then this qgroup and all of the parent qgroups get their reference and
 * exclusive counts adjusted.
 *
 * Caller should hold fs_info->qgroup_lock.
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    u64 num_bytes, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;
	if (sign > 0)
		qgroup->reserved -= num_bytes;

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				ptr_to_u64(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = u64_to_ptr(unode->aux);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0)
			qgroup->reserved -= num_bytes;
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}


/*
 * Quick path for updating a qgroup that has only exclusive refs.
 *
 * In that case, updating all the parents is enough.
 * Otherwise we need to do a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for a quick update, return >0 if a full rescan is needed
 * and the INCONSISTENT flag has been set.
 * Return < 0 for other errors.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				   struct ulist *tmp, u64 src, u64 dst,
				   int sign)
{
	struct btrfs_qgroup *qgroup;
	int ret = 1;
	int err = 0;

	qgroup = find_qgroup_rb(fs_info, src);
	if (!qgroup)
		goto out;
	if (qgroup->excl == qgroup->rfer) {
		ret = 0;
		err = __qgroup_excl_accounting(fs_info, tmp, dst,
					       qgroup->excl, sign);
		if (err < 0) {
			ret = err;
			goto out;
		}
	}
out:
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;

	/* Check the level of src and dst first */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	if (ret < 0) {
		spin_unlock(&fs_info->qgroup_lock);
		goto out;
	}
	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	ulist_free(tmp);
	return ret;
}

int __del_qgroup_relation(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;
	int err;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent)
			goto exist;
	}
	ret = -ENOENT;
	goto out;
exist:
	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
	err = del_qgroup_relation_item(trans, quota_root, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	ulist_free(tmp);
	return ret;
}

int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	ret = __del_qgroup_relation(trans, fs_info, src, dst);
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	return ret;
}

int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}
	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	} else {
		/* check if there are no children of this qgroup */
		if (!list_empty(&qgroup->members)) {
			ret = -EBUSY;
			goto out;
		}
	}
	ret = del_qgroup_item(trans, quota_root, qgroupid);

	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		ret = __del_qgroup_relation(trans, fs_info,
					    qgroupid,
					    list->group->qgroupid);
		if (ret)
			goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(quota_root->fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
		qgroup->max_rfer = limit->max_rfer;
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
		qgroup->max_excl = limit->max_excl;
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
		qgroup->rsv_rfer = limit->rsv_rfer;
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
		qgroup->rsv_excl = limit->rsv_excl;
	qgroup->lim_flags |= limit->flags;

	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_limit_item(trans, quota_root, qgroup);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
			   qgroupid);
	}

out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
			   struct btrfs_qgroup_operation *oper2)
{
	/*
	 * Ignore seq and type here, we're looking for any operation
	 * at all related to this extent on that root.
	 */
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	return 0;
}

static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node *n;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	n = fs_info->qgroup_op_tree.rb_node;
	while (n) {
		cur = rb_entry(n, struct btrfs_qgroup_operation, n);
		cmp = comp_oper_exist(cur, oper);
		if (cmp < 0) {
			n = n->rb_right;
		} else if (cmp) {
			n = n->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}

static int comp_oper(struct btrfs_qgroup_operation *oper1,
		     struct btrfs_qgroup_operation *oper2)
{
	if (oper1->bytenr < oper2->bytenr)
		return -1;
	if (oper1->bytenr > oper2->bytenr)
		return 1;
	if (oper1->ref_root < oper2->ref_root)
		return -1;
	if (oper1->ref_root > oper2->ref_root)
		return 1;
	if (oper1->seq < oper2->seq)
		return -1;
	if (oper1->seq > oper2->seq)
		return 1;
	if (oper1->type < oper2->type)
		return -1;
	if (oper1->type > oper2->type)
		return 1;
	return 0;
}

static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
			      struct btrfs_qgroup_operation *oper)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup_operation *cur;
	int cmp;

	spin_lock(&fs_info->qgroup_op_lock);
	p = &fs_info->qgroup_op_tree.rb_node;
	while (*p) {
		parent = *p;
		cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
		cmp = comp_oper(cur, oper);
		if (cmp < 0) {
			p = &(*p)->rb_right;
		} else if (cmp) {
			p = &(*p)->rb_left;
		} else {
			spin_unlock(&fs_info->qgroup_op_lock);
			return -EEXIST;
		}
	}
	rb_link_node(&oper->n, parent, p);
	rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
	spin_unlock(&fs_info->qgroup_op_lock);
	return 0;
}

/*
 * Record a quota operation for processing later on.
 * @trans: the transaction we are adding the delayed op to.
 * @fs_info: the fs_info for this fs.
 * @ref_root: the root of the reference we are acting on,
 * @bytenr: the bytenr we are acting on.
 * @num_bytes: the number of bytes in the reference.
 * @type: the type of operation this is.
 * @mod_seq: do we need to get a sequence number for looking up roots.
 *
 * We just add it to our trans qgroup_ref_list and carry on and process these
 * operations in order at some later point. If the reference root isn't a fs
 * root then we don't bother with doing anything.
 *
 * MUST BE HOLDING THE REF LOCK.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info, u64 ref_root,
			    u64 bytenr, u64 num_bytes,
			    enum btrfs_qgroup_operation_type type, int mod_seq)
{
	struct btrfs_qgroup_operation *oper;
	int ret;

	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
		return 0;

	oper = kmalloc(sizeof(*oper), GFP_NOFS);
	if (!oper)
		return -ENOMEM;

	oper->ref_root = ref_root;
	oper->bytenr = bytenr;
	oper->num_bytes = num_bytes;
	oper->type = type;
	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
	INIT_LIST_HEAD(&oper->elem.list);
	oper->elem.seq = 0;

	trace_btrfs_qgroup_record_ref(oper);

	if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
		/*
		 * If any operation for this bytenr/ref_root combo
		 * exists, then we know it's not exclusively owned and
		 * shouldn't be queued up.
		 *
		 * This also catches the case where we have a cloned
		 * extent that gets queued up multiple times during
		 * drop snapshot.
		 */
		if (qgroup_oper_exists(fs_info, oper)) {
			kfree(oper);
			return 0;
		}
	}

	ret = insert_qgroup_oper(fs_info, oper);
	if (ret) {
		/* Shouldn't happen so have an assert for developers */
		ASSERT(0);
		kfree(oper);
		return ret;
	}
	list_add_tail(&oper->list, &trans->qgroup_ref_list);

	if (mod_seq)
		btrfs_get_tree_mod_seq(fs_info, &oper->elem);

	return 0;
}

static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper)
{
	struct ulist *tmp;
	int sign = 0;
	int ret = 0;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;

	spin_lock(&fs_info->qgroup_lock);
	if (!fs_info->quota_root)
		goto out;

	switch (oper->type) {
	case BTRFS_QGROUP_OPER_ADD_EXCL:
		sign = 1;
		break;
	case BTRFS_QGROUP_OPER_SUB_EXCL:
		sign = -1;
		break;
	default:
		ASSERT(0);
	}
	ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
				       oper->num_bytes, sign);
out:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(tmp);
	return ret;
}

/*
 * Walk all of the roots that point to our bytenr and adjust their refcnts
 * properly.
 */
static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
				  u64 root_to_skip, struct ulist *tmp,
				  struct ulist *roots, struct ulist *qgroups,
				  u64 seq, int *old_roots, int rescan)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* We don't count our current root here */
		if (unode->val == root_to_skip)
			continue;
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;
		/*
		 * We could have a pending removal of this same ref so we may
		 * not have actually found our ref root when doing
		 * btrfs_find_all_roots, so we need to keep track of how many
		 * old roots we find in case we removed ours and added a
		 * different one at the same time.  I don't think this could
		 * happen in practice but that sort of thinking leads to pain
		 * and suffering and to the dark side.
		 */
		(*old_roots)++;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = u64_to_ptr(tmp_unode->aux);
			/*
			 * We use this sequence number to keep from having to
			 * run the whole list and 0 out the refcnt every time.
			 * We basically use the sequence as the known 0 count
			 * and then add 1 every time we see a qgroup.  This is
			 * how we get how many of the roots actually point up
			 * to the upper level qgroups in order to determine
			 * exclusive counts.
			 *
			 * For rescan we want to set old_refcnt to seq so our
			 * exclusive calculations end up correct.
			 */
			if (rescan)
				qg->old_refcnt = seq;
			else if (qg->old_refcnt < seq)
				qg->old_refcnt = seq + 1;
			else
				qg->old_refcnt++;

			if (qg->new_refcnt < seq)
				qg->new_refcnt = seq + 1;
			else
				qg->new_refcnt++;
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						ptr_to_u64(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * We need to walk forward in our operation tree and account for any roots that
 * were deleted after we made this operation.
 */
static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
				       struct btrfs_qgroup_operation *oper,
				       struct ulist *tmp,
				       struct ulist *qgroups, u64 seq,
				       int *old_roots)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_operation *tmp_oper;
	struct rb_node *n;
	int ret;

	ulist_reinit(tmp);

	/*
	 * We only walk forward in the tree since we're only interested in
	 * removals that happened _after_ our operation.
	 */
	spin_lock(&fs_info->qgroup_op_lock);
	n = rb_next(&oper->n);
	spin_unlock(&fs_info->qgroup_op_lock);
	if (!n)
		return 0;
	tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
	while (tmp_oper->bytenr == oper->bytenr) {
		/*
		 * If it's not a removal we don't care, additions work out
		 * properly with our refcnt tracking.
		 */
		if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
		    tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
			goto next;
		qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
		if (!qg)
			goto next;
		ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
				GFP_ATOMIC);
		if (ret) {
			if (ret < 0)
				return ret;
			/*
			 * We only want to increase old_roots if this qgroup is
			 * not already in the list of qgroups. If it is already
			 * there then that means it must have been re-added or
			 * the delete will be discarded because we had an
			 * existing ref that we haven't looked up yet.  In this
			 * case we don't want to increase old_roots.  So if ret
			 * == 1 then we know that this is the first time we've
			 * seen this qgroup and we can bump the old_roots.
			 */
			(*old_roots)++;
			ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
					GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
next:
		spin_lock(&fs_info->qgroup_op_lock);
		n = rb_next(&tmp_oper->n);
		spin_unlock(&fs_info->qgroup_op_lock);
		if (!n)
			break;
		tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
	}

	/* Ok now process the qgroups we found */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		struct btrfs_qgroup_list *glist;

		qg = u64_to_ptr(unode->aux);
		if (qg->old_refcnt < seq)
			qg->old_refcnt = seq + 1;
		else
			qg->old_refcnt++;
		if (qg->new_refcnt < seq)
			qg->new_refcnt = seq + 1;
		else
			qg->new_refcnt++;
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(qgroups, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

/* Add refcnt for the newly added reference. */
static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
				  struct btrfs_qgroup_operation *oper,
				  struct btrfs_qgroup *qgroup,
				  struct ulist *tmp, struct ulist *qgroups,
				  u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (ret < 0)
		return ret;
	ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
			GFP_ATOMIC);
	if (ret < 0)
		return ret;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		struct btrfs_qgroup_list *glist;

		qg = u64_to_ptr(unode->aux);
		if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
			if (qg->new_refcnt < seq)
				qg->new_refcnt = seq + 1;
			else
				qg->new_refcnt++;
		} else {
			if (qg->old_refcnt < seq)
				qg->old_refcnt = seq + 1;
			else
				qg->old_refcnt++;
		}
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
			ret = ulist_add(qgroups, glist->group->qgroupid,
					ptr_to_u64(glist->group), GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

/*
 * This adjusts the counters for all referenced qgroups if need be.
 */
static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
				  u64 root_to_skip, u64 num_bytes,
				  struct ulist *qgroups, u64 seq,
				  int old_roots, int new_roots, int rescan)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	u64 cur_new_count, cur_old_count;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(qgroups, &uiter))) {
		bool dirty = false;

		qg = u64_to_ptr(unode->aux);
		/*
		 * Wasn't referenced before but is now, add to the reference
		 * counters.
		 */
		if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		}

		/*
		 * Was referenced before but isn't now, subtract from the
		 * reference counters.
		 */
		if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		if (qg->old_refcnt < seq)
			cur_old_count = 0;
		else
			cur_old_count = qg->old_refcnt - seq;
		if (qg->new_refcnt < seq)
			cur_new_count = 0;
		else
			cur_new_count = qg->new_refcnt - seq;

		/*
		 * If our refcount was the same as the roots previously but our
		 * new count isn't the same as the number of roots now then we
		 * went from having an exclusive reference on this range to not.
		 */
		if (old_roots && cur_old_count == old_roots &&
		    (cur_new_count != new_roots || new_roots == 0)) {
			WARN_ON(cur_new_count != new_roots && new_roots == 0);
			qg->excl -= num_bytes;
			qg->excl_cmpr -= num_bytes;
			dirty = true;
		}

		/*
		 * If we didn't reference all the roots before but now we do we
		 * have an exclusive reference to this range.
		 */
		if ((!old_roots || (old_roots && cur_old_count != old_roots))
		    && cur_new_count == new_roots) {
			qg->excl += num_bytes;
			qg->excl_cmpr += num_bytes;
			dirty = true;
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}
	return 0;
}

/*
 * If we removed a data extent and there were other references for that bytenr
 * then we need to lookup all referenced roots to make sure we still don't
 * reference this bytenr.  If we do then we can just discard this operation.
 */
static int check_existing_refs(struct btrfs_trans_handle *trans,
			       struct btrfs_fs_info *fs_info,
			       struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
				   oper->elem.seq, &roots);
	if (ret < 0)
		return ret;
	ret = 0;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		if (unode->val == oper->ref_root) {
			ret = 1;
			break;
		}
	}
	ulist_free(roots);
	btrfs_put_tree_mod_seq(fs_info, &oper->elem);

	return ret;
}

/*
 * If we share a reference across multiple roots then we may need to adjust
 * various qgroups referenced and exclusive counters.  The basic premise is
 * this:
 *
 * 1) We have seq to represent a 0 count.  Instead of looping through all of
 * the qgroups and resetting their refcount to 0 we just constantly bump this
 * sequence number to act as the base reference count.  This means that if
 * anybody is equal to or below this sequence they were never referenced.  We
 * jack this sequence up by the number of roots we found each time in order to
 * make sure we don't have any overlap.
 *
 * 2) We first search all the roots that reference the area _except_ the root
 * we're acting on currently.  This makes up the old_refcnt of all the qgroups
 * before.
 *
 * 3) We walk all of the qgroups referenced by the root we are currently acting
 * on, and will either adjust old_refcnt in the case of a removal or the
 * new_refcnt in the case of an addition.
 *
 * 4) Finally we walk all the qgroups that are referenced by this range
 * including the root we are acting on currently.  We will adjust the counters
 * based on the number of roots we had and will have after this operation.
 *
 * Take this example as an illustration
 *
 *			[qgroup 1/0]
 *		     /         |          \
 *		[qg 0/0]   [qg 0/1]   [qg 0/2]
 *		     \         |          /
 *		      [      extent      ]
 *
 * Say we are adding a reference that is covered by qg 0/0.  The first step
 * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
 * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
 * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
 * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
 * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
 * reference and thus must add the size to the referenced bytes.  Everything
 * else is the same so nothing else changes.
 */
static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
				    struct btrfs_fs_info *fs_info,
				    struct btrfs_qgroup_operation *oper)
{
	struct ulist *roots = NULL;
	struct ulist *qgroups, *tmp;
	struct btrfs_qgroup *qgroup;
	struct seq_list elem = SEQ_LIST_INIT(elem);
	u64 seq;
	int old_roots = 0;
	int new_roots = 0;
	int ret = 0;

	if (oper->elem.seq) {
		ret = check_existing_refs(trans, fs_info, oper);
		if (ret < 0)
			return ret;
		if (ret)
			return 0;
	}

	qgroups = ulist_alloc(GFP_NOFS);
	if (!qgroups)
		return -ENOMEM;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp) {
		ulist_free(qgroups);
		return -ENOMEM;
	}

	btrfs_get_tree_mod_seq(fs_info, &elem);
	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
				   &roots);
	btrfs_put_tree_mod_seq(fs_info, &elem);
	if (ret < 0) {
		ulist_free(qgroups);
		ulist_free(tmp);
		return ret;
	}
	spin_lock(&fs_info->qgroup_lock);
	qgroup = find_qgroup_rb(fs_info, oper->ref_root);
	if (!qgroup)
		goto out;
	seq = fs_info->qgroup_seq;

	/*
	 * So roots is the list of all the roots currently pointing at the
	 * bytenr, including the ref we are adding if we are adding, or not if
	 * we are removing a ref.  So we pass in the ref_root to skip that root
	 * in our calculations.  We set old_refcnt and new_refcnt because who
	 * the hell knows what everything looked like before, and it doesn't
	 * matter except...
	 */
	ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
				     seq, &old_roots, 0);
	if (ret < 0)
		goto out;

	/*
	 * Now adjust the refcounts of the qgroups that care about this
	 * reference, either the old_count in the case of removal or new_count
	 * in the case of an addition.
	 */
	ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
				     seq);
	if (ret < 0)
		goto out;

	/*
	 * ...in the case of removals.  If we had a removal before we got around
	 * to processing this operation then we need to find that guy and count
	 * his references as if they really existed so we don't end up screwing
	 * up the exclusive counts.  Then whenever we go to process the delete
	 * everything will be grand and we can account for whatever exclusive
	 * changes need to be made there.  We also have to pass in old_roots so
	 * we have an accurate count of the roots as it pertains to this
	 * operation's view of the world.
2023 */ 2024 ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq, 2025 &old_roots); 2026 if (ret < 0) 2027 goto out; 2028 2029 /* 2030 * We are adding our root, need to adjust up the number of roots, 2031 * otherwise old_roots is the number of roots we want. 2032 */ 2033 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) { 2034 new_roots = old_roots + 1; 2035 } else { 2036 new_roots = old_roots; 2037 old_roots++; 2038 } 2039 fs_info->qgroup_seq += old_roots + 1; 2040 2041 2042 /* 2043 * And now the magic happens, bless Arne for having a pretty elegant 2044 * solution for this. 2045 */ 2046 qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes, 2047 qgroups, seq, old_roots, new_roots, 0); 2048 out: 2049 spin_unlock(&fs_info->qgroup_lock); 2050 ulist_free(qgroups); 2051 ulist_free(roots); 2052 ulist_free(tmp); 2053 return ret; 2054 } 2055 2056 /* 2057 * Process a reference to a shared subtree. This type of operation is 2058 * queued during snapshot removal when we encounter extents which are 2059 * shared between more than one root. 2060 */ 2061 static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, 2062 struct btrfs_fs_info *fs_info, 2063 struct btrfs_qgroup_operation *oper) 2064 { 2065 struct ulist *roots = NULL; 2066 struct ulist_node *unode; 2067 struct ulist_iterator uiter; 2068 struct btrfs_qgroup_list *glist; 2069 struct ulist *parents; 2070 int ret = 0; 2071 int err; 2072 struct btrfs_qgroup *qg; 2073 u64 root_obj = 0; 2074 struct seq_list elem = SEQ_LIST_INIT(elem); 2075 2076 parents = ulist_alloc(GFP_NOFS); 2077 if (!parents) 2078 return -ENOMEM; 2079 2080 btrfs_get_tree_mod_seq(fs_info, &elem); 2081 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, 2082 elem.seq, &roots); 2083 btrfs_put_tree_mod_seq(fs_info, &elem); 2084 if (ret < 0) 2085 goto out; 2086 2087 if (roots->nnodes != 1) 2088 goto out; 2089 2090 ULIST_ITER_INIT(&uiter); 2091 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */ 2092 /* 2093 * If we find our ref root then that means all refs 2094 * this extent has to the root have not yet been 2095 * deleted. In that case, we do nothing and let the 2096 * last ref for this bytenr drive our update. 2097 * 2098 * This can happen for example if an extent is 2099 * referenced multiple times in a snapshot (clone, 2100 * etc). If we are in the middle of snapshot removal, 2101 * queued updates for such an extent will find the 2102 * root if we have not yet finished removing the 2103 * snapshot. 2104 */ 2105 if (unode->val == oper->ref_root) 2106 goto out; 2107 2108 root_obj = unode->val; 2109 BUG_ON(!root_obj); 2110 2111 spin_lock(&fs_info->qgroup_lock); 2112 qg = find_qgroup_rb(fs_info, root_obj); 2113 if (!qg) 2114 goto out_unlock; 2115 2116 qg->excl += oper->num_bytes; 2117 qg->excl_cmpr += oper->num_bytes; 2118 qgroup_dirty(fs_info, qg); 2119 2120 /* 2121 * Adjust counts for parent groups. First we find all 2122 * parents, then in the 2nd loop we do the adjustment 2123 * while adding parents of the parents to our ulist. 
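 * Note that ulist_add() returns 0 for an id that is already present (only a
 * return value < 0 is an error), so a parent group that is reachable through
 * more than one child is queued and adjusted only once, and the walk
 * terminates even if the relation graph shares ancestors.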
2124 */ 2125 list_for_each_entry(glist, &qg->groups, next_group) { 2126 err = ulist_add(parents, glist->group->qgroupid, 2127 ptr_to_u64(glist->group), GFP_ATOMIC); 2128 if (err < 0) { 2129 ret = err; 2130 goto out_unlock; 2131 } 2132 } 2133 2134 ULIST_ITER_INIT(&uiter); 2135 while ((unode = ulist_next(parents, &uiter))) { 2136 qg = u64_to_ptr(unode->aux); 2137 qg->excl += oper->num_bytes; 2138 qg->excl_cmpr += oper->num_bytes; 2139 qgroup_dirty(fs_info, qg); 2140 2141 /* Add any parents of the parents */ 2142 list_for_each_entry(glist, &qg->groups, next_group) { 2143 err = ulist_add(parents, glist->group->qgroupid, 2144 ptr_to_u64(glist->group), GFP_ATOMIC); 2145 if (err < 0) { 2146 ret = err; 2147 goto out_unlock; 2148 } 2149 } 2150 } 2151 2152 out_unlock: 2153 spin_unlock(&fs_info->qgroup_lock); 2154 2155 out: 2156 ulist_free(roots); 2157 ulist_free(parents); 2158 return ret; 2159 } 2160 2161 /* 2162 * btrfs_qgroup_account is called for every queued qgroup operation, i.e. for 2163 * every ref that is added to or deleted from the fs. The roots referencing 2164 * the extent are searched where necessary, and the space is then accounted 2165 * to the affected qgroups by the type-specific helpers above. 2166 */ 2167 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans, 2168 struct btrfs_fs_info *fs_info, 2169 struct btrfs_qgroup_operation *oper) 2170 { 2171 int ret = 0; 2172 2173 if (!fs_info->quota_enabled) 2174 return 0; 2175 2176 BUG_ON(!fs_info->quota_root); 2177 2178 mutex_lock(&fs_info->qgroup_rescan_lock); 2179 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2180 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) { 2181 mutex_unlock(&fs_info->qgroup_rescan_lock); 2182 return 0; 2183 } 2184 } 2185 mutex_unlock(&fs_info->qgroup_rescan_lock); 2186 2187 ASSERT(is_fstree(oper->ref_root)); 2188 2189 trace_btrfs_qgroup_account(oper); 2190 2191 switch (oper->type) { 2192 case BTRFS_QGROUP_OPER_ADD_EXCL: 2193 case BTRFS_QGROUP_OPER_SUB_EXCL: 2194 ret = qgroup_excl_accounting(fs_info, oper); 2195 break; 2196 case BTRFS_QGROUP_OPER_ADD_SHARED: 2197 case BTRFS_QGROUP_OPER_SUB_SHARED: 2198 ret = qgroup_shared_accounting(trans, fs_info, oper); 2199 break; 2200 case BTRFS_QGROUP_OPER_SUB_SUBTREE: 2201 ret = qgroup_subtree_accounting(trans, fs_info, oper); 2202 break; 2203 default: 2204 ASSERT(0); 2205 } 2206 return ret; 2207 } 2208 2209 /* 2210 * Needs to be called every time we run delayed refs, even if there is an error, 2211 * in order to clean up outstanding operations. 2212 */ 2213 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans, 2214 struct btrfs_fs_info *fs_info) 2215 { 2216 struct btrfs_qgroup_operation *oper; 2217 int ret = 0; 2218 2219 while (!list_empty(&trans->qgroup_ref_list)) { 2220 oper = list_first_entry(&trans->qgroup_ref_list, 2221 struct btrfs_qgroup_operation, list); 2222 list_del_init(&oper->list); 2223 if (!ret || !trans->aborted) 2224 ret = btrfs_qgroup_account(trans, fs_info, oper); 2225 spin_lock(&fs_info->qgroup_op_lock); 2226 rb_erase(&oper->n, &fs_info->qgroup_op_tree); 2227 spin_unlock(&fs_info->qgroup_op_lock); 2228 btrfs_put_tree_mod_seq(fs_info, &oper->elem); 2229 kfree(oper); 2230 } 2231 return ret; 2232 } 2233 2234 /* 2235 * Called from commit_transaction. Writes all changed qgroups to disk.
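 * Dirty qgroups are popped off fs_info->dirty_qgroups and their info and
 * limit items are rewritten; any failure flags the status item
 * BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT so that a later rescan can bring the
 * on-disk numbers back in sync.  If quotas have just been enabled
 * (pending_quota_state was set while quota_enabled was not), the initial
 * rescan is also kicked off from here.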
2236 */ 2237 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2238 struct btrfs_fs_info *fs_info) 2239 { 2240 struct btrfs_root *quota_root = fs_info->quota_root; 2241 int ret = 0; 2242 int start_rescan_worker = 0; 2243 2244 if (!quota_root) 2245 goto out; 2246 2247 if (!fs_info->quota_enabled && fs_info->pending_quota_state) 2248 start_rescan_worker = 1; 2249 2250 fs_info->quota_enabled = fs_info->pending_quota_state; 2251 2252 spin_lock(&fs_info->qgroup_lock); 2253 while (!list_empty(&fs_info->dirty_qgroups)) { 2254 struct btrfs_qgroup *qgroup; 2255 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2256 struct btrfs_qgroup, dirty); 2257 list_del_init(&qgroup->dirty); 2258 spin_unlock(&fs_info->qgroup_lock); 2259 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2260 if (ret) 2261 fs_info->qgroup_flags |= 2262 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2263 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2264 if (ret) 2265 fs_info->qgroup_flags |= 2266 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2267 spin_lock(&fs_info->qgroup_lock); 2268 } 2269 if (fs_info->quota_enabled) 2270 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2271 else 2272 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2273 spin_unlock(&fs_info->qgroup_lock); 2274 2275 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2276 if (ret) 2277 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2278 2279 if (!ret && start_rescan_worker) { 2280 ret = qgroup_rescan_init(fs_info, 0, 1); 2281 if (!ret) { 2282 qgroup_rescan_zero_tracking(fs_info); 2283 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2284 &fs_info->qgroup_rescan_work); 2285 } 2286 ret = 0; 2287 } 2288 2289 out: 2290 2291 return ret; 2292 } 2293 2294 /* 2295 * Copy the accounting information between qgroups.
This is necessary when a 2296 * snapshot or a subvolume is created 2297 */ 2298 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2299 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2300 struct btrfs_qgroup_inherit *inherit) 2301 { 2302 int ret = 0; 2303 int i; 2304 u64 *i_qgroups; 2305 struct btrfs_root *quota_root = fs_info->quota_root; 2306 struct btrfs_qgroup *srcgroup; 2307 struct btrfs_qgroup *dstgroup; 2308 u32 level_size = 0; 2309 u64 nums; 2310 2311 mutex_lock(&fs_info->qgroup_ioctl_lock); 2312 if (!fs_info->quota_enabled) 2313 goto out; 2314 2315 if (!quota_root) { 2316 ret = -EINVAL; 2317 goto out; 2318 } 2319 2320 if (inherit) { 2321 i_qgroups = (u64 *)(inherit + 1); 2322 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2323 2 * inherit->num_excl_copies; 2324 for (i = 0; i < nums; ++i) { 2325 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2326 if (!srcgroup) { 2327 ret = -EINVAL; 2328 goto out; 2329 } 2330 2331 if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) { 2332 ret = -EINVAL; 2333 goto out; 2334 } 2335 ++i_qgroups; 2336 } 2337 } 2338 2339 /* 2340 * create a tracking group for the subvol itself 2341 */ 2342 ret = add_qgroup_item(trans, quota_root, objectid); 2343 if (ret) 2344 goto out; 2345 2346 if (srcid) { 2347 struct btrfs_root *srcroot; 2348 struct btrfs_key srckey; 2349 2350 srckey.objectid = srcid; 2351 srckey.type = BTRFS_ROOT_ITEM_KEY; 2352 srckey.offset = (u64)-1; 2353 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2354 if (IS_ERR(srcroot)) { 2355 ret = PTR_ERR(srcroot); 2356 goto out; 2357 } 2358 2359 rcu_read_lock(); 2360 level_size = srcroot->nodesize; 2361 rcu_read_unlock(); 2362 } 2363 2364 /* 2365 * add qgroup to all inherited groups 2366 */ 2367 if (inherit) { 2368 i_qgroups = (u64 *)(inherit + 1); 2369 for (i = 0; i < inherit->num_qgroups; ++i) { 2370 ret = add_qgroup_relation_item(trans, quota_root, 2371 objectid, *i_qgroups); 2372 if (ret) 2373 goto out; 2374 ret = add_qgroup_relation_item(trans, quota_root, 2375 *i_qgroups, objectid); 2376 if (ret) 2377 goto out; 2378 ++i_qgroups; 2379 } 2380 } 2381 2382 2383 spin_lock(&fs_info->qgroup_lock); 2384 2385 dstgroup = add_qgroup_rb(fs_info, objectid); 2386 if (IS_ERR(dstgroup)) { 2387 ret = PTR_ERR(dstgroup); 2388 goto unlock; 2389 } 2390 2391 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2392 dstgroup->lim_flags = inherit->lim.flags; 2393 dstgroup->max_rfer = inherit->lim.max_rfer; 2394 dstgroup->max_excl = inherit->lim.max_excl; 2395 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2396 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2397 2398 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2399 if (ret) { 2400 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2401 btrfs_info(fs_info, "unable to update quota limit for %llu", 2402 dstgroup->qgroupid); 2403 goto unlock; 2404 } 2405 } 2406 2407 if (srcid) { 2408 srcgroup = find_qgroup_rb(fs_info, srcid); 2409 if (!srcgroup) 2410 goto unlock; 2411 2412 /* 2413 * We call inherit after we clone the root in order to make sure 2414 * our counts don't go crazy, so at this point the only 2415 * difference between the two roots should be the root node. 
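 * The referenced counters are therefore copied verbatim, while the exclusive
 * counters of both groups are set to level_size: right after the clone each
 * root exclusively owns exactly one tree block, its own root node, and shares
 * everything below it.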
2416 */ 2417 dstgroup->rfer = srcgroup->rfer; 2418 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2419 dstgroup->excl = level_size; 2420 dstgroup->excl_cmpr = level_size; 2421 srcgroup->excl = level_size; 2422 srcgroup->excl_cmpr = level_size; 2423 2424 /* inherit the limit info */ 2425 dstgroup->lim_flags = srcgroup->lim_flags; 2426 dstgroup->max_rfer = srcgroup->max_rfer; 2427 dstgroup->max_excl = srcgroup->max_excl; 2428 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2429 dstgroup->rsv_excl = srcgroup->rsv_excl; 2430 2431 qgroup_dirty(fs_info, dstgroup); 2432 qgroup_dirty(fs_info, srcgroup); 2433 } 2434 2435 if (!inherit) 2436 goto unlock; 2437 2438 i_qgroups = (u64 *)(inherit + 1); 2439 for (i = 0; i < inherit->num_qgroups; ++i) { 2440 ret = add_relation_rb(quota_root->fs_info, objectid, 2441 *i_qgroups); 2442 if (ret) 2443 goto unlock; 2444 ++i_qgroups; 2445 } 2446 2447 for (i = 0; i < inherit->num_ref_copies; ++i) { 2448 struct btrfs_qgroup *src; 2449 struct btrfs_qgroup *dst; 2450 2451 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2452 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2453 2454 if (!src || !dst) { 2455 ret = -EINVAL; 2456 goto unlock; 2457 } 2458 2459 dst->rfer = src->rfer - level_size; 2460 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2461 i_qgroups += 2; 2462 } 2463 for (i = 0; i < inherit->num_excl_copies; ++i) { 2464 struct btrfs_qgroup *src; 2465 struct btrfs_qgroup *dst; 2466 2467 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2468 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2469 2470 if (!src || !dst) { 2471 ret = -EINVAL; 2472 goto unlock; 2473 } 2474 2475 dst->excl = src->excl + level_size; 2476 dst->excl_cmpr = src->excl_cmpr + level_size; 2477 i_qgroups += 2; 2478 } 2479 2480 unlock: 2481 spin_unlock(&fs_info->qgroup_lock); 2482 out: 2483 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2484 return ret; 2485 } 2486 2487 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2488 { 2489 struct btrfs_root *quota_root; 2490 struct btrfs_qgroup *qgroup; 2491 struct btrfs_fs_info *fs_info = root->fs_info; 2492 u64 ref_root = root->root_key.objectid; 2493 int ret = 0; 2494 struct ulist_node *unode; 2495 struct ulist_iterator uiter; 2496 2497 if (!is_fstree(ref_root)) 2498 return 0; 2499 2500 if (num_bytes == 0) 2501 return 0; 2502 2503 spin_lock(&fs_info->qgroup_lock); 2504 quota_root = fs_info->quota_root; 2505 if (!quota_root) 2506 goto out; 2507 2508 qgroup = find_qgroup_rb(fs_info, ref_root); 2509 if (!qgroup) 2510 goto out; 2511 2512 /* 2513 * in a first step, we check all affected qgroups if any limits would 2514 * be exceeded 2515 */ 2516 ulist_reinit(fs_info->qgroup_ulist); 2517 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2518 (uintptr_t)qgroup, GFP_ATOMIC); 2519 if (ret < 0) 2520 goto out; 2521 ULIST_ITER_INIT(&uiter); 2522 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2523 struct btrfs_qgroup *qg; 2524 struct btrfs_qgroup_list *glist; 2525 2526 qg = u64_to_ptr(unode->aux); 2527 2528 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2529 qg->reserved + (s64)qg->rfer + num_bytes > 2530 qg->max_rfer) { 2531 ret = -EDQUOT; 2532 goto out; 2533 } 2534 2535 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2536 qg->reserved + (s64)qg->excl + num_bytes > 2537 qg->max_excl) { 2538 ret = -EDQUOT; 2539 goto out; 2540 } 2541 2542 list_for_each_entry(glist, &qg->groups, next_group) { 2543 ret = ulist_add(fs_info->qgroup_ulist, 2544 glist->group->qgroupid, 2545 (uintptr_t)glist->group, GFP_ATOMIC); 2546 if (ret < 0) 2547 goto out; 2548 } 2549 
} 2550 ret = 0; 2551 /* 2552 * no limits exceeded, now record the reservation into all qgroups 2553 */ 2554 ULIST_ITER_INIT(&uiter); 2555 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2556 struct btrfs_qgroup *qg; 2557 2558 qg = u64_to_ptr(unode->aux); 2559 2560 qg->reserved += num_bytes; 2561 } 2562 2563 out: 2564 spin_unlock(&fs_info->qgroup_lock); 2565 return ret; 2566 } 2567 2568 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes) 2569 { 2570 struct btrfs_root *quota_root; 2571 struct btrfs_qgroup *qgroup; 2572 struct btrfs_fs_info *fs_info = root->fs_info; 2573 struct ulist_node *unode; 2574 struct ulist_iterator uiter; 2575 u64 ref_root = root->root_key.objectid; 2576 int ret = 0; 2577 2578 if (!is_fstree(ref_root)) 2579 return; 2580 2581 if (num_bytes == 0) 2582 return; 2583 2584 spin_lock(&fs_info->qgroup_lock); 2585 2586 quota_root = fs_info->quota_root; 2587 if (!quota_root) 2588 goto out; 2589 2590 qgroup = find_qgroup_rb(fs_info, ref_root); 2591 if (!qgroup) 2592 goto out; 2593 2594 ulist_reinit(fs_info->qgroup_ulist); 2595 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2596 (uintptr_t)qgroup, GFP_ATOMIC); 2597 if (ret < 0) 2598 goto out; 2599 ULIST_ITER_INIT(&uiter); 2600 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2601 struct btrfs_qgroup *qg; 2602 struct btrfs_qgroup_list *glist; 2603 2604 qg = u64_to_ptr(unode->aux); 2605 2606 qg->reserved -= num_bytes; 2607 2608 list_for_each_entry(glist, &qg->groups, next_group) { 2609 ret = ulist_add(fs_info->qgroup_ulist, 2610 glist->group->qgroupid, 2611 (uintptr_t)glist->group, GFP_ATOMIC); 2612 if (ret < 0) 2613 goto out; 2614 } 2615 } 2616 2617 out: 2618 spin_unlock(&fs_info->qgroup_lock); 2619 } 2620 2621 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans) 2622 { 2623 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2624 return; 2625 btrfs_err(trans->root->fs_info, 2626 "qgroups not uptodate in trans handle %p: list is%s empty, " 2627 "seq is %#x.%x", 2628 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2629 (u32)(trans->delayed_ref_elem.seq >> 32), 2630 (u32)trans->delayed_ref_elem.seq); 2631 BUG(); 2632 } 2633 2634 /* 2635 * returns < 0 on error, 0 when more leafs are to be scanned. 2636 * returns 1 when done. 2637 */ 2638 static int 2639 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2640 struct btrfs_trans_handle *trans, struct ulist *qgroups, 2641 struct ulist *tmp, struct extent_buffer *scratch_leaf) 2642 { 2643 struct btrfs_key found; 2644 struct ulist *roots = NULL; 2645 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); 2646 u64 num_bytes; 2647 u64 seq; 2648 int new_roots; 2649 int slot; 2650 int ret; 2651 2652 path->leave_spinning = 1; 2653 mutex_lock(&fs_info->qgroup_rescan_lock); 2654 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2655 &fs_info->qgroup_rescan_progress, 2656 path, 1, 0); 2657 2658 pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n", 2659 fs_info->qgroup_rescan_progress.objectid, 2660 fs_info->qgroup_rescan_progress.type, 2661 fs_info->qgroup_rescan_progress.offset, ret); 2662 2663 if (ret) { 2664 /* 2665 * The rescan is about to end, we will not be scanning any 2666 * further blocks. We cannot unset the RESCAN flag here, because 2667 * we want to commit the transaction if everything went well. 
2668 * To make the live accounting work in this phase, we set our 2669 * scan progress pointer such that every real extent objectid 2670 * will be smaller. 2671 */ 2672 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2673 btrfs_release_path(path); 2674 mutex_unlock(&fs_info->qgroup_rescan_lock); 2675 return ret; 2676 } 2677 2678 btrfs_item_key_to_cpu(path->nodes[0], &found, 2679 btrfs_header_nritems(path->nodes[0]) - 1); 2680 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2681 2682 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2683 memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf)); 2684 slot = path->slots[0]; 2685 btrfs_release_path(path); 2686 mutex_unlock(&fs_info->qgroup_rescan_lock); 2687 2688 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2689 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2690 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2691 found.type != BTRFS_METADATA_ITEM_KEY) 2692 continue; 2693 if (found.type == BTRFS_METADATA_ITEM_KEY) 2694 num_bytes = fs_info->extent_root->nodesize; 2695 else 2696 num_bytes = found.offset; 2697 2698 ulist_reinit(qgroups); 2699 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2700 &roots); 2701 if (ret < 0) 2702 goto out; 2703 spin_lock(&fs_info->qgroup_lock); 2704 seq = fs_info->qgroup_seq; 2705 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */ 2706 2707 new_roots = 0; 2708 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups, 2709 seq, &new_roots, 1); 2710 if (ret < 0) { 2711 spin_unlock(&fs_info->qgroup_lock); 2712 ulist_free(roots); 2713 goto out; 2714 } 2715 2716 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups, 2717 seq, 0, new_roots, 1); 2718 if (ret < 0) { 2719 spin_unlock(&fs_info->qgroup_lock); 2720 ulist_free(roots); 2721 goto out; 2722 } 2723 spin_unlock(&fs_info->qgroup_lock); 2724 ulist_free(roots); 2725 } 2726 out: 2727 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2728 2729 return ret; 2730 } 2731 2732 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2733 { 2734 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2735 qgroup_rescan_work); 2736 struct btrfs_path *path; 2737 struct btrfs_trans_handle *trans = NULL; 2738 struct ulist *tmp = NULL, *qgroups = NULL; 2739 struct extent_buffer *scratch_leaf = NULL; 2740 int err = -ENOMEM; 2741 int ret = 0; 2742 2743 path = btrfs_alloc_path(); 2744 if (!path) 2745 goto out; 2746 qgroups = ulist_alloc(GFP_NOFS); 2747 if (!qgroups) 2748 goto out; 2749 tmp = ulist_alloc(GFP_NOFS); 2750 if (!tmp) 2751 goto out; 2752 scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS); 2753 if (!scratch_leaf) 2754 goto out; 2755 2756 err = 0; 2757 while (!err) { 2758 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2759 if (IS_ERR(trans)) { 2760 err = PTR_ERR(trans); 2761 break; 2762 } 2763 if (!fs_info->quota_enabled) { 2764 err = -EINTR; 2765 } else { 2766 err = qgroup_rescan_leaf(fs_info, path, trans, 2767 qgroups, tmp, scratch_leaf); 2768 } 2769 if (err > 0) 2770 btrfs_commit_transaction(trans, fs_info->fs_root); 2771 else 2772 btrfs_end_transaction(trans, fs_info->fs_root); 2773 } 2774 2775 out: 2776 kfree(scratch_leaf); 2777 ulist_free(qgroups); 2778 ulist_free(tmp); 2779 btrfs_free_path(path); 2780 2781 mutex_lock(&fs_info->qgroup_rescan_lock); 2782 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2783 2784 if (err > 0 && 2785 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2786 fs_info->qgroup_flags &= 
~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2787 } else if (err < 0) { 2788 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2789 } 2790 mutex_unlock(&fs_info->qgroup_rescan_lock); 2791 2792 /* 2793 * Only update the status item, since the previous part has already updated the 2794 * qgroup info items. 2795 */ 2796 trans = btrfs_start_transaction(fs_info->quota_root, 1); 2797 if (IS_ERR(trans)) { 2798 err = PTR_ERR(trans); 2799 btrfs_err(fs_info, 2800 "failed to start transaction for status update: %d", 2801 err); 2802 goto done; 2803 } 2804 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2805 if (ret < 0) { 2806 err = ret; 2807 btrfs_err(fs_info, "failed to update qgroup status: %d", err); 2808 } 2809 btrfs_end_transaction(trans, fs_info->quota_root); 2810 2811 if (err >= 0) { 2812 btrfs_info(fs_info, "qgroup scan completed%s", 2813 err > 0 ? " (inconsistency flag cleared)" : ""); 2814 } else { 2815 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2816 } 2817 2818 done: 2819 complete_all(&fs_info->qgroup_rescan_completion); 2820 } 2821 2822 /* 2823 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2824 * memory required for the rescan context. 2825 */ 2826 static int 2827 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2828 int init_flags) 2829 { 2830 int ret = 0; 2831 2832 if (!init_flags && 2833 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2834 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2835 ret = -EINVAL; 2836 goto err; 2837 } 2838 2839 mutex_lock(&fs_info->qgroup_rescan_lock); 2840 spin_lock(&fs_info->qgroup_lock); 2841 2842 if (init_flags) { 2843 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2844 ret = -EINPROGRESS; 2845 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2846 ret = -EINVAL; 2847 2848 if (ret) { 2849 spin_unlock(&fs_info->qgroup_lock); 2850 mutex_unlock(&fs_info->qgroup_rescan_lock); 2851 goto err; 2852 } 2853 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2854 } 2855 2856 memset(&fs_info->qgroup_rescan_progress, 0, 2857 sizeof(fs_info->qgroup_rescan_progress)); 2858 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2859 2860 spin_unlock(&fs_info->qgroup_lock); 2861 mutex_unlock(&fs_info->qgroup_rescan_lock); 2862 2863 init_completion(&fs_info->qgroup_rescan_completion); 2864 2865 memset(&fs_info->qgroup_rescan_work, 0, 2866 sizeof(fs_info->qgroup_rescan_work)); 2867 btrfs_init_work(&fs_info->qgroup_rescan_work, 2868 btrfs_qgroup_rescan_helper, 2869 btrfs_qgroup_rescan_worker, NULL, NULL); 2870 2871 if (ret) { 2872 err: 2873 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2874 return ret; 2875 } 2876 2877 return 0; 2878 } 2879 2880 static void 2881 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2882 { 2883 struct rb_node *n; 2884 struct btrfs_qgroup *qgroup; 2885 2886 spin_lock(&fs_info->qgroup_lock); 2887 /* clear all current qgroup tracking information */ 2888 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2889 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2890 qgroup->rfer = 0; 2891 qgroup->rfer_cmpr = 0; 2892 qgroup->excl = 0; 2893 qgroup->excl_cmpr = 0; 2894 } 2895 spin_unlock(&fs_info->qgroup_lock); 2896 } 2897 2898 int 2899 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2900 { 2901 int ret = 0; 2902 struct btrfs_trans_handle *trans; 2903 2904 ret = qgroup_rescan_init(fs_info, 0, 1); 2905 if (ret) 2906 return ret; 2907 2908 /* 2909 * We have set the
rescan_progress to 0, which means no more 2910 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2911 * However, btrfs_qgroup_account_ref may be right after its call 2912 * to btrfs_find_all_roots, in which case it would still do the 2913 * accounting. 2914 * To solve this, we're committing the transaction, which will 2915 * ensure we run all delayed refs and only after that, we are 2916 * going to clear all tracking information for a clean start. 2917 */ 2918 2919 trans = btrfs_join_transaction(fs_info->fs_root); 2920 if (IS_ERR(trans)) { 2921 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2922 return PTR_ERR(trans); 2923 } 2924 ret = btrfs_commit_transaction(trans, fs_info->fs_root); 2925 if (ret) { 2926 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2927 return ret; 2928 } 2929 2930 qgroup_rescan_zero_tracking(fs_info); 2931 2932 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2933 &fs_info->qgroup_rescan_work); 2934 2935 return 0; 2936 } 2937 2938 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) 2939 { 2940 int running; 2941 int ret = 0; 2942 2943 mutex_lock(&fs_info->qgroup_rescan_lock); 2944 spin_lock(&fs_info->qgroup_lock); 2945 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2946 spin_unlock(&fs_info->qgroup_lock); 2947 mutex_unlock(&fs_info->qgroup_rescan_lock); 2948 2949 if (running) 2950 ret = wait_for_completion_interruptible( 2951 &fs_info->qgroup_rescan_completion); 2952 2953 return ret; 2954 } 2955 2956 /* 2957 * this is only called from open_ctree where we're still single threaded, thus 2958 * locking is omitted here. 2959 */ 2960 void 2961 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2962 { 2963 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2964 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2965 &fs_info->qgroup_rescan_work); 2966 } 2967
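/*
 * Rescan lifecycle, as implemented above: btrfs_qgroup_rescan() initializes
 * the rescan state, commits the current transaction so that delayed refs
 * recorded before the progress reset are fully accounted, zeroes all qgroup
 * counters and queues btrfs_qgroup_rescan_worker(), which re-walks the extent
 * tree leaf by leaf.  btrfs_qgroup_wait_for_completion() lets callers block
 * until the worker signals qgroup_rescan_completion, and on mount
 * btrfs_qgroup_rescan_resume() requeues the worker if the status item loaded
 * from disk still has BTRFS_QGROUP_STATUS_FLAG_RESCAN set.
 */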