1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2011 STRATO. All rights reserved. 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/pagemap.h> 8 #include <linux/writeback.h> 9 #include <linux/blkdev.h> 10 #include <linux/rbtree.h> 11 #include <linux/slab.h> 12 #include <linux/workqueue.h> 13 #include <linux/btrfs.h> 14 15 #include "ctree.h" 16 #include "transaction.h" 17 #include "disk-io.h" 18 #include "locking.h" 19 #include "ulist.h" 20 #include "backref.h" 21 #include "extent_io.h" 22 #include "qgroup.h" 23 #include "block-group.h" 24 #include "sysfs.h" 25 26 /* TODO XXX FIXME 27 * - subvol delete -> delete when ref goes to 0? delete limits also? 28 * - reorganize keys 29 * - compressed 30 * - sync 31 * - copy also limits on subvol creation 32 * - limit 33 * - caches for ulists 34 * - performance benchmarks 35 * - check all ioctl parameters 36 */ 37 38 /* 39 * Helpers to access qgroup reservation 40 * 41 * Callers should ensure the lock context and type are valid 42 */ 43 44 static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup) 45 { 46 u64 ret = 0; 47 int i; 48 49 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 50 ret += qgroup->rsv.values[i]; 51 52 return ret; 53 } 54 55 #ifdef CONFIG_BTRFS_DEBUG 56 static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type) 57 { 58 if (type == BTRFS_QGROUP_RSV_DATA) 59 return "data"; 60 if (type == BTRFS_QGROUP_RSV_META_PERTRANS) 61 return "meta_pertrans"; 62 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) 63 return "meta_prealloc"; 64 return NULL; 65 } 66 #endif 67 68 static void qgroup_rsv_add(struct btrfs_fs_info *fs_info, 69 struct btrfs_qgroup *qgroup, u64 num_bytes, 70 enum btrfs_qgroup_rsv_type type) 71 { 72 trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type); 73 qgroup->rsv.values[type] += num_bytes; 74 } 75 76 static void qgroup_rsv_release(struct btrfs_fs_info *fs_info, 77 struct btrfs_qgroup *qgroup, u64 num_bytes, 78 enum btrfs_qgroup_rsv_type type) 79 { 80 
trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type); 81 if (qgroup->rsv.values[type] >= num_bytes) { 82 qgroup->rsv.values[type] -= num_bytes; 83 return; 84 } 85 #ifdef CONFIG_BTRFS_DEBUG 86 WARN_RATELIMIT(1, 87 "qgroup %llu %s reserved space underflow, have %llu to free %llu", 88 qgroup->qgroupid, qgroup_rsv_type_str(type), 89 qgroup->rsv.values[type], num_bytes); 90 #endif 91 qgroup->rsv.values[type] = 0; 92 } 93 94 static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info, 95 struct btrfs_qgroup *dest, 96 struct btrfs_qgroup *src) 97 { 98 int i; 99 100 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 101 qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i); 102 } 103 104 static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info, 105 struct btrfs_qgroup *dest, 106 struct btrfs_qgroup *src) 107 { 108 int i; 109 110 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 111 qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i); 112 } 113 114 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 115 int mod) 116 { 117 if (qg->old_refcnt < seq) 118 qg->old_refcnt = seq; 119 qg->old_refcnt += mod; 120 } 121 122 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 123 int mod) 124 { 125 if (qg->new_refcnt < seq) 126 qg->new_refcnt = seq; 127 qg->new_refcnt += mod; 128 } 129 130 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 131 { 132 if (qg->old_refcnt < seq) 133 return 0; 134 return qg->old_refcnt - seq; 135 } 136 137 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 138 { 139 if (qg->new_refcnt < seq) 140 return 0; 141 return qg->new_refcnt - seq; 142 } 143 144 /* 145 * glue structure to represent the relations between qgroups. 
146 */ 147 struct btrfs_qgroup_list { 148 struct list_head next_group; 149 struct list_head next_member; 150 struct btrfs_qgroup *group; 151 struct btrfs_qgroup *member; 152 }; 153 154 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 155 { 156 return (u64)(uintptr_t)qg; 157 } 158 159 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 160 { 161 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 162 } 163 164 static int 165 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 166 int init_flags); 167 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 168 169 /* must be called with qgroup_ioctl_lock held */ 170 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 171 u64 qgroupid) 172 { 173 struct rb_node *n = fs_info->qgroup_tree.rb_node; 174 struct btrfs_qgroup *qgroup; 175 176 while (n) { 177 qgroup = rb_entry(n, struct btrfs_qgroup, node); 178 if (qgroup->qgroupid < qgroupid) 179 n = n->rb_left; 180 else if (qgroup->qgroupid > qgroupid) 181 n = n->rb_right; 182 else 183 return qgroup; 184 } 185 return NULL; 186 } 187 188 /* must be called with qgroup_lock held */ 189 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 190 u64 qgroupid) 191 { 192 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 193 struct rb_node *parent = NULL; 194 struct btrfs_qgroup *qgroup; 195 196 while (*p) { 197 parent = *p; 198 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 199 200 if (qgroup->qgroupid < qgroupid) 201 p = &(*p)->rb_left; 202 else if (qgroup->qgroupid > qgroupid) 203 p = &(*p)->rb_right; 204 else 205 return qgroup; 206 } 207 208 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 209 if (!qgroup) 210 return ERR_PTR(-ENOMEM); 211 212 qgroup->qgroupid = qgroupid; 213 INIT_LIST_HEAD(&qgroup->groups); 214 INIT_LIST_HEAD(&qgroup->members); 215 INIT_LIST_HEAD(&qgroup->dirty); 216 217 rb_link_node(&qgroup->node, parent, p); 218 rb_insert_color(&qgroup->node, 
&fs_info->qgroup_tree); 219 220 return qgroup; 221 } 222 223 static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, 224 struct btrfs_qgroup *qgroup) 225 { 226 struct btrfs_qgroup_list *list; 227 228 btrfs_sysfs_del_one_qgroup(fs_info, qgroup); 229 list_del(&qgroup->dirty); 230 while (!list_empty(&qgroup->groups)) { 231 list = list_first_entry(&qgroup->groups, 232 struct btrfs_qgroup_list, next_group); 233 list_del(&list->next_group); 234 list_del(&list->next_member); 235 kfree(list); 236 } 237 238 while (!list_empty(&qgroup->members)) { 239 list = list_first_entry(&qgroup->members, 240 struct btrfs_qgroup_list, next_member); 241 list_del(&list->next_group); 242 list_del(&list->next_member); 243 kfree(list); 244 } 245 kfree(qgroup); 246 } 247 248 /* must be called with qgroup_lock held */ 249 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 250 { 251 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 252 253 if (!qgroup) 254 return -ENOENT; 255 256 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 257 __del_qgroup_rb(fs_info, qgroup); 258 return 0; 259 } 260 261 /* must be called with qgroup_lock held */ 262 static int add_relation_rb(struct btrfs_fs_info *fs_info, 263 u64 memberid, u64 parentid) 264 { 265 struct btrfs_qgroup *member; 266 struct btrfs_qgroup *parent; 267 struct btrfs_qgroup_list *list; 268 269 member = find_qgroup_rb(fs_info, memberid); 270 parent = find_qgroup_rb(fs_info, parentid); 271 if (!member || !parent) 272 return -ENOENT; 273 274 list = kzalloc(sizeof(*list), GFP_ATOMIC); 275 if (!list) 276 return -ENOMEM; 277 278 list->group = parent; 279 list->member = member; 280 list_add_tail(&list->next_group, &member->groups); 281 list_add_tail(&list->next_member, &parent->members); 282 283 return 0; 284 } 285 286 /* must be called with qgroup_lock held */ 287 static int del_relation_rb(struct btrfs_fs_info *fs_info, 288 u64 memberid, u64 parentid) 289 { 290 struct btrfs_qgroup *member; 291 struct btrfs_qgroup 
*parent; 292 struct btrfs_qgroup_list *list; 293 294 member = find_qgroup_rb(fs_info, memberid); 295 parent = find_qgroup_rb(fs_info, parentid); 296 if (!member || !parent) 297 return -ENOENT; 298 299 list_for_each_entry(list, &member->groups, next_group) { 300 if (list->group == parent) { 301 list_del(&list->next_group); 302 list_del(&list->next_member); 303 kfree(list); 304 return 0; 305 } 306 } 307 return -ENOENT; 308 } 309 310 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 311 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 312 u64 rfer, u64 excl) 313 { 314 struct btrfs_qgroup *qgroup; 315 316 qgroup = find_qgroup_rb(fs_info, qgroupid); 317 if (!qgroup) 318 return -EINVAL; 319 if (qgroup->rfer != rfer || qgroup->excl != excl) 320 return -EINVAL; 321 return 0; 322 } 323 #endif 324 325 /* 326 * The full config is read in one go, only called from open_ctree() 327 * It doesn't use any locking, as at this point we're still single-threaded 328 */ 329 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 330 { 331 struct btrfs_key key; 332 struct btrfs_key found_key; 333 struct btrfs_root *quota_root = fs_info->quota_root; 334 struct btrfs_path *path = NULL; 335 struct extent_buffer *l; 336 int slot; 337 int ret = 0; 338 u64 flags = 0; 339 u64 rescan_progress = 0; 340 341 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 342 return 0; 343 344 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 345 if (!fs_info->qgroup_ulist) { 346 ret = -ENOMEM; 347 goto out; 348 } 349 350 path = btrfs_alloc_path(); 351 if (!path) { 352 ret = -ENOMEM; 353 goto out; 354 } 355 356 ret = btrfs_sysfs_add_qgroups(fs_info); 357 if (ret < 0) 358 goto out; 359 /* default this to quota off, in case no status key is found */ 360 fs_info->qgroup_flags = 0; 361 362 /* 363 * pass 1: read status, all qgroup infos and limits 364 */ 365 key.objectid = 0; 366 key.type = 0; 367 key.offset = 0; 368 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 369 if (ret) 
370 goto out; 371 372 while (1) { 373 struct btrfs_qgroup *qgroup; 374 375 slot = path->slots[0]; 376 l = path->nodes[0]; 377 btrfs_item_key_to_cpu(l, &found_key, slot); 378 379 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 380 struct btrfs_qgroup_status_item *ptr; 381 382 ptr = btrfs_item_ptr(l, slot, 383 struct btrfs_qgroup_status_item); 384 385 if (btrfs_qgroup_status_version(l, ptr) != 386 BTRFS_QGROUP_STATUS_VERSION) { 387 btrfs_err(fs_info, 388 "old qgroup version, quota disabled"); 389 goto out; 390 } 391 if (btrfs_qgroup_status_generation(l, ptr) != 392 fs_info->generation) { 393 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 394 btrfs_err(fs_info, 395 "qgroup generation mismatch, marked as inconsistent"); 396 } 397 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 398 ptr); 399 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 400 goto next1; 401 } 402 403 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 404 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 405 goto next1; 406 407 qgroup = find_qgroup_rb(fs_info, found_key.offset); 408 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 409 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 410 btrfs_err(fs_info, "inconsistent qgroup config"); 411 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 412 } 413 if (!qgroup) { 414 qgroup = add_qgroup_rb(fs_info, found_key.offset); 415 if (IS_ERR(qgroup)) { 416 ret = PTR_ERR(qgroup); 417 goto out; 418 } 419 } 420 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 421 if (ret < 0) 422 goto out; 423 424 switch (found_key.type) { 425 case BTRFS_QGROUP_INFO_KEY: { 426 struct btrfs_qgroup_info_item *ptr; 427 428 ptr = btrfs_item_ptr(l, slot, 429 struct btrfs_qgroup_info_item); 430 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 431 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 432 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 433 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 434 /* generation currently unused */ 435 break; 436 } 437 
case BTRFS_QGROUP_LIMIT_KEY: { 438 struct btrfs_qgroup_limit_item *ptr; 439 440 ptr = btrfs_item_ptr(l, slot, 441 struct btrfs_qgroup_limit_item); 442 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 443 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 444 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 445 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 446 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 447 break; 448 } 449 } 450 next1: 451 ret = btrfs_next_item(quota_root, path); 452 if (ret < 0) 453 goto out; 454 if (ret) 455 break; 456 } 457 btrfs_release_path(path); 458 459 /* 460 * pass 2: read all qgroup relations 461 */ 462 key.objectid = 0; 463 key.type = BTRFS_QGROUP_RELATION_KEY; 464 key.offset = 0; 465 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 466 if (ret) 467 goto out; 468 while (1) { 469 slot = path->slots[0]; 470 l = path->nodes[0]; 471 btrfs_item_key_to_cpu(l, &found_key, slot); 472 473 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 474 goto next2; 475 476 if (found_key.objectid > found_key.offset) { 477 /* parent <- member, not needed to build config */ 478 /* FIXME should we omit the key completely? 
*/ 479 goto next2; 480 } 481 482 ret = add_relation_rb(fs_info, found_key.objectid, 483 found_key.offset); 484 if (ret == -ENOENT) { 485 btrfs_warn(fs_info, 486 "orphan qgroup relation 0x%llx->0x%llx", 487 found_key.objectid, found_key.offset); 488 ret = 0; /* ignore the error */ 489 } 490 if (ret) 491 goto out; 492 next2: 493 ret = btrfs_next_item(quota_root, path); 494 if (ret < 0) 495 goto out; 496 if (ret) 497 break; 498 } 499 out: 500 fs_info->qgroup_flags |= flags; 501 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 502 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 503 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 504 ret >= 0) 505 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 506 btrfs_free_path(path); 507 508 if (ret < 0) { 509 ulist_free(fs_info->qgroup_ulist); 510 fs_info->qgroup_ulist = NULL; 511 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 512 btrfs_sysfs_del_qgroups(fs_info); 513 } 514 515 return ret < 0 ? ret : 0; 516 } 517 518 /* 519 * Called in close_ctree() when quota is still enabled. This verifies we don't 520 * leak some reserved space. 521 * 522 * Return false if no reserved space is left. 523 * Return true if some reserved space is leaked. 524 */ 525 bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info) 526 { 527 struct rb_node *node; 528 bool ret = false; 529 530 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 531 return ret; 532 /* 533 * Since we're unmounting, there is no race and no need to grab qgroup 534 * lock. And here we don't go post-order to provide a more user 535 * friendly sorted result. 
536 */ 537 for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) { 538 struct btrfs_qgroup *qgroup; 539 int i; 540 541 qgroup = rb_entry(node, struct btrfs_qgroup, node); 542 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) { 543 if (qgroup->rsv.values[i]) { 544 ret = true; 545 btrfs_warn(fs_info, 546 "qgroup %hu/%llu has unreleased space, type %d rsv %llu", 547 btrfs_qgroup_level(qgroup->qgroupid), 548 btrfs_qgroup_subvolid(qgroup->qgroupid), 549 i, qgroup->rsv.values[i]); 550 } 551 } 552 } 553 return ret; 554 } 555 556 /* 557 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 558 * first two are in single-threaded paths.And for the third one, we have set 559 * quota_root to be null with qgroup_lock held before, so it is safe to clean 560 * up the in-memory structures without qgroup_lock held. 561 */ 562 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 563 { 564 struct rb_node *n; 565 struct btrfs_qgroup *qgroup; 566 567 while ((n = rb_first(&fs_info->qgroup_tree))) { 568 qgroup = rb_entry(n, struct btrfs_qgroup, node); 569 rb_erase(n, &fs_info->qgroup_tree); 570 __del_qgroup_rb(fs_info, qgroup); 571 } 572 /* 573 * We call btrfs_free_qgroup_config() when unmounting 574 * filesystem and disabling quota, so we set qgroup_ulist 575 * to be null here to avoid double free. 
576 */ 577 ulist_free(fs_info->qgroup_ulist); 578 fs_info->qgroup_ulist = NULL; 579 btrfs_sysfs_del_qgroups(fs_info); 580 } 581 582 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 583 u64 dst) 584 { 585 int ret; 586 struct btrfs_root *quota_root = trans->fs_info->quota_root; 587 struct btrfs_path *path; 588 struct btrfs_key key; 589 590 path = btrfs_alloc_path(); 591 if (!path) 592 return -ENOMEM; 593 594 key.objectid = src; 595 key.type = BTRFS_QGROUP_RELATION_KEY; 596 key.offset = dst; 597 598 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 599 600 btrfs_mark_buffer_dirty(path->nodes[0]); 601 602 btrfs_free_path(path); 603 return ret; 604 } 605 606 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 607 u64 dst) 608 { 609 int ret; 610 struct btrfs_root *quota_root = trans->fs_info->quota_root; 611 struct btrfs_path *path; 612 struct btrfs_key key; 613 614 path = btrfs_alloc_path(); 615 if (!path) 616 return -ENOMEM; 617 618 key.objectid = src; 619 key.type = BTRFS_QGROUP_RELATION_KEY; 620 key.offset = dst; 621 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 out: 633 btrfs_free_path(path); 634 return ret; 635 } 636 637 static int add_qgroup_item(struct btrfs_trans_handle *trans, 638 struct btrfs_root *quota_root, u64 qgroupid) 639 { 640 int ret; 641 struct btrfs_path *path; 642 struct btrfs_qgroup_info_item *qgroup_info; 643 struct btrfs_qgroup_limit_item *qgroup_limit; 644 struct extent_buffer *leaf; 645 struct btrfs_key key; 646 647 if (btrfs_is_testing(quota_root->fs_info)) 648 return 0; 649 650 path = btrfs_alloc_path(); 651 if (!path) 652 return -ENOMEM; 653 654 key.objectid = 0; 655 key.type = BTRFS_QGROUP_INFO_KEY; 656 key.offset = qgroupid; 657 658 /* 659 * Avoid a transaction abort by catching -EEXIST here. 
In that 660 * case, we proceed by re-initializing the existing structure 661 * on disk. 662 */ 663 664 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 665 sizeof(*qgroup_info)); 666 if (ret && ret != -EEXIST) 667 goto out; 668 669 leaf = path->nodes[0]; 670 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 671 struct btrfs_qgroup_info_item); 672 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 673 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 674 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 675 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 676 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 677 678 btrfs_mark_buffer_dirty(leaf); 679 680 btrfs_release_path(path); 681 682 key.type = BTRFS_QGROUP_LIMIT_KEY; 683 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 684 sizeof(*qgroup_limit)); 685 if (ret && ret != -EEXIST) 686 goto out; 687 688 leaf = path->nodes[0]; 689 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 690 struct btrfs_qgroup_limit_item); 691 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 692 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 693 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 694 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 695 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 696 697 btrfs_mark_buffer_dirty(leaf); 698 699 ret = 0; 700 out: 701 btrfs_free_path(path); 702 return ret; 703 } 704 705 static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) 706 { 707 int ret; 708 struct btrfs_root *quota_root = trans->fs_info->quota_root; 709 struct btrfs_path *path; 710 struct btrfs_key key; 711 712 path = btrfs_alloc_path(); 713 if (!path) 714 return -ENOMEM; 715 716 key.objectid = 0; 717 key.type = BTRFS_QGROUP_INFO_KEY; 718 key.offset = qgroupid; 719 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 720 if (ret < 0) 721 goto out; 722 723 if (ret > 0) { 724 ret = -ENOENT; 725 goto out; 726 } 727 728 ret 
= btrfs_del_item(trans, quota_root, path); 729 if (ret) 730 goto out; 731 732 btrfs_release_path(path); 733 734 key.type = BTRFS_QGROUP_LIMIT_KEY; 735 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 736 if (ret < 0) 737 goto out; 738 739 if (ret > 0) { 740 ret = -ENOENT; 741 goto out; 742 } 743 744 ret = btrfs_del_item(trans, quota_root, path); 745 746 out: 747 btrfs_free_path(path); 748 return ret; 749 } 750 751 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 752 struct btrfs_qgroup *qgroup) 753 { 754 struct btrfs_root *quota_root = trans->fs_info->quota_root; 755 struct btrfs_path *path; 756 struct btrfs_key key; 757 struct extent_buffer *l; 758 struct btrfs_qgroup_limit_item *qgroup_limit; 759 int ret; 760 int slot; 761 762 key.objectid = 0; 763 key.type = BTRFS_QGROUP_LIMIT_KEY; 764 key.offset = qgroup->qgroupid; 765 766 path = btrfs_alloc_path(); 767 if (!path) 768 return -ENOMEM; 769 770 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 771 if (ret > 0) 772 ret = -ENOENT; 773 774 if (ret) 775 goto out; 776 777 l = path->nodes[0]; 778 slot = path->slots[0]; 779 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 780 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 781 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 782 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 783 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 784 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 785 786 btrfs_mark_buffer_dirty(l); 787 788 out: 789 btrfs_free_path(path); 790 return ret; 791 } 792 793 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 794 struct btrfs_qgroup *qgroup) 795 { 796 struct btrfs_fs_info *fs_info = trans->fs_info; 797 struct btrfs_root *quota_root = fs_info->quota_root; 798 struct btrfs_path *path; 799 struct btrfs_key key; 800 struct extent_buffer *l; 801 struct 
btrfs_qgroup_info_item *qgroup_info; 802 int ret; 803 int slot; 804 805 if (btrfs_is_testing(fs_info)) 806 return 0; 807 808 key.objectid = 0; 809 key.type = BTRFS_QGROUP_INFO_KEY; 810 key.offset = qgroup->qgroupid; 811 812 path = btrfs_alloc_path(); 813 if (!path) 814 return -ENOMEM; 815 816 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 817 if (ret > 0) 818 ret = -ENOENT; 819 820 if (ret) 821 goto out; 822 823 l = path->nodes[0]; 824 slot = path->slots[0]; 825 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 826 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 827 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 828 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 829 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 830 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 831 832 btrfs_mark_buffer_dirty(l); 833 834 out: 835 btrfs_free_path(path); 836 return ret; 837 } 838 839 static int update_qgroup_status_item(struct btrfs_trans_handle *trans) 840 { 841 struct btrfs_fs_info *fs_info = trans->fs_info; 842 struct btrfs_root *quota_root = fs_info->quota_root; 843 struct btrfs_path *path; 844 struct btrfs_key key; 845 struct extent_buffer *l; 846 struct btrfs_qgroup_status_item *ptr; 847 int ret; 848 int slot; 849 850 key.objectid = 0; 851 key.type = BTRFS_QGROUP_STATUS_KEY; 852 key.offset = 0; 853 854 path = btrfs_alloc_path(); 855 if (!path) 856 return -ENOMEM; 857 858 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 859 if (ret > 0) 860 ret = -ENOENT; 861 862 if (ret) 863 goto out; 864 865 l = path->nodes[0]; 866 slot = path->slots[0]; 867 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 868 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 869 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 870 btrfs_set_qgroup_status_rescan(l, ptr, 871 fs_info->qgroup_rescan_progress.objectid); 872 873 
btrfs_mark_buffer_dirty(l); 874 875 out: 876 btrfs_free_path(path); 877 return ret; 878 } 879 880 /* 881 * called with qgroup_lock held 882 */ 883 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 884 struct btrfs_root *root) 885 { 886 struct btrfs_path *path; 887 struct btrfs_key key; 888 struct extent_buffer *leaf = NULL; 889 int ret; 890 int nr = 0; 891 892 path = btrfs_alloc_path(); 893 if (!path) 894 return -ENOMEM; 895 896 path->leave_spinning = 1; 897 898 key.objectid = 0; 899 key.offset = 0; 900 key.type = 0; 901 902 while (1) { 903 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 904 if (ret < 0) 905 goto out; 906 leaf = path->nodes[0]; 907 nr = btrfs_header_nritems(leaf); 908 if (!nr) 909 break; 910 /* 911 * delete the leaf one by one 912 * since the whole tree is going 913 * to be deleted. 914 */ 915 path->slots[0] = 0; 916 ret = btrfs_del_items(trans, root, path, 0, nr); 917 if (ret) 918 goto out; 919 920 btrfs_release_path(path); 921 } 922 ret = 0; 923 out: 924 btrfs_free_path(path); 925 return ret; 926 } 927 928 int btrfs_quota_enable(struct btrfs_fs_info *fs_info) 929 { 930 struct btrfs_root *quota_root; 931 struct btrfs_root *tree_root = fs_info->tree_root; 932 struct btrfs_path *path = NULL; 933 struct btrfs_qgroup_status_item *ptr; 934 struct extent_buffer *leaf; 935 struct btrfs_key key; 936 struct btrfs_key found_key; 937 struct btrfs_qgroup *qgroup = NULL; 938 struct btrfs_trans_handle *trans = NULL; 939 int ret = 0; 940 int slot; 941 942 mutex_lock(&fs_info->qgroup_ioctl_lock); 943 if (fs_info->quota_root) 944 goto out; 945 946 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 947 if (!fs_info->qgroup_ulist) { 948 ret = -ENOMEM; 949 goto out; 950 } 951 952 ret = btrfs_sysfs_add_qgroups(fs_info); 953 if (ret < 0) 954 goto out; 955 /* 956 * 1 for quota root item 957 * 1 for BTRFS_QGROUP_STATUS item 958 * 959 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items 960 * per subvolume. 
However those are not currently reserved since it 961 * would be a lot of overkill. 962 */ 963 trans = btrfs_start_transaction(tree_root, 2); 964 if (IS_ERR(trans)) { 965 ret = PTR_ERR(trans); 966 trans = NULL; 967 goto out; 968 } 969 970 /* 971 * initially create the quota tree 972 */ 973 quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID); 974 if (IS_ERR(quota_root)) { 975 ret = PTR_ERR(quota_root); 976 btrfs_abort_transaction(trans, ret); 977 goto out; 978 } 979 980 path = btrfs_alloc_path(); 981 if (!path) { 982 ret = -ENOMEM; 983 btrfs_abort_transaction(trans, ret); 984 goto out_free_root; 985 } 986 987 key.objectid = 0; 988 key.type = BTRFS_QGROUP_STATUS_KEY; 989 key.offset = 0; 990 991 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 992 sizeof(*ptr)); 993 if (ret) { 994 btrfs_abort_transaction(trans, ret); 995 goto out_free_path; 996 } 997 998 leaf = path->nodes[0]; 999 ptr = btrfs_item_ptr(leaf, path->slots[0], 1000 struct btrfs_qgroup_status_item); 1001 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 1002 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 1003 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 1004 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1005 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 1006 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 1007 1008 btrfs_mark_buffer_dirty(leaf); 1009 1010 key.objectid = 0; 1011 key.type = BTRFS_ROOT_REF_KEY; 1012 key.offset = 0; 1013 1014 btrfs_release_path(path); 1015 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 1016 if (ret > 0) 1017 goto out_add_root; 1018 if (ret < 0) { 1019 btrfs_abort_transaction(trans, ret); 1020 goto out_free_path; 1021 } 1022 1023 while (1) { 1024 slot = path->slots[0]; 1025 leaf = path->nodes[0]; 1026 btrfs_item_key_to_cpu(leaf, &found_key, slot); 1027 1028 if (found_key.type == BTRFS_ROOT_REF_KEY) { 1029 ret = add_qgroup_item(trans, quota_root, 1030 found_key.offset); 1031 if 
(ret) { 1032 btrfs_abort_transaction(trans, ret); 1033 goto out_free_path; 1034 } 1035 1036 qgroup = add_qgroup_rb(fs_info, found_key.offset); 1037 if (IS_ERR(qgroup)) { 1038 ret = PTR_ERR(qgroup); 1039 btrfs_abort_transaction(trans, ret); 1040 goto out_free_path; 1041 } 1042 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1043 if (ret < 0) { 1044 btrfs_abort_transaction(trans, ret); 1045 goto out_free_path; 1046 } 1047 } 1048 ret = btrfs_next_item(tree_root, path); 1049 if (ret < 0) { 1050 btrfs_abort_transaction(trans, ret); 1051 goto out_free_path; 1052 } 1053 if (ret) 1054 break; 1055 } 1056 1057 out_add_root: 1058 btrfs_release_path(path); 1059 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1060 if (ret) { 1061 btrfs_abort_transaction(trans, ret); 1062 goto out_free_path; 1063 } 1064 1065 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 1066 if (IS_ERR(qgroup)) { 1067 ret = PTR_ERR(qgroup); 1068 btrfs_abort_transaction(trans, ret); 1069 goto out_free_path; 1070 } 1071 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1072 if (ret < 0) { 1073 btrfs_abort_transaction(trans, ret); 1074 goto out_free_path; 1075 } 1076 1077 ret = btrfs_commit_transaction(trans); 1078 trans = NULL; 1079 if (ret) 1080 goto out_free_path; 1081 1082 /* 1083 * Set quota enabled flag after committing the transaction, to avoid 1084 * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot 1085 * creation. 
1086 */ 1087 spin_lock(&fs_info->qgroup_lock); 1088 fs_info->quota_root = quota_root; 1089 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1090 spin_unlock(&fs_info->qgroup_lock); 1091 1092 ret = qgroup_rescan_init(fs_info, 0, 1); 1093 if (!ret) { 1094 qgroup_rescan_zero_tracking(fs_info); 1095 fs_info->qgroup_rescan_running = true; 1096 btrfs_queue_work(fs_info->qgroup_rescan_workers, 1097 &fs_info->qgroup_rescan_work); 1098 } 1099 1100 out_free_path: 1101 btrfs_free_path(path); 1102 out_free_root: 1103 if (ret) 1104 btrfs_put_root(quota_root); 1105 out: 1106 if (ret) { 1107 ulist_free(fs_info->qgroup_ulist); 1108 fs_info->qgroup_ulist = NULL; 1109 if (trans) 1110 btrfs_end_transaction(trans); 1111 btrfs_sysfs_del_qgroups(fs_info); 1112 } 1113 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1114 return ret; 1115 } 1116 1117 int btrfs_quota_disable(struct btrfs_fs_info *fs_info) 1118 { 1119 struct btrfs_root *quota_root; 1120 struct btrfs_trans_handle *trans = NULL; 1121 int ret = 0; 1122 1123 mutex_lock(&fs_info->qgroup_ioctl_lock); 1124 if (!fs_info->quota_root) 1125 goto out; 1126 1127 /* 1128 * 1 For the root item 1129 * 1130 * We should also reserve enough items for the quota tree deletion in 1131 * btrfs_clean_quota_tree but this is not done. 
1132 */ 1133 trans = btrfs_start_transaction(fs_info->tree_root, 1); 1134 if (IS_ERR(trans)) { 1135 ret = PTR_ERR(trans); 1136 goto out; 1137 } 1138 1139 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1140 btrfs_qgroup_wait_for_completion(fs_info, false); 1141 spin_lock(&fs_info->qgroup_lock); 1142 quota_root = fs_info->quota_root; 1143 fs_info->quota_root = NULL; 1144 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 1145 spin_unlock(&fs_info->qgroup_lock); 1146 1147 btrfs_free_qgroup_config(fs_info); 1148 1149 ret = btrfs_clean_quota_tree(trans, quota_root); 1150 if (ret) { 1151 btrfs_abort_transaction(trans, ret); 1152 goto end_trans; 1153 } 1154 1155 ret = btrfs_del_root(trans, "a_root->root_key); 1156 if (ret) { 1157 btrfs_abort_transaction(trans, ret); 1158 goto end_trans; 1159 } 1160 1161 list_del("a_root->dirty_list); 1162 1163 btrfs_tree_lock(quota_root->node); 1164 btrfs_clean_tree_block(quota_root->node); 1165 btrfs_tree_unlock(quota_root->node); 1166 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 1167 1168 btrfs_put_root(quota_root); 1169 1170 end_trans: 1171 ret = btrfs_end_transaction(trans); 1172 out: 1173 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1174 return ret; 1175 } 1176 1177 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 1178 struct btrfs_qgroup *qgroup) 1179 { 1180 if (list_empty(&qgroup->dirty)) 1181 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1182 } 1183 1184 /* 1185 * The easy accounting, we're updating qgroup relationship whose child qgroup 1186 * only has exclusive extents. 1187 * 1188 * In this case, all exclusive extents will also be exclusive for parent, so 1189 * excl/rfer just get added/removed. 1190 * 1191 * So is qgroup reservation space, which should also be added/removed to 1192 * parent. 1193 * Or when child tries to release reservation space, parent will underflow its 1194 * reservation (for relationship adding case). 1195 * 1196 * Caller should hold fs_info->qgroup_lock. 
 *
 * @tmp:      scratch ulist supplied by the caller; the ancestors of
 *            @ref_root are collected in it
 * @ref_root: id of the qgroup whose counters, and those of all its
 *            ancestors, are adjusted
 * @src:      qgroup whose excl byte count and reservations are applied
 * @sign:     > 0 adds @src's numbers, otherwise they are removed
 *
 * Returns 0 on success (also when @ref_root cannot be found) or the
 * negative value returned by ulist_add().
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    struct btrfs_qgroup *src, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 num_bytes = src->excl;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * sign * num_bytes is evaluated in u64, so a sign of -1 wraps and the
	 * additions below act as subtractions.
	 */
	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;

	if (sign > 0)
		qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
	else
		qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0)
			qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
		else
			qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/*
		 * Add any parents of the parents; the list grows while it is
		 * being walked, so every ancestor is eventually visited.
		 */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	/* ulist_add() may have left a positive value in ret; report success. */
	ret = 0;
out:
	return ret;
}


/*
 * Quick path for updating a qgroup that has only exclusive refs.
 *
 * In that case updating all of its parents is enough; otherwise a full
 * rescan is needed.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 if the quick update was done.
 * Return > 0 if a full rescan is needed (@src not found, or its excl and
 * rfer counters differ).
 * Return < 0 for other errors.
 * Any nonzero return also sets the INCONSISTENT status flag.
 */
static int quick_update_accounting(struct btrfs_fs_info *fs_info,
				   struct ulist *tmp, u64 src, u64 dst,
				   int sign)
{
	struct btrfs_qgroup *qgroup;
	int ret = 1;
	int err = 0;

	qgroup = find_qgroup_rb(fs_info, src);
	if (!qgroup)
		goto out;
	if (qgroup->excl == qgroup->rfer) {
		ret = 0;
		err = __qgroup_excl_accounting(fs_info, tmp, dst,
					       qgroup, sign);
		if (err < 0) {
			ret = err;
			goto out;
		}
	}
out:
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

/*
 * Record qgroup @dst as a parent of qgroup @src.
 *
 * Returns -EINVAL when the level of @src is not below the level of @dst or
 * when either qgroup is unknown, -ENOMEM when the scratch ulist cannot be
 * allocated, -ENOTCONN when there is no quota root and -EEXIST when the
 * relation is already present.  Otherwise the result of adding the relation
 * items and of quick_update_accounting() is returned.
 */
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
			      u64 dst)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	int ret = 0;

	/* Check the level of src and dst first */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}
	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* Check first whether this qgroup relation already exists */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto
				out;
		}
	}

	/* The relation is stored as two items, one for each direction. */
	ret = add_qgroup_relation_item(trans, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, dst, src);
	if (ret) {
		/* Undo the first item so no half relation is left behind. */
		del_qgroup_relation_item(trans, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(fs_info, src, dst);
	if (ret < 0) {
		spin_unlock(&fs_info->qgroup_lock);
		goto out;
	}
	ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	ulist_free(tmp);
	return ret;
}

/*
 * Remove the relation that makes qgroup @dst a parent of qgroup @src.
 *
 * Does not take fs_info->qgroup_ioctl_lock itself; the callers visible in
 * this file take it before calling.
 *
 * Returns -ENOMEM when the scratch ulist cannot be allocated and -ENOTCONN
 * when there is no quota root.
 */
static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
				 u64 dst)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	bool found = false;
	int ret = 0;
	int ret2;

	tmp = ulist_alloc(GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	/*
	 * The parent/member pair doesn't exist, then try to delete the dead
	 * relation items only.
1389 */ 1390 if (!member || !parent) 1391 goto delete_item; 1392 1393 /* check if such qgroup relation exist firstly */ 1394 list_for_each_entry(list, &member->groups, next_group) { 1395 if (list->group == parent) { 1396 found = true; 1397 break; 1398 } 1399 } 1400 1401 delete_item: 1402 ret = del_qgroup_relation_item(trans, src, dst); 1403 if (ret < 0 && ret != -ENOENT) 1404 goto out; 1405 ret2 = del_qgroup_relation_item(trans, dst, src); 1406 if (ret2 < 0 && ret2 != -ENOENT) 1407 goto out; 1408 1409 /* At least one deletion succeeded, return 0 */ 1410 if (!ret || !ret2) 1411 ret = 0; 1412 1413 if (found) { 1414 spin_lock(&fs_info->qgroup_lock); 1415 del_relation_rb(fs_info, src, dst); 1416 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1417 spin_unlock(&fs_info->qgroup_lock); 1418 } 1419 out: 1420 ulist_free(tmp); 1421 return ret; 1422 } 1423 1424 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1425 u64 dst) 1426 { 1427 struct btrfs_fs_info *fs_info = trans->fs_info; 1428 int ret = 0; 1429 1430 mutex_lock(&fs_info->qgroup_ioctl_lock); 1431 ret = __del_qgroup_relation(trans, src, dst); 1432 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1433 1434 return ret; 1435 } 1436 1437 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1438 { 1439 struct btrfs_fs_info *fs_info = trans->fs_info; 1440 struct btrfs_root *quota_root; 1441 struct btrfs_qgroup *qgroup; 1442 int ret = 0; 1443 1444 mutex_lock(&fs_info->qgroup_ioctl_lock); 1445 if (!fs_info->quota_root) { 1446 ret = -ENOTCONN; 1447 goto out; 1448 } 1449 quota_root = fs_info->quota_root; 1450 qgroup = find_qgroup_rb(fs_info, qgroupid); 1451 if (qgroup) { 1452 ret = -EEXIST; 1453 goto out; 1454 } 1455 1456 ret = add_qgroup_item(trans, quota_root, qgroupid); 1457 if (ret) 1458 goto out; 1459 1460 spin_lock(&fs_info->qgroup_lock); 1461 qgroup = add_qgroup_rb(fs_info, qgroupid); 1462 spin_unlock(&fs_info->qgroup_lock); 1463 1464 if (IS_ERR(qgroup)) { 1465 ret = 
			PTR_ERR(qgroup);
		goto out;
	}
	ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

/*
 * Delete qgroup @qgroupid together with its relations to parent qgroups.
 *
 * Returns -ENOTCONN when there is no quota root, -ENOENT when the qgroup is
 * unknown and -EBUSY when it still has member qgroups.
 * NOTE(review): when del_qgroup_item() returns -ENOENT and the qgroup has no
 * parents, that -ENOENT is still in ret when the function returns - confirm
 * this is intended.
 */
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	/* Check if there are no children of this qgroup */
	if (!list_empty(&qgroup->members)) {
		ret = -EBUSY;
		goto out;
	}

	/* A missing on-disk item is tolerated. */
	ret = del_qgroup_item(trans, qgroupid);
	if (ret && ret != -ENOENT)
		goto out;

	/*
	 * Drop the relation to every parent.  The loop relies on each
	 * successful call removing the first list entry.
	 */
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		ret = __del_qgroup_relation(trans, qgroupid,
					    list->group->qgroupid);
		if (ret)
			goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

/*
 * Apply the limits in @limit to qgroup @qgroupid and write them to the limit
 * item.  Only the fields whose bit is set in limit->flags are looked at.
 *
 * Note that @limit is modified: the flag bit of every field that carries the
 * "clear" value is removed from limit->flags.
 *
 * Returns -ENOTCONN when there is no quota root, -ENOENT when the qgroup is
 * unknown, otherwise the result of update_qgroup_limit_item().
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup *qgroup;
	int ret = 0;
	/* Sometimes we would want to clear the limit on this qgroup.
	 * To meet this requirement, we treat the -1 as a special value
	 * which tells the kernel to clear the limit on this qgroup.
	 */
	const u64 CLEAR_VALUE = -1;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root) {
		ret = -ENOTCONN;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}

	/*
	 * The four stanzas below are identical in shape: a field holding
	 * CLEAR_VALUE drops that limit (flag bit cleared on both sides, value
	 * zeroed), any other value is stored as the new limit.
	 */
	spin_lock(&fs_info->qgroup_lock);
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
		if (limit->max_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
			qgroup->max_rfer = 0;
		} else {
			qgroup->max_rfer = limit->max_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
		if (limit->max_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
			qgroup->max_excl = 0;
		} else {
			qgroup->max_excl = limit->max_excl;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
		if (limit->rsv_rfer == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
			qgroup->rsv_rfer = 0;
		} else {
			qgroup->rsv_rfer = limit->rsv_rfer;
		}
	}
	if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
		if (limit->rsv_excl == CLEAR_VALUE) {
			qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
			qgroup->rsv_excl = 0;
		} else {
			qgroup->rsv_excl = limit->rsv_excl;
		}
	}
	/* Only the bits that survived the clearing above are turned on. */
	qgroup->lim_flags |= limit->flags;

	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_limit_item(trans, qgroup);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
		       qgroupid);
	}

out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

/*
 * Insert @record into delayed_refs->dirty_extent_root, keyed by bytenr.
 *
 * The caller must hold delayed_refs->lock (checked with lockdep).
 *
 * Return 0 when @record was inserted.
 * Return 1 when an entry with the same bytenr already exists; @record is then
 * not inserted, and its data_rsv/data_rsv_refroot are copied to the existing
 * entry if that entry has no data_rsv yet.
 */
int btrfs_qgroup_trace_extent_nolock(struct
				     btrfs_fs_info *fs_info,
				struct btrfs_delayed_ref_root *delayed_refs,
				struct btrfs_qgroup_extent_record *record)
{
	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_qgroup_extent_record *entry;
	u64 bytenr = record->bytenr;

	lockdep_assert_held(&delayed_refs->lock);
	trace_btrfs_qgroup_trace_extent(fs_info, record);

	/* Plain rbtree descent on bytenr. */
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
				 node);
		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
		} else {
			/* Same bytenr: keep the old entry, merge data_rsv. */
			if (record->data_rsv && !entry->data_rsv) {
				entry->data_rsv = record->data_rsv;
				entry->data_rsv_refroot =
					record->data_rsv_refroot;
			}
			return 1;
		}
	}

	rb_link_node(&record->node, parent_node, p);
	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
	return 0;
}

/*
 * Look up the roots currently referring to @qrecord->bytenr and store them in
 * @qrecord->old_roots.
 *
 * Always returns 0.  A lookup failure is not passed on; it marks the qgroup
 * status INCONSISTENT and logs a warning instead.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord)
{
	struct ulist *old_root;
	u64 bytenr = qrecord->bytenr;
	int ret;

	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
	if (ret < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_warn(fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
			ret);
		return 0;
	}

	/*
	 * Here we don't need to get the lock of
	 * trans->transaction->delayed_refs, since inserted qrecord won't
	 * be deleted, only qrecord->node may be modified (new qrecord insert)
	 *
	 * So modifying qrecord->old_roots is safe here
	 */
	qrecord->old_roots = old_root;
	return 0;
}

/*
 * Allocate a qgroup extent record for [@bytenr, @bytenr + @num_bytes) and
 * add it to the dirty extent tree of the running transaction.
 *
 * Returns 0 without doing anything when quotas are not enabled or when
 * @bytenr or @num_bytes is 0, and -ENOMEM when the record cannot be
 * allocated.  A record for an already traced bytenr is freed again and 0 is
 * returned.
 */
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
			      u64 num_bytes, gfp_t
						   gfp_flag)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_qgroup_extent_record *record;
	struct btrfs_delayed_ref_root *delayed_refs;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
	    || bytenr == 0 || num_bytes == 0)
		return 0;
	record = kzalloc(sizeof(*record), gfp_flag);
	if (!record)
		return -ENOMEM;

	delayed_refs = &trans->transaction->delayed_refs;
	record->bytenr = bytenr;
	record->num_bytes = num_bytes;
	record->old_roots = NULL;

	spin_lock(&delayed_refs->lock);
	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);
	if (ret > 0) {
		/* The bytenr is already traced; our record was not linked. */
		kfree(record);
		return 0;
	}
	return btrfs_qgroup_trace_extent_post(fs_info, record);
}

/*
 * Trace every regular (non-inline, non-hole) file extent referenced by the
 * items of leaf @eb.
 *
 * Returns 0 when quotas are not enabled or when all extents were traced, or
 * the first nonzero value returned by btrfs_qgroup_trace_extent().
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct extent_buffer *eb)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int nr = btrfs_header_nritems(eb);
	int i, extent_type, ret;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	u64 bytenr, num_bytes;

	/* We can be called directly from walk_up_proc() */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	for (i = 0; i < nr; i++) {
		btrfs_item_key_to_cpu(eb, &key, i);

		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
		/* filter out non qgroup-accountable extents */
		extent_type = btrfs_file_extent_type(eb, fi);

		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;

		/* A disk bytenr of 0 means there is nothing on disk to trace. */
		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
		if (!bytenr)
			continue;

		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);

		ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes,
						GFP_NOFS);
		if (ret)
			return ret;
	}
	cond_resched();
	return 0;
}

/*
 * Walk up the tree from the bottom, freeing leaves and any interior
 * nodes which have had all slots visited. If a node (leaf or
 * interior) is freed, the node above it will have its slot
 * incremented. The root node will never be freed.
 *
 * At the end of this function, we should have a path which has all
 * slots incremented to the next position for a search. If we need to
 * read a new node it will be NULL and the node above it will have the
 * correct slot selected for a later read.
 *
 * If we increment the root node's slot counter past the number of
 * elements, 1 is returned to signal completion of the search.
 * 1 is also returned right away when @root_level is 0.
 */
static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
{
	int level = 0;
	int nr, slot;
	struct extent_buffer *eb;

	if (root_level == 0)
		return 1;

	while (level <= root_level) {
		eb = path->nodes[level];
		nr = btrfs_header_nritems(eb);
		path->slots[level]++;
		slot = path->slots[level];
		/* A leaf (level 0) is always released, whatever its slot. */
		if (slot >= nr || level == 0) {
			/*
			 * Don't free the root - we will detect this
			 * condition after our loop and return a
			 * positive value for caller to stop walking the tree.
			 */
			if (level != root_level) {
				btrfs_tree_unlock_rw(eb, path->locks[level]);
				path->locks[level] = 0;

				free_extent_buffer(eb);
				path->nodes[level] = NULL;
				path->slots[level] = 0;
			}
		} else {
			/*
			 * We have a valid slot to walk back down
			 * from. Stop here so caller can process these
			 * new nodes.
			 */
			break;
		}

		level++;
	}

	eb = path->nodes[root_level];
	if (path->slots[root_level] >= btrfs_header_nritems(eb))
		return 1;

	return 0;
}

/*
 * Helper function to trace a subtree tree block swap.
 *
 * The swap will happen in the highest tree block, but there may be a lot of
 * tree blocks involved.
 *
 * For example:
 *  OO = Old tree blocks
 *  NN = New tree blocks allocated during balance
 *
 *           File tree (257)                  Reloc tree for 257
 * L2              OO                                NN
 *               /    \                            /    \
 * L1          OO      OO (a)                    OO      NN (a)
 *            / \     / \                       / \     / \
 * L0       OO   OO OO   OO                   OO   OO NN   NN
 *                  (b)  (c)                          (b)  (c)
 *
 * When calling qgroup_trace_extent_swap(), we will pass:
 * @src_eb = OO(a)
 * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
 * @dst_level = 0
 * @root_level = 1
 *
 * In that case, qgroup_trace_extent_swap() will search from OO(a) to
 * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
 *
 * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
 *
 * 1) Tree search from @src_eb
 *    It should act as a simplified btrfs_search_slot().
 *    The key for search can be extracted from @dst_path->nodes[dst_level]
 *    (first key).
 *
 * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
 *    NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
 *    They should be marked during previous (@dst_level = 1) iteration.
 *
 * 3) Mark file extents in leaves dirty
 *    We don't have a good way to pick out new file extents only.
 *    So we still follow the old method by scanning all file extents in
 *    the leaf.
 *
 * This function can free us from keeping two paths, thus later we only need
 * to care about how to iterate all new tree blocks in reloc tree.
 *
 * Returns -EINVAL when the level of @src_eb is not @root_level, -ENOMEM when
 * no path can be allocated, -ENOENT when the keys of the two trees do not
 * match at some level, or the error of a failed tree block read or trace
 * call.
 */
static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
				    struct extent_buffer *src_eb,
				    struct btrfs_path *dst_path,
				    int dst_level, int root_level,
				    bool trace_leaf)
{
	struct btrfs_key key;
	struct btrfs_path *src_path;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	u32 nodesize = fs_info->nodesize;
	int cur_level = root_level;
	int ret;

	BUG_ON(dst_level > root_level);
	/* Level mismatch */
	if (btrfs_header_level(src_eb) != root_level)
		return -EINVAL;

	src_path = btrfs_alloc_path();
	if (!src_path) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * NOTE(review): key is filled in here but not read again in this
	 * function; the loop below compares keys slot by slot instead.
	 */
	if (dst_level)
		btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
	else
		btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);

	/* For src_path */
	atomic_inc(&src_eb->refs);
	src_path->nodes[root_level] = src_eb;
	src_path->slots[root_level] = dst_path->slots[root_level];
	src_path->locks[root_level] = 0;

	/* A simplified version of btrfs_search_slot() */
	while (cur_level >= dst_level) {
		struct btrfs_key src_key;
		struct btrfs_key dst_key;

		/* Read the block of this level through its parent's slot. */
		if (src_path->nodes[cur_level] == NULL) {
			struct btrfs_key first_key;
			struct extent_buffer *eb;
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			eb = src_path->nodes[cur_level + 1];
			parent_slot = src_path->slots[cur_level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen,
					     cur_level, &first_key);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			src_path->nodes[cur_level] = eb;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_read(eb);
			src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
		}

		/* Mirror the slot of @dst_path and compare the two keys. */
		src_path->slots[cur_level] = dst_path->slots[cur_level];
		if (cur_level) {
			btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
					&dst_key, dst_path->slots[cur_level]);
			btrfs_node_key_to_cpu(src_path->nodes[cur_level],
					&src_key, src_path->slots[cur_level]);
		} else {
			btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
					&dst_key, dst_path->slots[cur_level]);
			btrfs_item_key_to_cpu(src_path->nodes[cur_level],
					&src_key, src_path->slots[cur_level]);
		}
		/* Content mismatch, something went wrong */
		if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
			ret = -ENOENT;
			goto out;
		}
		cur_level--;
	}

	/*
	 * Now both @dst_path and @src_path have been populated, record the tree
	 * blocks for qgroup accounting.
	 */
	ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
			nodesize, GFP_NOFS);
	if (ret < 0)
		goto out;
	ret = btrfs_qgroup_trace_extent(trans,
			dst_path->nodes[dst_level]->start,
			nodesize, GFP_NOFS);
	if (ret < 0)
		goto out;

	/* Record leaf file extents */
	if (dst_level == 0 && trace_leaf) {
		ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
		if (ret < 0)
			goto out;
		ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
	}
out:
	/* Also reached with src_path == NULL when the allocation failed. */
	btrfs_free_path(src_path);
	return ret;
}

/*
 * Helper function to do recursive generation-aware depth-first search, to
 * locate all new tree blocks in a subtree of reloc tree.
 *
 * E.g.
 * (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
 *       reloc tree
 * L2         NN (a)
 *          /    \
 * L1    OO        NN (b)
 *      /  \      /  \
 * L0  OO  OO    OO  NN
 *               (c) (d)
 * If we pass:
 * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
 * @cur_level = 1
 * @root_level = 1
 *
 * We will iterate through tree blocks NN(b), NN(d) and inform qgroup to trace
 * above tree blocks along with their counterparts in file tree.
 * During the search, old tree blocks such as OO(c) will be skipped, as the
 * tree block swap won't affect OO(c).
 *
 * Returns -EUCLEAN for levels that are out of range or inconsistent, or when
 * dst_path->nodes[root_level] has not been set up by the caller.
 */
static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
					   struct extent_buffer *src_eb,
					   struct btrfs_path *dst_path,
					   int cur_level, int root_level,
					   u64 last_snapshot, bool trace_leaf)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_buffer *eb;
	bool need_cleanup = false;
	int ret = 0;
	int i;

	/* Level sanity check */
	if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
	    root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
	    root_level < cur_level) {
		btrfs_err_rl(fs_info,
			"%s: bad levels, cur_level=%d root_level=%d",
			__func__, cur_level, root_level);
		return -EUCLEAN;
	}

	/* Read the tree block if needed */
	if (dst_path->nodes[cur_level] == NULL) {
		struct btrfs_key first_key;
		int parent_slot;
		u64 child_gen;
		u64 child_bytenr;

		/*
		 * dst_path->nodes[root_level] must be initialized before
		 * calling this function.
		 */
		if (cur_level == root_level) {
			btrfs_err_rl(fs_info,
	"%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
				__func__, root_level, root_level, cur_level);
			return -EUCLEAN;
		}

		/*
		 * We need to get child blockptr/gen from parent before we can
		 * read it.
		 */
		eb = dst_path->nodes[cur_level + 1];
		parent_slot = dst_path->slots[cur_level + 1];
		child_bytenr = btrfs_node_blockptr(eb, parent_slot);
		child_gen = btrfs_node_ptr_generation(eb, parent_slot);
		btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

		/* This node is old, no need to trace (ret is still 0 here) */
		if (child_gen < last_snapshot)
			goto out;

		eb = read_tree_block(fs_info, child_bytenr, child_gen,
				     cur_level, &first_key);
		if (IS_ERR(eb)) {
			ret = PTR_ERR(eb);
			goto out;
		} else if (!extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			ret = -EIO;
			goto out;
		}

		dst_path->nodes[cur_level] = eb;
		dst_path->slots[cur_level] = 0;

		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_read(eb);
		dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
		/* This call read and locked the block, so it must release it. */
		need_cleanup = true;
	}

	/* Now record this tree block and its counterpart for qgroups */
	ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
				       root_level, trace_leaf);
	if (ret < 0)
		goto cleanup;

	eb = dst_path->nodes[cur_level];

	if (cur_level > 0) {
		/* Iterate all child tree blocks */
		for (i = 0; i < btrfs_header_nritems(eb); i++) {
			/* Skip old tree blocks as they won't be swapped */
			if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
				continue;
			dst_path->slots[cur_level] = i;

			/* Recursive call (at most 7 times) */
			ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
					dst_path, cur_level - 1, root_level,
					last_snapshot, trace_leaf);
			if (ret < 0)
				goto cleanup;
		}
	}

cleanup:
	if (need_cleanup) {
		/* Release the block read above and reset its path entry */
		btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
				     dst_path->locks[cur_level]);
		free_extent_buffer(dst_path->nodes[cur_level]);
		dst_path->nodes[cur_level] = NULL;
		dst_path->slots[cur_level] = 0;
		dst_path->locks[cur_level] =
					     0;
	}
out:
	return ret;
}

/*
 * Trace the tree blocks affected by swapping subtree @src_eb with @dst_eb.
 *
 * @dst_eb must not have a lower generation than @src_eb, otherwise -EUCLEAN
 * is returned.  Returns 0 right away when quotas are not enabled.
 * A negative return from the `out` path also marks the qgroup status
 * INCONSISTENT; the -EUCLEAN return above does not go through that path.
 */
static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
				struct extent_buffer *src_eb,
				struct extent_buffer *dst_eb,
				u64 last_snapshot, bool trace_leaf)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *dst_path = NULL;
	int level;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	/* Wrong parameter order */
	if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
		btrfs_err_rl(fs_info,
		"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
			     btrfs_header_generation(src_eb),
			     btrfs_header_generation(dst_eb));
		return -EUCLEAN;
	}

	if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
		ret = -EIO;
		goto out;
	}

	level = btrfs_header_level(dst_eb);
	dst_path = btrfs_alloc_path();
	if (!dst_path) {
		ret = -ENOMEM;
		goto out;
	}
	/* For dst_path */
	atomic_inc(&dst_eb->refs);
	dst_path->nodes[level] = dst_eb;
	dst_path->slots[level] = 0;
	dst_path->locks[level] = 0;

	/* Do the generation-aware depth-first search */
	ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
					      level, last_snapshot, trace_leaf);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(dst_path);
	if (ret < 0)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

/*
 * Trace every tree block and file extent of the subtree rooted at @root_eb.
 *
 * @root_gen and @root_level are used to read @root_eb when it is not yet
 * up to date.  Returns 0 right away when quotas are not enabled.
 */
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret = 0;
	int level;
	struct extent_buffer *eb = root_eb;
	struct btrfs_path *path = NULL;

	BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
	BUG_ON(root_eb == NULL);

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (!extent_buffer_uptodate(root_eb)) {
		ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL);
		if (ret)
			goto out;
	}

	/* A root that is a leaf needs no walk: trace its items and stop. */
	if (root_level == 0) {
		ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Walk down the tree. Missing extent blocks are filled in as
	 * we go. Metadata is accounted every time we read a new
	 * extent block.
	 *
	 * When we reach a leaf, we account for file extent items in it,
	 * walk back up the tree (adjusting slot pointers as we go)
	 * and restart the search process.
	 */
	atomic_inc(&root_eb->refs);	/* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			struct btrfs_key first_key;
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/*
			 * We need to get child blockptr/gen from parent before
			 * we can read it.
			 */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
			btrfs_node_key_to_cpu(eb, &first_key, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen,
					     level, &first_key);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_read(eb);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			/* Account the tree block that was just read. */
			ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
							fs_info->nodesize,
							GFP_NOFS);
			if (ret)
				goto out;
		}

		if (level == 0) {
			ret = btrfs_qgroup_trace_leaf_items(trans,
							    path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	/* The positive "search finished" value is not passed to the caller. */
	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}

/* Values for the update_old argument of qgroup_update_refcnt(). */
#define UPDATE_NEW	0
#define UPDATE_OLD	1
/*
 * Walk all of the roots that point to the bytenr and adjust their refcnts.
 *
 * @roots:      ulist of root ids; a NULL list is accepted and does nothing
 * @tmp:        scratch ulist, reinitialised for every root
 * @qgroups:    collects every qgroup that was touched, across all roots
 * @seq:        base value the refcnts are counted from
 * @update_old: nonzero to bump old_refcnt, zero to bump new_refcnt
 *
 * Returns 0 on success or the negative value returned by ulist_add().
 */
static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				struct ulist *roots, struct ulist *tmp,
				struct ulist *qgroups, u64 seq, int update_old)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret = 0;

	if (!roots)
		return 0;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		/* Roots without a matching qgroup are skipped. */
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		/*
		 * tmp grows while it is walked: the parents of each visited
		 * qgroup are appended, so the walk covers this root's qgroup
		 * and all of its ancestors.
		 */
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = unode_aux_to_qgroup(tmp_unode);
			if (update_old)
				btrfs_qgroup_update_old_refcnt(qg, seq, 1);
			else
				btrfs_qgroup_update_new_refcnt(qg, seq, 1);
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * Update qgroup rfer/excl counters.
 * Rfer update is easy, the code explains itself.
 *
 * Excl update is tricky, the update is split into 2 parts.
 * Part 1: Possible exclusive <-> sharing detect:
 *	|   A	|   !A	|
 *  -------------------------------------
 *  B	|   *	|   -	|
 *  -------------------------------------
 *  !B	|   +	|   **	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use condition A and B to split
 * combination into 4 results.
 *
 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
 * only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
2349 */ 2350 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 2351 struct ulist *qgroups, 2352 u64 nr_old_roots, 2353 u64 nr_new_roots, 2354 u64 num_bytes, u64 seq) 2355 { 2356 struct ulist_node *unode; 2357 struct ulist_iterator uiter; 2358 struct btrfs_qgroup *qg; 2359 u64 cur_new_count, cur_old_count; 2360 2361 ULIST_ITER_INIT(&uiter); 2362 while ((unode = ulist_next(qgroups, &uiter))) { 2363 bool dirty = false; 2364 2365 qg = unode_aux_to_qgroup(unode); 2366 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 2367 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 2368 2369 trace_qgroup_update_counters(fs_info, qg, cur_old_count, 2370 cur_new_count); 2371 2372 /* Rfer update part */ 2373 if (cur_old_count == 0 && cur_new_count > 0) { 2374 qg->rfer += num_bytes; 2375 qg->rfer_cmpr += num_bytes; 2376 dirty = true; 2377 } 2378 if (cur_old_count > 0 && cur_new_count == 0) { 2379 qg->rfer -= num_bytes; 2380 qg->rfer_cmpr -= num_bytes; 2381 dirty = true; 2382 } 2383 2384 /* Excl update part */ 2385 /* Exclusive/none -> shared case */ 2386 if (cur_old_count == nr_old_roots && 2387 cur_new_count < nr_new_roots) { 2388 /* Exclusive -> shared */ 2389 if (cur_old_count != 0) { 2390 qg->excl -= num_bytes; 2391 qg->excl_cmpr -= num_bytes; 2392 dirty = true; 2393 } 2394 } 2395 2396 /* Shared -> exclusive/none case */ 2397 if (cur_old_count < nr_old_roots && 2398 cur_new_count == nr_new_roots) { 2399 /* Shared->exclusive */ 2400 if (cur_new_count != 0) { 2401 qg->excl += num_bytes; 2402 qg->excl_cmpr += num_bytes; 2403 dirty = true; 2404 } 2405 } 2406 2407 /* Exclusive/none -> exclusive/none case */ 2408 if (cur_old_count == nr_old_roots && 2409 cur_new_count == nr_new_roots) { 2410 if (cur_old_count == 0) { 2411 /* None -> exclusive/none */ 2412 2413 if (cur_new_count != 0) { 2414 /* None -> exclusive */ 2415 qg->excl += num_bytes; 2416 qg->excl_cmpr += num_bytes; 2417 dirty = true; 2418 } 2419 /* None -> none, nothing changed */ 2420 } else { 2421 /* 
Exclusive -> exclusive/none */ 2422 2423 if (cur_new_count == 0) { 2424 /* Exclusive -> none */ 2425 qg->excl -= num_bytes; 2426 qg->excl_cmpr -= num_bytes; 2427 dirty = true; 2428 } 2429 /* Exclusive -> exclusive, nothing changed */ 2430 } 2431 } 2432 2433 if (dirty) 2434 qgroup_dirty(fs_info, qg); 2435 } 2436 return 0; 2437 } 2438 2439 /* 2440 * Check if the @roots potentially is a list of fs tree roots 2441 * 2442 * Return 0 for definitely not a fs/subvol tree roots ulist 2443 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 2444 * one as well) 2445 */ 2446 static int maybe_fs_roots(struct ulist *roots) 2447 { 2448 struct ulist_node *unode; 2449 struct ulist_iterator uiter; 2450 2451 /* Empty one, still possible for fs roots */ 2452 if (!roots || roots->nnodes == 0) 2453 return 1; 2454 2455 ULIST_ITER_INIT(&uiter); 2456 unode = ulist_next(roots, &uiter); 2457 if (!unode) 2458 return 1; 2459 2460 /* 2461 * If it contains fs tree roots, then it must belong to fs/subvol 2462 * trees. 2463 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 2464 */ 2465 return is_fstree(unode->val); 2466 } 2467 2468 int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 2469 u64 num_bytes, struct ulist *old_roots, 2470 struct ulist *new_roots) 2471 { 2472 struct btrfs_fs_info *fs_info = trans->fs_info; 2473 struct ulist *qgroups = NULL; 2474 struct ulist *tmp = NULL; 2475 u64 seq; 2476 u64 nr_new_roots = 0; 2477 u64 nr_old_roots = 0; 2478 int ret = 0; 2479 2480 /* 2481 * If quotas get disabled meanwhile, the resouces need to be freed and 2482 * we can't just exit here. 
2483 */ 2484 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2485 goto out_free; 2486 2487 if (new_roots) { 2488 if (!maybe_fs_roots(new_roots)) 2489 goto out_free; 2490 nr_new_roots = new_roots->nnodes; 2491 } 2492 if (old_roots) { 2493 if (!maybe_fs_roots(old_roots)) 2494 goto out_free; 2495 nr_old_roots = old_roots->nnodes; 2496 } 2497 2498 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 2499 if (nr_old_roots == 0 && nr_new_roots == 0) 2500 goto out_free; 2501 2502 BUG_ON(!fs_info->quota_root); 2503 2504 trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr, 2505 num_bytes, nr_old_roots, nr_new_roots); 2506 2507 qgroups = ulist_alloc(GFP_NOFS); 2508 if (!qgroups) { 2509 ret = -ENOMEM; 2510 goto out_free; 2511 } 2512 tmp = ulist_alloc(GFP_NOFS); 2513 if (!tmp) { 2514 ret = -ENOMEM; 2515 goto out_free; 2516 } 2517 2518 mutex_lock(&fs_info->qgroup_rescan_lock); 2519 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2520 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 2521 mutex_unlock(&fs_info->qgroup_rescan_lock); 2522 ret = 0; 2523 goto out_free; 2524 } 2525 } 2526 mutex_unlock(&fs_info->qgroup_rescan_lock); 2527 2528 spin_lock(&fs_info->qgroup_lock); 2529 seq = fs_info->qgroup_seq; 2530 2531 /* Update old refcnts using old_roots */ 2532 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 2533 UPDATE_OLD); 2534 if (ret < 0) 2535 goto out; 2536 2537 /* Update new refcnts using new_roots */ 2538 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 2539 UPDATE_NEW); 2540 if (ret < 0) 2541 goto out; 2542 2543 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 2544 num_bytes, seq); 2545 2546 /* 2547 * Bump qgroup_seq to avoid seq overlap 2548 */ 2549 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 2550 out: 2551 spin_unlock(&fs_info->qgroup_lock); 2552 out_free: 2553 ulist_free(tmp); 2554 ulist_free(qgroups); 2555 ulist_free(old_roots); 2556 
ulist_free(new_roots); 2557 return ret; 2558 } 2559 2560 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) 2561 { 2562 struct btrfs_fs_info *fs_info = trans->fs_info; 2563 struct btrfs_qgroup_extent_record *record; 2564 struct btrfs_delayed_ref_root *delayed_refs; 2565 struct ulist *new_roots = NULL; 2566 struct rb_node *node; 2567 u64 num_dirty_extents = 0; 2568 u64 qgroup_to_skip; 2569 int ret = 0; 2570 2571 delayed_refs = &trans->transaction->delayed_refs; 2572 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2573 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2574 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2575 node); 2576 2577 num_dirty_extents++; 2578 trace_btrfs_qgroup_account_extents(fs_info, record); 2579 2580 if (!ret) { 2581 /* 2582 * Old roots should be searched when inserting qgroup 2583 * extent record 2584 */ 2585 if (WARN_ON(!record->old_roots)) { 2586 /* Search commit root to find old_roots */ 2587 ret = btrfs_find_all_roots(NULL, fs_info, 2588 record->bytenr, 0, 2589 &record->old_roots, false); 2590 if (ret < 0) 2591 goto cleanup; 2592 } 2593 2594 /* Free the reserved data space */ 2595 btrfs_qgroup_free_refroot(fs_info, 2596 record->data_rsv_refroot, 2597 record->data_rsv, 2598 BTRFS_QGROUP_RSV_DATA); 2599 /* 2600 * Use SEQ_LAST as time_seq to do special search, which 2601 * doesn't lock tree or delayed_refs and search current 2602 * root. It's safe inside commit_transaction(). 
2603 */ 2604 ret = btrfs_find_all_roots(trans, fs_info, 2605 record->bytenr, SEQ_LAST, &new_roots, false); 2606 if (ret < 0) 2607 goto cleanup; 2608 if (qgroup_to_skip) { 2609 ulist_del(new_roots, qgroup_to_skip, 0); 2610 ulist_del(record->old_roots, qgroup_to_skip, 2611 0); 2612 } 2613 ret = btrfs_qgroup_account_extent(trans, record->bytenr, 2614 record->num_bytes, 2615 record->old_roots, 2616 new_roots); 2617 record->old_roots = NULL; 2618 new_roots = NULL; 2619 } 2620 cleanup: 2621 ulist_free(record->old_roots); 2622 ulist_free(new_roots); 2623 new_roots = NULL; 2624 rb_erase(node, &delayed_refs->dirty_extent_root); 2625 kfree(record); 2626 2627 } 2628 trace_qgroup_num_dirty_extents(fs_info, trans->transid, 2629 num_dirty_extents); 2630 return ret; 2631 } 2632 2633 /* 2634 * called from commit_transaction. Writes all changed qgroups to disk. 2635 */ 2636 int btrfs_run_qgroups(struct btrfs_trans_handle *trans) 2637 { 2638 struct btrfs_fs_info *fs_info = trans->fs_info; 2639 int ret = 0; 2640 2641 if (!fs_info->quota_root) 2642 return ret; 2643 2644 spin_lock(&fs_info->qgroup_lock); 2645 while (!list_empty(&fs_info->dirty_qgroups)) { 2646 struct btrfs_qgroup *qgroup; 2647 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2648 struct btrfs_qgroup, dirty); 2649 list_del_init(&qgroup->dirty); 2650 spin_unlock(&fs_info->qgroup_lock); 2651 ret = update_qgroup_info_item(trans, qgroup); 2652 if (ret) 2653 fs_info->qgroup_flags |= 2654 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2655 ret = update_qgroup_limit_item(trans, qgroup); 2656 if (ret) 2657 fs_info->qgroup_flags |= 2658 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2659 spin_lock(&fs_info->qgroup_lock); 2660 } 2661 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2662 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2663 else 2664 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2665 spin_unlock(&fs_info->qgroup_lock); 2666 2667 ret = update_qgroup_status_item(trans); 2668 if (ret) 2669 fs_info->qgroup_flags 
|= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2670 2671 return ret; 2672 } 2673 2674 /* 2675 * Copy the accounting information between qgroups. This is necessary 2676 * when a snapshot or a subvolume is created. Throwing an error will 2677 * cause a transaction abort so we take extra care here to only error 2678 * when a readonly fs is a reasonable outcome. 2679 */ 2680 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, 2681 u64 objectid, struct btrfs_qgroup_inherit *inherit) 2682 { 2683 int ret = 0; 2684 int i; 2685 u64 *i_qgroups; 2686 bool committing = false; 2687 struct btrfs_fs_info *fs_info = trans->fs_info; 2688 struct btrfs_root *quota_root; 2689 struct btrfs_qgroup *srcgroup; 2690 struct btrfs_qgroup *dstgroup; 2691 bool need_rescan = false; 2692 u32 level_size = 0; 2693 u64 nums; 2694 2695 /* 2696 * There are only two callers of this function. 2697 * 2698 * One in create_subvol() in the ioctl context, which needs to hold 2699 * the qgroup_ioctl_lock. 2700 * 2701 * The other one in create_pending_snapshot() where no other qgroup 2702 * code can modify the fs as they all need to either start a new trans 2703 * or hold a trans handler, thus we don't need to hold 2704 * qgroup_ioctl_lock. 2705 * This would avoid long and complex lock chain and make lockdep happy. 
2706 */ 2707 spin_lock(&fs_info->trans_lock); 2708 if (trans->transaction->state == TRANS_STATE_COMMIT_DOING) 2709 committing = true; 2710 spin_unlock(&fs_info->trans_lock); 2711 2712 if (!committing) 2713 mutex_lock(&fs_info->qgroup_ioctl_lock); 2714 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2715 goto out; 2716 2717 quota_root = fs_info->quota_root; 2718 if (!quota_root) { 2719 ret = -EINVAL; 2720 goto out; 2721 } 2722 2723 if (inherit) { 2724 i_qgroups = (u64 *)(inherit + 1); 2725 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2726 2 * inherit->num_excl_copies; 2727 for (i = 0; i < nums; ++i) { 2728 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2729 2730 /* 2731 * Zero out invalid groups so we can ignore 2732 * them later. 2733 */ 2734 if (!srcgroup || 2735 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2736 *i_qgroups = 0ULL; 2737 2738 ++i_qgroups; 2739 } 2740 } 2741 2742 /* 2743 * create a tracking group for the subvol itself 2744 */ 2745 ret = add_qgroup_item(trans, quota_root, objectid); 2746 if (ret) 2747 goto out; 2748 2749 /* 2750 * add qgroup to all inherited groups 2751 */ 2752 if (inherit) { 2753 i_qgroups = (u64 *)(inherit + 1); 2754 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2755 if (*i_qgroups == 0) 2756 continue; 2757 ret = add_qgroup_relation_item(trans, objectid, 2758 *i_qgroups); 2759 if (ret && ret != -EEXIST) 2760 goto out; 2761 ret = add_qgroup_relation_item(trans, *i_qgroups, 2762 objectid); 2763 if (ret && ret != -EEXIST) 2764 goto out; 2765 } 2766 ret = 0; 2767 } 2768 2769 2770 spin_lock(&fs_info->qgroup_lock); 2771 2772 dstgroup = add_qgroup_rb(fs_info, objectid); 2773 if (IS_ERR(dstgroup)) { 2774 ret = PTR_ERR(dstgroup); 2775 goto unlock; 2776 } 2777 2778 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2779 dstgroup->lim_flags = inherit->lim.flags; 2780 dstgroup->max_rfer = inherit->lim.max_rfer; 2781 dstgroup->max_excl = inherit->lim.max_excl; 2782 dstgroup->rsv_rfer = 
inherit->lim.rsv_rfer; 2783 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2784 2785 ret = update_qgroup_limit_item(trans, dstgroup); 2786 if (ret) { 2787 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2788 btrfs_info(fs_info, 2789 "unable to update quota limit for %llu", 2790 dstgroup->qgroupid); 2791 goto unlock; 2792 } 2793 } 2794 2795 if (srcid) { 2796 srcgroup = find_qgroup_rb(fs_info, srcid); 2797 if (!srcgroup) 2798 goto unlock; 2799 2800 /* 2801 * We call inherit after we clone the root in order to make sure 2802 * our counts don't go crazy, so at this point the only 2803 * difference between the two roots should be the root node. 2804 */ 2805 level_size = fs_info->nodesize; 2806 dstgroup->rfer = srcgroup->rfer; 2807 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2808 dstgroup->excl = level_size; 2809 dstgroup->excl_cmpr = level_size; 2810 srcgroup->excl = level_size; 2811 srcgroup->excl_cmpr = level_size; 2812 2813 /* inherit the limit info */ 2814 dstgroup->lim_flags = srcgroup->lim_flags; 2815 dstgroup->max_rfer = srcgroup->max_rfer; 2816 dstgroup->max_excl = srcgroup->max_excl; 2817 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2818 dstgroup->rsv_excl = srcgroup->rsv_excl; 2819 2820 qgroup_dirty(fs_info, dstgroup); 2821 qgroup_dirty(fs_info, srcgroup); 2822 } 2823 2824 if (!inherit) 2825 goto unlock; 2826 2827 i_qgroups = (u64 *)(inherit + 1); 2828 for (i = 0; i < inherit->num_qgroups; ++i) { 2829 if (*i_qgroups) { 2830 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2831 if (ret) 2832 goto unlock; 2833 } 2834 ++i_qgroups; 2835 2836 /* 2837 * If we're doing a snapshot, and adding the snapshot to a new 2838 * qgroup, the numbers are guaranteed to be incorrect. 
2839 */ 2840 if (srcid) 2841 need_rescan = true; 2842 } 2843 2844 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2845 struct btrfs_qgroup *src; 2846 struct btrfs_qgroup *dst; 2847 2848 if (!i_qgroups[0] || !i_qgroups[1]) 2849 continue; 2850 2851 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2852 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2853 2854 if (!src || !dst) { 2855 ret = -EINVAL; 2856 goto unlock; 2857 } 2858 2859 dst->rfer = src->rfer - level_size; 2860 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2861 2862 /* Manually tweaking numbers certainly needs a rescan */ 2863 need_rescan = true; 2864 } 2865 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2866 struct btrfs_qgroup *src; 2867 struct btrfs_qgroup *dst; 2868 2869 if (!i_qgroups[0] || !i_qgroups[1]) 2870 continue; 2871 2872 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2873 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2874 2875 if (!src || !dst) { 2876 ret = -EINVAL; 2877 goto unlock; 2878 } 2879 2880 dst->excl = src->excl + level_size; 2881 dst->excl_cmpr = src->excl_cmpr + level_size; 2882 need_rescan = true; 2883 } 2884 2885 unlock: 2886 spin_unlock(&fs_info->qgroup_lock); 2887 if (!ret) 2888 ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup); 2889 out: 2890 if (!committing) 2891 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2892 if (need_rescan) 2893 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2894 return ret; 2895 } 2896 2897 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2898 { 2899 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2900 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) 2901 return false; 2902 2903 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2904 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) 2905 return false; 2906 2907 return true; 2908 } 2909 2910 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, 2911 enum 
btrfs_qgroup_rsv_type type) 2912 { 2913 struct btrfs_qgroup *qgroup; 2914 struct btrfs_fs_info *fs_info = root->fs_info; 2915 u64 ref_root = root->root_key.objectid; 2916 int ret = 0; 2917 struct ulist_node *unode; 2918 struct ulist_iterator uiter; 2919 2920 if (!is_fstree(ref_root)) 2921 return 0; 2922 2923 if (num_bytes == 0) 2924 return 0; 2925 2926 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && 2927 capable(CAP_SYS_RESOURCE)) 2928 enforce = false; 2929 2930 spin_lock(&fs_info->qgroup_lock); 2931 if (!fs_info->quota_root) 2932 goto out; 2933 2934 qgroup = find_qgroup_rb(fs_info, ref_root); 2935 if (!qgroup) 2936 goto out; 2937 2938 /* 2939 * in a first step, we check all affected qgroups if any limits would 2940 * be exceeded 2941 */ 2942 ulist_reinit(fs_info->qgroup_ulist); 2943 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2944 qgroup_to_aux(qgroup), GFP_ATOMIC); 2945 if (ret < 0) 2946 goto out; 2947 ULIST_ITER_INIT(&uiter); 2948 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2949 struct btrfs_qgroup *qg; 2950 struct btrfs_qgroup_list *glist; 2951 2952 qg = unode_aux_to_qgroup(unode); 2953 2954 if (enforce && !qgroup_check_limits(qg, num_bytes)) { 2955 ret = -EDQUOT; 2956 goto out; 2957 } 2958 2959 list_for_each_entry(glist, &qg->groups, next_group) { 2960 ret = ulist_add(fs_info->qgroup_ulist, 2961 glist->group->qgroupid, 2962 qgroup_to_aux(glist->group), GFP_ATOMIC); 2963 if (ret < 0) 2964 goto out; 2965 } 2966 } 2967 ret = 0; 2968 /* 2969 * no limits exceeded, now record the reservation into all qgroups 2970 */ 2971 ULIST_ITER_INIT(&uiter); 2972 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2973 struct btrfs_qgroup *qg; 2974 2975 qg = unode_aux_to_qgroup(unode); 2976 2977 qgroup_rsv_add(fs_info, qg, num_bytes, type); 2978 } 2979 2980 out: 2981 spin_unlock(&fs_info->qgroup_lock); 2982 return ret; 2983 } 2984 2985 /* 2986 * Free @num_bytes of reserved space with @type for qgroup. 
(Normally level 0 2987 * qgroup). 2988 * 2989 * Will handle all higher level qgroup too. 2990 * 2991 * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup. 2992 * This special case is only used for META_PERTRANS type. 2993 */ 2994 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 2995 u64 ref_root, u64 num_bytes, 2996 enum btrfs_qgroup_rsv_type type) 2997 { 2998 struct btrfs_qgroup *qgroup; 2999 struct ulist_node *unode; 3000 struct ulist_iterator uiter; 3001 int ret = 0; 3002 3003 if (!is_fstree(ref_root)) 3004 return; 3005 3006 if (num_bytes == 0) 3007 return; 3008 3009 if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) { 3010 WARN(1, "%s: Invalid type to free", __func__); 3011 return; 3012 } 3013 spin_lock(&fs_info->qgroup_lock); 3014 3015 if (!fs_info->quota_root) 3016 goto out; 3017 3018 qgroup = find_qgroup_rb(fs_info, ref_root); 3019 if (!qgroup) 3020 goto out; 3021 3022 if (num_bytes == (u64)-1) 3023 /* 3024 * We're freeing all pertrans rsv, get reserved value from 3025 * level 0 qgroup as real num_bytes to free. 3026 */ 3027 num_bytes = qgroup->rsv.values[type]; 3028 3029 ulist_reinit(fs_info->qgroup_ulist); 3030 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 3031 qgroup_to_aux(qgroup), GFP_ATOMIC); 3032 if (ret < 0) 3033 goto out; 3034 ULIST_ITER_INIT(&uiter); 3035 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 3036 struct btrfs_qgroup *qg; 3037 struct btrfs_qgroup_list *glist; 3038 3039 qg = unode_aux_to_qgroup(unode); 3040 3041 qgroup_rsv_release(fs_info, qg, num_bytes, type); 3042 3043 list_for_each_entry(glist, &qg->groups, next_group) { 3044 ret = ulist_add(fs_info->qgroup_ulist, 3045 glist->group->qgroupid, 3046 qgroup_to_aux(glist->group), GFP_ATOMIC); 3047 if (ret < 0) 3048 goto out; 3049 } 3050 } 3051 3052 out: 3053 spin_unlock(&fs_info->qgroup_lock); 3054 } 3055 3056 /* 3057 * Check if the leaf is the last leaf. 
Which means all node pointers 3058 * are at their last position. 3059 */ 3060 static bool is_last_leaf(struct btrfs_path *path) 3061 { 3062 int i; 3063 3064 for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 3065 if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1) 3066 return false; 3067 } 3068 return true; 3069 } 3070 3071 /* 3072 * returns < 0 on error, 0 when more leafs are to be scanned. 3073 * returns 1 when done. 3074 */ 3075 static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, 3076 struct btrfs_path *path) 3077 { 3078 struct btrfs_fs_info *fs_info = trans->fs_info; 3079 struct btrfs_key found; 3080 struct extent_buffer *scratch_leaf = NULL; 3081 struct ulist *roots = NULL; 3082 u64 num_bytes; 3083 bool done; 3084 int slot; 3085 int ret; 3086 3087 mutex_lock(&fs_info->qgroup_rescan_lock); 3088 ret = btrfs_search_slot_for_read(fs_info->extent_root, 3089 &fs_info->qgroup_rescan_progress, 3090 path, 1, 0); 3091 3092 btrfs_debug(fs_info, 3093 "current progress key (%llu %u %llu), search_slot ret %d", 3094 fs_info->qgroup_rescan_progress.objectid, 3095 fs_info->qgroup_rescan_progress.type, 3096 fs_info->qgroup_rescan_progress.offset, ret); 3097 3098 if (ret) { 3099 /* 3100 * The rescan is about to end, we will not be scanning any 3101 * further blocks. We cannot unset the RESCAN flag here, because 3102 * we want to commit the transaction if everything went well. 3103 * To make the live accounting work in this phase, we set our 3104 * scan progress pointer such that every real extent objectid 3105 * will be smaller. 
3106 */ 3107 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3108 btrfs_release_path(path); 3109 mutex_unlock(&fs_info->qgroup_rescan_lock); 3110 return ret; 3111 } 3112 done = is_last_leaf(path); 3113 3114 btrfs_item_key_to_cpu(path->nodes[0], &found, 3115 btrfs_header_nritems(path->nodes[0]) - 1); 3116 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 3117 3118 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 3119 if (!scratch_leaf) { 3120 ret = -ENOMEM; 3121 mutex_unlock(&fs_info->qgroup_rescan_lock); 3122 goto out; 3123 } 3124 slot = path->slots[0]; 3125 btrfs_release_path(path); 3126 mutex_unlock(&fs_info->qgroup_rescan_lock); 3127 3128 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 3129 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 3130 if (found.type != BTRFS_EXTENT_ITEM_KEY && 3131 found.type != BTRFS_METADATA_ITEM_KEY) 3132 continue; 3133 if (found.type == BTRFS_METADATA_ITEM_KEY) 3134 num_bytes = fs_info->nodesize; 3135 else 3136 num_bytes = found.offset; 3137 3138 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 3139 &roots, false); 3140 if (ret < 0) 3141 goto out; 3142 /* For rescan, just pass old_roots as NULL */ 3143 ret = btrfs_qgroup_account_extent(trans, found.objectid, 3144 num_bytes, NULL, roots); 3145 if (ret < 0) 3146 goto out; 3147 } 3148 out: 3149 if (scratch_leaf) 3150 free_extent_buffer(scratch_leaf); 3151 3152 if (done && !ret) { 3153 ret = 1; 3154 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3155 } 3156 return ret; 3157 } 3158 3159 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 3160 { 3161 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 3162 qgroup_rescan_work); 3163 struct btrfs_path *path; 3164 struct btrfs_trans_handle *trans = NULL; 3165 int err = -ENOMEM; 3166 int ret = 0; 3167 3168 path = btrfs_alloc_path(); 3169 if (!path) 3170 goto out; 3171 /* 3172 * Rescan should only search for commit root, and any later difference 3173 * 
should be recorded by qgroup 3174 */ 3175 path->search_commit_root = 1; 3176 path->skip_locking = 1; 3177 3178 err = 0; 3179 while (!err && !btrfs_fs_closing(fs_info)) { 3180 trans = btrfs_start_transaction(fs_info->fs_root, 0); 3181 if (IS_ERR(trans)) { 3182 err = PTR_ERR(trans); 3183 break; 3184 } 3185 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 3186 err = -EINTR; 3187 } else { 3188 err = qgroup_rescan_leaf(trans, path); 3189 } 3190 if (err > 0) 3191 btrfs_commit_transaction(trans); 3192 else 3193 btrfs_end_transaction(trans); 3194 } 3195 3196 out: 3197 btrfs_free_path(path); 3198 3199 mutex_lock(&fs_info->qgroup_rescan_lock); 3200 if (err > 0 && 3201 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 3202 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3203 } else if (err < 0) { 3204 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3205 } 3206 mutex_unlock(&fs_info->qgroup_rescan_lock); 3207 3208 /* 3209 * only update status, since the previous part has already updated the 3210 * qgroup info. 
3211 */ 3212 trans = btrfs_start_transaction(fs_info->quota_root, 1); 3213 if (IS_ERR(trans)) { 3214 err = PTR_ERR(trans); 3215 trans = NULL; 3216 btrfs_err(fs_info, 3217 "fail to start transaction for status update: %d", 3218 err); 3219 } 3220 3221 mutex_lock(&fs_info->qgroup_rescan_lock); 3222 if (!btrfs_fs_closing(fs_info)) 3223 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3224 if (trans) { 3225 ret = update_qgroup_status_item(trans); 3226 if (ret < 0) { 3227 err = ret; 3228 btrfs_err(fs_info, "fail to update qgroup status: %d", 3229 err); 3230 } 3231 } 3232 fs_info->qgroup_rescan_running = false; 3233 complete_all(&fs_info->qgroup_rescan_completion); 3234 mutex_unlock(&fs_info->qgroup_rescan_lock); 3235 3236 if (!trans) 3237 return; 3238 3239 btrfs_end_transaction(trans); 3240 3241 if (btrfs_fs_closing(fs_info)) { 3242 btrfs_info(fs_info, "qgroup scan paused"); 3243 } else if (err >= 0) { 3244 btrfs_info(fs_info, "qgroup scan completed%s", 3245 err > 0 ? " (inconsistency flag cleared)" : ""); 3246 } else { 3247 btrfs_err(fs_info, "qgroup scan failed with %d", err); 3248 } 3249 } 3250 3251 /* 3252 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 3253 * memory required for the rescan context. 
3254 */ 3255 static int 3256 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 3257 int init_flags) 3258 { 3259 int ret = 0; 3260 3261 if (!init_flags) { 3262 /* we're resuming qgroup rescan at mount time */ 3263 if (!(fs_info->qgroup_flags & 3264 BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { 3265 btrfs_warn(fs_info, 3266 "qgroup rescan init failed, qgroup rescan is not queued"); 3267 ret = -EINVAL; 3268 } else if (!(fs_info->qgroup_flags & 3269 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3270 btrfs_warn(fs_info, 3271 "qgroup rescan init failed, qgroup is not enabled"); 3272 ret = -EINVAL; 3273 } 3274 3275 if (ret) 3276 return ret; 3277 } 3278 3279 mutex_lock(&fs_info->qgroup_rescan_lock); 3280 3281 if (init_flags) { 3282 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 3283 btrfs_warn(fs_info, 3284 "qgroup rescan is already in progress"); 3285 ret = -EINPROGRESS; 3286 } else if (!(fs_info->qgroup_flags & 3287 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3288 btrfs_warn(fs_info, 3289 "qgroup rescan init failed, qgroup is not enabled"); 3290 ret = -EINVAL; 3291 } 3292 3293 if (ret) { 3294 mutex_unlock(&fs_info->qgroup_rescan_lock); 3295 return ret; 3296 } 3297 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3298 } 3299 3300 memset(&fs_info->qgroup_rescan_progress, 0, 3301 sizeof(fs_info->qgroup_rescan_progress)); 3302 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 3303 init_completion(&fs_info->qgroup_rescan_completion); 3304 mutex_unlock(&fs_info->qgroup_rescan_lock); 3305 3306 btrfs_init_work(&fs_info->qgroup_rescan_work, 3307 btrfs_qgroup_rescan_worker, NULL, NULL); 3308 return 0; 3309 } 3310 3311 static void 3312 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 3313 { 3314 struct rb_node *n; 3315 struct btrfs_qgroup *qgroup; 3316 3317 spin_lock(&fs_info->qgroup_lock); 3318 /* clear all current qgroup tracking information */ 3319 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 3320 qgroup = rb_entry(n, struct 
btrfs_qgroup, node); 3321 qgroup->rfer = 0; 3322 qgroup->rfer_cmpr = 0; 3323 qgroup->excl = 0; 3324 qgroup->excl_cmpr = 0; 3325 qgroup_dirty(fs_info, qgroup); 3326 } 3327 spin_unlock(&fs_info->qgroup_lock); 3328 } 3329 3330 int 3331 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 3332 { 3333 int ret = 0; 3334 struct btrfs_trans_handle *trans; 3335 3336 ret = qgroup_rescan_init(fs_info, 0, 1); 3337 if (ret) 3338 return ret; 3339 3340 /* 3341 * We have set the rescan_progress to 0, which means no more 3342 * delayed refs will be accounted by btrfs_qgroup_account_ref. 3343 * However, btrfs_qgroup_account_ref may be right after its call 3344 * to btrfs_find_all_roots, in which case it would still do the 3345 * accounting. 3346 * To solve this, we're committing the transaction, which will 3347 * ensure we run all delayed refs and only after that, we are 3348 * going to clear all tracking information for a clean start. 3349 */ 3350 3351 trans = btrfs_join_transaction(fs_info->fs_root); 3352 if (IS_ERR(trans)) { 3353 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3354 return PTR_ERR(trans); 3355 } 3356 ret = btrfs_commit_transaction(trans); 3357 if (ret) { 3358 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3359 return ret; 3360 } 3361 3362 qgroup_rescan_zero_tracking(fs_info); 3363 3364 mutex_lock(&fs_info->qgroup_rescan_lock); 3365 fs_info->qgroup_rescan_running = true; 3366 btrfs_queue_work(fs_info->qgroup_rescan_workers, 3367 &fs_info->qgroup_rescan_work); 3368 mutex_unlock(&fs_info->qgroup_rescan_lock); 3369 3370 return 0; 3371 } 3372 3373 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 3374 bool interruptible) 3375 { 3376 int running; 3377 int ret = 0; 3378 3379 mutex_lock(&fs_info->qgroup_rescan_lock); 3380 running = fs_info->qgroup_rescan_running; 3381 mutex_unlock(&fs_info->qgroup_rescan_lock); 3382 3383 if (!running) 3384 return 0; 3385 3386 if (interruptible) 3387 ret = wait_for_completion_interruptible( 3388 
&fs_info->qgroup_rescan_completion); 3389 else 3390 wait_for_completion(&fs_info->qgroup_rescan_completion); 3391 3392 return ret; 3393 } 3394 3395 /* 3396 * this is only called from open_ctree where we're still single threaded, thus 3397 * locking is omitted here. 3398 */ 3399 void 3400 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 3401 { 3402 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 3403 mutex_lock(&fs_info->qgroup_rescan_lock); 3404 fs_info->qgroup_rescan_running = true; 3405 btrfs_queue_work(fs_info->qgroup_rescan_workers, 3406 &fs_info->qgroup_rescan_work); 3407 mutex_unlock(&fs_info->qgroup_rescan_lock); 3408 } 3409 } 3410 3411 #define rbtree_iterate_from_safe(node, next, start) \ 3412 for (node = start; node && ({ next = rb_next(node); 1;}); node = next) 3413 3414 static int qgroup_unreserve_range(struct btrfs_inode *inode, 3415 struct extent_changeset *reserved, u64 start, 3416 u64 len) 3417 { 3418 struct rb_node *node; 3419 struct rb_node *next; 3420 struct ulist_node *entry = NULL; 3421 int ret = 0; 3422 3423 node = reserved->range_changed.root.rb_node; 3424 while (node) { 3425 entry = rb_entry(node, struct ulist_node, rb_node); 3426 if (entry->val < start) 3427 node = node->rb_right; 3428 else if (entry) 3429 node = node->rb_left; 3430 else 3431 break; 3432 } 3433 3434 /* Empty changeset */ 3435 if (!entry) 3436 return 0; 3437 3438 if (entry->val > start && rb_prev(&entry->rb_node)) 3439 entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, 3440 rb_node); 3441 3442 rbtree_iterate_from_safe(node, next, &entry->rb_node) { 3443 u64 entry_start; 3444 u64 entry_end; 3445 u64 entry_len; 3446 int clear_ret; 3447 3448 entry = rb_entry(node, struct ulist_node, rb_node); 3449 entry_start = entry->val; 3450 entry_end = entry->aux; 3451 entry_len = entry_end - entry_start + 1; 3452 3453 if (entry_start >= start + len) 3454 break; 3455 if (entry_start + entry_len <= start) 3456 continue; 3457 /* 3458 * Now the entry is in 
[start, start + len), revert the 3459 * EXTENT_QGROUP_RESERVED bit. 3460 */ 3461 clear_ret = clear_extent_bits(&inode->io_tree, entry_start, 3462 entry_end, EXTENT_QGROUP_RESERVED); 3463 if (!ret && clear_ret < 0) 3464 ret = clear_ret; 3465 3466 ulist_del(&reserved->range_changed, entry->val, entry->aux); 3467 if (likely(reserved->bytes_changed >= entry_len)) { 3468 reserved->bytes_changed -= entry_len; 3469 } else { 3470 WARN_ON(1); 3471 reserved->bytes_changed = 0; 3472 } 3473 } 3474 3475 return ret; 3476 } 3477 3478 /* 3479 * Try to free some space for qgroup. 3480 * 3481 * For qgroup, there are only 3 ways to free qgroup space: 3482 * - Flush nodatacow write 3483 * Any nodatacow write will free its reserved data space at run_delalloc_range(). 3484 * In theory, we should only flush nodatacow inodes, but it's not yet 3485 * possible, so we need to flush the whole root. 3486 * 3487 * - Wait for ordered extents 3488 * When ordered extents are finished, their reserved metadata is finally 3489 * converted to per_trans status, which can be freed by later commit 3490 * transaction. 3491 * 3492 * - Commit transaction 3493 * This would free the meta_per_trans space. 3494 * In theory this shouldn't provide much space, but any more qgroup space 3495 * is needed. 3496 */ 3497 static int try_flush_qgroup(struct btrfs_root *root) 3498 { 3499 struct btrfs_trans_handle *trans; 3500 int ret; 3501 3502 /* 3503 * We don't want to run flush again and again, so if there is a running 3504 * one, we won't try to start a new flush, but exit directly. 
3505 */ 3506 if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { 3507 wait_event(root->qgroup_flush_wait, 3508 !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); 3509 return 0; 3510 } 3511 3512 ret = btrfs_start_delalloc_snapshot(root); 3513 if (ret < 0) 3514 goto out; 3515 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); 3516 3517 trans = btrfs_join_transaction(root); 3518 if (IS_ERR(trans)) { 3519 ret = PTR_ERR(trans); 3520 goto out; 3521 } 3522 3523 ret = btrfs_commit_transaction(trans); 3524 out: 3525 clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); 3526 wake_up(&root->qgroup_flush_wait); 3527 return ret; 3528 } 3529 3530 static int qgroup_reserve_data(struct btrfs_inode *inode, 3531 struct extent_changeset **reserved_ret, u64 start, 3532 u64 len) 3533 { 3534 struct btrfs_root *root = inode->root; 3535 struct extent_changeset *reserved; 3536 bool new_reserved = false; 3537 u64 orig_reserved; 3538 u64 to_reserve; 3539 int ret; 3540 3541 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 3542 !is_fstree(root->root_key.objectid) || len == 0) 3543 return 0; 3544 3545 /* @reserved parameter is mandatory for qgroup */ 3546 if (WARN_ON(!reserved_ret)) 3547 return -EINVAL; 3548 if (!*reserved_ret) { 3549 new_reserved = true; 3550 *reserved_ret = extent_changeset_alloc(); 3551 if (!*reserved_ret) 3552 return -ENOMEM; 3553 } 3554 reserved = *reserved_ret; 3555 /* Record already reserved space */ 3556 orig_reserved = reserved->bytes_changed; 3557 ret = set_record_extent_bits(&inode->io_tree, start, 3558 start + len -1, EXTENT_QGROUP_RESERVED, reserved); 3559 3560 /* Newly reserved space */ 3561 to_reserve = reserved->bytes_changed - orig_reserved; 3562 trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, 3563 to_reserve, QGROUP_RESERVE); 3564 if (ret < 0) 3565 goto out; 3566 ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); 3567 if (ret < 0) 3568 goto cleanup; 3569 3570 return ret; 3571 3572 cleanup: 
3573 qgroup_unreserve_range(inode, reserved, start, len); 3574 out: 3575 if (new_reserved) { 3576 extent_changeset_release(reserved); 3577 kfree(reserved); 3578 *reserved_ret = NULL; 3579 } 3580 return ret; 3581 } 3582 3583 /* 3584 * Reserve qgroup space for range [start, start + len). 3585 * 3586 * This function will either reserve space from related qgroups or do nothing 3587 * if the range is already reserved. 3588 * 3589 * Return 0 for successful reservation 3590 * Return <0 for error (including -EQUOT) 3591 * 3592 * NOTE: This function may sleep for memory allocation, dirty page flushing and 3593 * commit transaction. So caller should not hold any dirty page locked. 3594 */ 3595 int btrfs_qgroup_reserve_data(struct btrfs_inode *inode, 3596 struct extent_changeset **reserved_ret, u64 start, 3597 u64 len) 3598 { 3599 int ret; 3600 3601 ret = qgroup_reserve_data(inode, reserved_ret, start, len); 3602 if (ret <= 0 && ret != -EDQUOT) 3603 return ret; 3604 3605 ret = try_flush_qgroup(inode->root); 3606 if (ret < 0) 3607 return ret; 3608 return qgroup_reserve_data(inode, reserved_ret, start, len); 3609 } 3610 3611 /* Free ranges specified by @reserved, normally in error path */ 3612 static int qgroup_free_reserved_data(struct btrfs_inode *inode, 3613 struct extent_changeset *reserved, u64 start, u64 len) 3614 { 3615 struct btrfs_root *root = inode->root; 3616 struct ulist_node *unode; 3617 struct ulist_iterator uiter; 3618 struct extent_changeset changeset; 3619 int freed = 0; 3620 int ret; 3621 3622 extent_changeset_init(&changeset); 3623 len = round_up(start + len, root->fs_info->sectorsize); 3624 start = round_down(start, root->fs_info->sectorsize); 3625 3626 ULIST_ITER_INIT(&uiter); 3627 while ((unode = ulist_next(&reserved->range_changed, &uiter))) { 3628 u64 range_start = unode->val; 3629 /* unode->aux is the inclusive end */ 3630 u64 range_len = unode->aux - range_start + 1; 3631 u64 free_start; 3632 u64 free_len; 3633 3634 
extent_changeset_release(&changeset); 3635 3636 /* Only free range in range [start, start + len) */ 3637 if (range_start >= start + len || 3638 range_start + range_len <= start) 3639 continue; 3640 free_start = max(range_start, start); 3641 free_len = min(start + len, range_start + range_len) - 3642 free_start; 3643 /* 3644 * TODO: To also modify reserved->ranges_reserved to reflect 3645 * the modification. 3646 * 3647 * However as long as we free qgroup reserved according to 3648 * EXTENT_QGROUP_RESERVED, we won't double free. 3649 * So not need to rush. 3650 */ 3651 ret = clear_record_extent_bits(&inode->io_tree, free_start, 3652 free_start + free_len - 1, 3653 EXTENT_QGROUP_RESERVED, &changeset); 3654 if (ret < 0) 3655 goto out; 3656 freed += changeset.bytes_changed; 3657 } 3658 btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, 3659 BTRFS_QGROUP_RSV_DATA); 3660 ret = freed; 3661 out: 3662 extent_changeset_release(&changeset); 3663 return ret; 3664 } 3665 3666 static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, 3667 struct extent_changeset *reserved, u64 start, u64 len, 3668 int free) 3669 { 3670 struct extent_changeset changeset; 3671 int trace_op = QGROUP_RELEASE; 3672 int ret; 3673 3674 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags)) 3675 return 0; 3676 3677 /* In release case, we shouldn't have @reserved */ 3678 WARN_ON(!free && reserved); 3679 if (free && reserved) 3680 return qgroup_free_reserved_data(inode, reserved, start, len); 3681 extent_changeset_init(&changeset); 3682 ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, 3683 EXTENT_QGROUP_RESERVED, &changeset); 3684 if (ret < 0) 3685 goto out; 3686 3687 if (free) 3688 trace_op = QGROUP_FREE; 3689 trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len, 3690 changeset.bytes_changed, trace_op); 3691 if (free) 3692 btrfs_qgroup_free_refroot(inode->root->fs_info, 3693 inode->root->root_key.objectid, 3694 
changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 3695 ret = changeset.bytes_changed; 3696 out: 3697 extent_changeset_release(&changeset); 3698 return ret; 3699 } 3700 3701 /* 3702 * Free a reserved space range from io_tree and related qgroups 3703 * 3704 * Should be called when a range of pages get invalidated before reaching disk. 3705 * Or for error cleanup case. 3706 * if @reserved is given, only reserved range in [@start, @start + @len) will 3707 * be freed. 3708 * 3709 * For data written to disk, use btrfs_qgroup_release_data(). 3710 * 3711 * NOTE: This function may sleep for memory allocation. 3712 */ 3713 int btrfs_qgroup_free_data(struct btrfs_inode *inode, 3714 struct extent_changeset *reserved, u64 start, u64 len) 3715 { 3716 return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); 3717 } 3718 3719 /* 3720 * Release a reserved space range from io_tree only. 3721 * 3722 * Should be called when a range of pages get written to disk and corresponding 3723 * FILE_EXTENT is inserted into corresponding root. 3724 * 3725 * Since new qgroup accounting framework will only update qgroup numbers at 3726 * commit_transaction() time, its reserved space shouldn't be freed from 3727 * related qgroups. 3728 * 3729 * But we should release the range from io_tree, to allow further write to be 3730 * COWed. 3731 * 3732 * NOTE: This function may sleep for memory allocation. 
3733 */ 3734 int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) 3735 { 3736 return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); 3737 } 3738 3739 static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, 3740 enum btrfs_qgroup_rsv_type type) 3741 { 3742 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 3743 type != BTRFS_QGROUP_RSV_META_PERTRANS) 3744 return; 3745 if (num_bytes == 0) 3746 return; 3747 3748 spin_lock(&root->qgroup_meta_rsv_lock); 3749 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) 3750 root->qgroup_meta_rsv_prealloc += num_bytes; 3751 else 3752 root->qgroup_meta_rsv_pertrans += num_bytes; 3753 spin_unlock(&root->qgroup_meta_rsv_lock); 3754 } 3755 3756 static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, 3757 enum btrfs_qgroup_rsv_type type) 3758 { 3759 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 3760 type != BTRFS_QGROUP_RSV_META_PERTRANS) 3761 return 0; 3762 if (num_bytes == 0) 3763 return 0; 3764 3765 spin_lock(&root->qgroup_meta_rsv_lock); 3766 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) { 3767 num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc, 3768 num_bytes); 3769 root->qgroup_meta_rsv_prealloc -= num_bytes; 3770 } else { 3771 num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans, 3772 num_bytes); 3773 root->qgroup_meta_rsv_pertrans -= num_bytes; 3774 } 3775 spin_unlock(&root->qgroup_meta_rsv_lock); 3776 return num_bytes; 3777 } 3778 3779 static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3780 enum btrfs_qgroup_rsv_type type, bool enforce) 3781 { 3782 struct btrfs_fs_info *fs_info = root->fs_info; 3783 int ret; 3784 3785 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3786 !is_fstree(root->root_key.objectid) || num_bytes == 0) 3787 return 0; 3788 3789 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3790 trace_qgroup_meta_reserve(root, (s64)num_bytes, type); 3791 ret = qgroup_reserve(root, num_bytes, enforce, type); 3792 if (ret < 0) 3793 
return ret; 3794 /* 3795 * Record what we have reserved into root. 3796 * 3797 * To avoid quota disabled->enabled underflow. 3798 * In that case, we may try to free space we haven't reserved 3799 * (since quota was disabled), so record what we reserved into root. 3800 * And ensure later release won't underflow this number. 3801 */ 3802 add_root_meta_rsv(root, num_bytes, type); 3803 return ret; 3804 } 3805 3806 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3807 enum btrfs_qgroup_rsv_type type, bool enforce) 3808 { 3809 int ret; 3810 3811 ret = qgroup_reserve_meta(root, num_bytes, type, enforce); 3812 if (ret <= 0 && ret != -EDQUOT) 3813 return ret; 3814 3815 ret = try_flush_qgroup(root); 3816 if (ret < 0) 3817 return ret; 3818 return qgroup_reserve_meta(root, num_bytes, type, enforce); 3819 } 3820 3821 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) 3822 { 3823 struct btrfs_fs_info *fs_info = root->fs_info; 3824 3825 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3826 !is_fstree(root->root_key.objectid)) 3827 return; 3828 3829 /* TODO: Update trace point to handle such free */ 3830 trace_qgroup_meta_free_all_pertrans(root); 3831 /* Special value -1 means to free all reserved space */ 3832 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1, 3833 BTRFS_QGROUP_RSV_META_PERTRANS); 3834 } 3835 3836 void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, 3837 enum btrfs_qgroup_rsv_type type) 3838 { 3839 struct btrfs_fs_info *fs_info = root->fs_info; 3840 3841 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3842 !is_fstree(root->root_key.objectid)) 3843 return; 3844 3845 /* 3846 * reservation for META_PREALLOC can happen before quota is enabled, 3847 * which can lead to underflow. 3848 * Here ensure we will only free what we really have reserved. 
3849 */ 3850 num_bytes = sub_root_meta_rsv(root, num_bytes, type); 3851 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3852 trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); 3853 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, 3854 num_bytes, type); 3855 } 3856 3857 static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, 3858 int num_bytes) 3859 { 3860 struct btrfs_qgroup *qgroup; 3861 struct ulist_node *unode; 3862 struct ulist_iterator uiter; 3863 int ret = 0; 3864 3865 if (num_bytes == 0) 3866 return; 3867 if (!fs_info->quota_root) 3868 return; 3869 3870 spin_lock(&fs_info->qgroup_lock); 3871 qgroup = find_qgroup_rb(fs_info, ref_root); 3872 if (!qgroup) 3873 goto out; 3874 ulist_reinit(fs_info->qgroup_ulist); 3875 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 3876 qgroup_to_aux(qgroup), GFP_ATOMIC); 3877 if (ret < 0) 3878 goto out; 3879 ULIST_ITER_INIT(&uiter); 3880 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 3881 struct btrfs_qgroup *qg; 3882 struct btrfs_qgroup_list *glist; 3883 3884 qg = unode_aux_to_qgroup(unode); 3885 3886 qgroup_rsv_release(fs_info, qg, num_bytes, 3887 BTRFS_QGROUP_RSV_META_PREALLOC); 3888 qgroup_rsv_add(fs_info, qg, num_bytes, 3889 BTRFS_QGROUP_RSV_META_PERTRANS); 3890 list_for_each_entry(glist, &qg->groups, next_group) { 3891 ret = ulist_add(fs_info->qgroup_ulist, 3892 glist->group->qgroupid, 3893 qgroup_to_aux(glist->group), GFP_ATOMIC); 3894 if (ret < 0) 3895 goto out; 3896 } 3897 } 3898 out: 3899 spin_unlock(&fs_info->qgroup_lock); 3900 } 3901 3902 void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) 3903 { 3904 struct btrfs_fs_info *fs_info = root->fs_info; 3905 3906 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3907 !is_fstree(root->root_key.objectid)) 3908 return; 3909 /* Same as btrfs_qgroup_free_meta_prealloc() */ 3910 num_bytes = sub_root_meta_rsv(root, num_bytes, 3911 BTRFS_QGROUP_RSV_META_PREALLOC); 3912 
trace_qgroup_meta_convert(root, num_bytes); 3913 qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes); 3914 } 3915 3916 /* 3917 * Check qgroup reserved space leaking, normally at destroy inode 3918 * time 3919 */ 3920 void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode) 3921 { 3922 struct extent_changeset changeset; 3923 struct ulist_node *unode; 3924 struct ulist_iterator iter; 3925 int ret; 3926 3927 extent_changeset_init(&changeset); 3928 ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1, 3929 EXTENT_QGROUP_RESERVED, &changeset); 3930 3931 WARN_ON(ret < 0); 3932 if (WARN_ON(changeset.bytes_changed)) { 3933 ULIST_ITER_INIT(&iter); 3934 while ((unode = ulist_next(&changeset.range_changed, &iter))) { 3935 btrfs_warn(inode->root->fs_info, 3936 "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu", 3937 btrfs_ino(inode), unode->val, unode->aux); 3938 } 3939 btrfs_qgroup_free_refroot(inode->root->fs_info, 3940 inode->root->root_key.objectid, 3941 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 3942 3943 } 3944 extent_changeset_release(&changeset); 3945 } 3946 3947 void btrfs_qgroup_init_swapped_blocks( 3948 struct btrfs_qgroup_swapped_blocks *swapped_blocks) 3949 { 3950 int i; 3951 3952 spin_lock_init(&swapped_blocks->lock); 3953 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 3954 swapped_blocks->blocks[i] = RB_ROOT; 3955 swapped_blocks->swapped = false; 3956 } 3957 3958 /* 3959 * Delete all swapped blocks record of @root. 3960 * Every record here means we skipped a full subtree scan for qgroup. 3961 * 3962 * Gets called when committing one transaction. 
3963 */ 3964 void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root) 3965 { 3966 struct btrfs_qgroup_swapped_blocks *swapped_blocks; 3967 int i; 3968 3969 swapped_blocks = &root->swapped_blocks; 3970 3971 spin_lock(&swapped_blocks->lock); 3972 if (!swapped_blocks->swapped) 3973 goto out; 3974 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 3975 struct rb_root *cur_root = &swapped_blocks->blocks[i]; 3976 struct btrfs_qgroup_swapped_block *entry; 3977 struct btrfs_qgroup_swapped_block *next; 3978 3979 rbtree_postorder_for_each_entry_safe(entry, next, cur_root, 3980 node) 3981 kfree(entry); 3982 swapped_blocks->blocks[i] = RB_ROOT; 3983 } 3984 swapped_blocks->swapped = false; 3985 out: 3986 spin_unlock(&swapped_blocks->lock); 3987 } 3988 3989 /* 3990 * Add subtree roots record into @subvol_root. 3991 * 3992 * @subvol_root: tree root of the subvolume tree get swapped 3993 * @bg: block group under balance 3994 * @subvol_parent/slot: pointer to the subtree root in subvolume tree 3995 * @reloc_parent/slot: pointer to the subtree root in reloc tree 3996 * BOTH POINTERS ARE BEFORE TREE SWAP 3997 * @last_snapshot: last snapshot generation of the subvolume tree 3998 */ 3999 int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, 4000 struct btrfs_root *subvol_root, 4001 struct btrfs_block_group *bg, 4002 struct extent_buffer *subvol_parent, int subvol_slot, 4003 struct extent_buffer *reloc_parent, int reloc_slot, 4004 u64 last_snapshot) 4005 { 4006 struct btrfs_fs_info *fs_info = subvol_root->fs_info; 4007 struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks; 4008 struct btrfs_qgroup_swapped_block *block; 4009 struct rb_node **cur; 4010 struct rb_node *parent = NULL; 4011 int level = btrfs_header_level(subvol_parent) - 1; 4012 int ret = 0; 4013 4014 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 4015 return 0; 4016 4017 if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) > 4018 btrfs_node_ptr_generation(reloc_parent, 
reloc_slot)) { 4019 btrfs_err_rl(fs_info, 4020 "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu", 4021 __func__, 4022 btrfs_node_ptr_generation(subvol_parent, subvol_slot), 4023 btrfs_node_ptr_generation(reloc_parent, reloc_slot)); 4024 return -EUCLEAN; 4025 } 4026 4027 block = kmalloc(sizeof(*block), GFP_NOFS); 4028 if (!block) { 4029 ret = -ENOMEM; 4030 goto out; 4031 } 4032 4033 /* 4034 * @reloc_parent/slot is still before swap, while @block is going to 4035 * record the bytenr after swap, so we do the swap here. 4036 */ 4037 block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot); 4038 block->subvol_generation = btrfs_node_ptr_generation(reloc_parent, 4039 reloc_slot); 4040 block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot); 4041 block->reloc_generation = btrfs_node_ptr_generation(subvol_parent, 4042 subvol_slot); 4043 block->last_snapshot = last_snapshot; 4044 block->level = level; 4045 4046 /* 4047 * If we have bg == NULL, we're called from btrfs_recover_relocation(), 4048 * no one else can modify tree blocks thus we qgroup will not change 4049 * no matter the value of trace_leaf. 
4050 */ 4051 if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA) 4052 block->trace_leaf = true; 4053 else 4054 block->trace_leaf = false; 4055 btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot); 4056 4057 /* Insert @block into @blocks */ 4058 spin_lock(&blocks->lock); 4059 cur = &blocks->blocks[level].rb_node; 4060 while (*cur) { 4061 struct btrfs_qgroup_swapped_block *entry; 4062 4063 parent = *cur; 4064 entry = rb_entry(parent, struct btrfs_qgroup_swapped_block, 4065 node); 4066 4067 if (entry->subvol_bytenr < block->subvol_bytenr) { 4068 cur = &(*cur)->rb_left; 4069 } else if (entry->subvol_bytenr > block->subvol_bytenr) { 4070 cur = &(*cur)->rb_right; 4071 } else { 4072 if (entry->subvol_generation != 4073 block->subvol_generation || 4074 entry->reloc_bytenr != block->reloc_bytenr || 4075 entry->reloc_generation != 4076 block->reloc_generation) { 4077 /* 4078 * Duplicated but mismatch entry found. 4079 * Shouldn't happen. 4080 * 4081 * Marking qgroup inconsistent should be enough 4082 * for end users. 4083 */ 4084 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 4085 ret = -EEXIST; 4086 } 4087 kfree(block); 4088 goto out_unlock; 4089 } 4090 } 4091 rb_link_node(&block->node, parent, cur); 4092 rb_insert_color(&block->node, &blocks->blocks[level]); 4093 blocks->swapped = true; 4094 out_unlock: 4095 spin_unlock(&blocks->lock); 4096 out: 4097 if (ret < 0) 4098 fs_info->qgroup_flags |= 4099 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 4100 return ret; 4101 } 4102 4103 /* 4104 * Check if the tree block is a subtree root, and if so do the needed 4105 * delayed subtree trace for qgroup. 4106 * 4107 * This is called during btrfs_cow_block(). 
4108 */ 4109 int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, 4110 struct btrfs_root *root, 4111 struct extent_buffer *subvol_eb) 4112 { 4113 struct btrfs_fs_info *fs_info = root->fs_info; 4114 struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; 4115 struct btrfs_qgroup_swapped_block *block; 4116 struct extent_buffer *reloc_eb = NULL; 4117 struct rb_node *node; 4118 bool found = false; 4119 bool swapped = false; 4120 int level = btrfs_header_level(subvol_eb); 4121 int ret = 0; 4122 int i; 4123 4124 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 4125 return 0; 4126 if (!is_fstree(root->root_key.objectid) || !root->reloc_root) 4127 return 0; 4128 4129 spin_lock(&blocks->lock); 4130 if (!blocks->swapped) { 4131 spin_unlock(&blocks->lock); 4132 return 0; 4133 } 4134 node = blocks->blocks[level].rb_node; 4135 4136 while (node) { 4137 block = rb_entry(node, struct btrfs_qgroup_swapped_block, node); 4138 if (block->subvol_bytenr < subvol_eb->start) { 4139 node = node->rb_left; 4140 } else if (block->subvol_bytenr > subvol_eb->start) { 4141 node = node->rb_right; 4142 } else { 4143 found = true; 4144 break; 4145 } 4146 } 4147 if (!found) { 4148 spin_unlock(&blocks->lock); 4149 goto out; 4150 } 4151 /* Found one, remove it from @blocks first and update blocks->swapped */ 4152 rb_erase(&block->node, &blocks->blocks[level]); 4153 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4154 if (RB_EMPTY_ROOT(&blocks->blocks[i])) { 4155 swapped = true; 4156 break; 4157 } 4158 } 4159 blocks->swapped = swapped; 4160 spin_unlock(&blocks->lock); 4161 4162 /* Read out reloc subtree root */ 4163 reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 4164 block->reloc_generation, block->level, 4165 &block->first_key); 4166 if (IS_ERR(reloc_eb)) { 4167 ret = PTR_ERR(reloc_eb); 4168 reloc_eb = NULL; 4169 goto free_out; 4170 } 4171 if (!extent_buffer_uptodate(reloc_eb)) { 4172 ret = -EIO; 4173 goto free_out; 4174 } 4175 4176 ret = 
qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb, 4177 block->last_snapshot, block->trace_leaf); 4178 free_out: 4179 kfree(block); 4180 free_extent_buffer(reloc_eb); 4181 out: 4182 if (ret < 0) { 4183 btrfs_err_rl(fs_info, 4184 "failed to account subtree at bytenr %llu: %d", 4185 subvol_eb->start, ret); 4186 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 4187 } 4188 return ret; 4189 } 4190 4191 void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) 4192 { 4193 struct btrfs_qgroup_extent_record *entry; 4194 struct btrfs_qgroup_extent_record *next; 4195 struct rb_root *root; 4196 4197 root = &trans->delayed_refs.dirty_extent_root; 4198 rbtree_postorder_for_each_entry_safe(entry, next, root, node) { 4199 ulist_free(entry->old_roots); 4200 kfree(entry); 4201 } 4202 } 4203