1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2011 STRATO. All rights reserved. 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/pagemap.h> 8 #include <linux/writeback.h> 9 #include <linux/blkdev.h> 10 #include <linux/rbtree.h> 11 #include <linux/slab.h> 12 #include <linux/workqueue.h> 13 #include <linux/btrfs.h> 14 #include <linux/sched/mm.h> 15 16 #include "ctree.h" 17 #include "transaction.h" 18 #include "disk-io.h" 19 #include "locking.h" 20 #include "ulist.h" 21 #include "backref.h" 22 #include "extent_io.h" 23 #include "qgroup.h" 24 #include "block-group.h" 25 #include "sysfs.h" 26 27 /* TODO XXX FIXME 28 * - subvol delete -> delete when ref goes to 0? delete limits also? 29 * - reorganize keys 30 * - compressed 31 * - sync 32 * - copy also limits on subvol creation 33 * - limit 34 * - caches for ulists 35 * - performance benchmarks 36 * - check all ioctl parameters 37 */ 38 39 /* 40 * Helpers to access qgroup reservation 41 * 42 * Callers should ensure the lock context and type are valid 43 */ 44 45 static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup) 46 { 47 u64 ret = 0; 48 int i; 49 50 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 51 ret += qgroup->rsv.values[i]; 52 53 return ret; 54 } 55 56 #ifdef CONFIG_BTRFS_DEBUG 57 static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type) 58 { 59 if (type == BTRFS_QGROUP_RSV_DATA) 60 return "data"; 61 if (type == BTRFS_QGROUP_RSV_META_PERTRANS) 62 return "meta_pertrans"; 63 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) 64 return "meta_prealloc"; 65 return NULL; 66 } 67 #endif 68 69 static void qgroup_rsv_add(struct btrfs_fs_info *fs_info, 70 struct btrfs_qgroup *qgroup, u64 num_bytes, 71 enum btrfs_qgroup_rsv_type type) 72 { 73 trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type); 74 qgroup->rsv.values[type] += num_bytes; 75 } 76 77 static void qgroup_rsv_release(struct btrfs_fs_info *fs_info, 78 struct btrfs_qgroup *qgroup, u64 num_bytes, 79 enum btrfs_qgroup_rsv_type type) 80 { 81 trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type); 82 if (qgroup->rsv.values[type] >= num_bytes) { 83 qgroup->rsv.values[type] -= num_bytes; 84 return; 85 } 86 #ifdef CONFIG_BTRFS_DEBUG 87 WARN_RATELIMIT(1, 88 "qgroup %llu %s reserved space underflow, have %llu to free %llu", 89 qgroup->qgroupid, qgroup_rsv_type_str(type), 90 qgroup->rsv.values[type], num_bytes); 91 #endif 92 qgroup->rsv.values[type] = 0; 93 } 94 95 static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info, 96 struct btrfs_qgroup *dest, 97 struct btrfs_qgroup *src) 98 { 99 int i; 100 101 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 102 qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i); 103 } 104 105 static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info, 106 struct btrfs_qgroup *dest, 107 struct btrfs_qgroup *src) 108 { 109 int i; 110 111 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) 112 qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i); 113 } 114 115 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 116 int mod) 117 { 118 if (qg->old_refcnt < seq) 119 qg->old_refcnt = seq; 120 qg->old_refcnt += mod; 121 } 122 123 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 124 int mod) 125 { 126 if (qg->new_refcnt < seq) 127 qg->new_refcnt = seq; 128 qg->new_refcnt += mod; 129 } 130 131 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 132 { 133 if (qg->old_refcnt < seq) 134 return 0; 135 return qg->old_refcnt - seq; 136 } 137 
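/*
 * Note: the refcnt helpers here are sequence-relative. A stored refcnt
 * below @seq means the qgroup has not been touched in the current
 * accounting round, so the update helpers first raise it to @seq, and the
 * getters return the delta from @seq (0 if untouched).
 */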
138 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 139 { 140 if (qg->new_refcnt < seq) 141 return 0; 142 return qg->new_refcnt - seq; 143 } 144 145 /* 146 * glue structure to represent the relations between qgroups. 147 */ 148 struct btrfs_qgroup_list { 149 struct list_head next_group; 150 struct list_head next_member; 151 struct btrfs_qgroup *group; 152 struct btrfs_qgroup *member; 153 }; 154 155 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 156 { 157 return (u64)(uintptr_t)qg; 158 } 159 160 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 161 { 162 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 163 } 164 165 static int 166 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 167 int init_flags); 168 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 169 170 /* must be called with qgroup_ioctl_lock held */ 171 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 172 u64 qgroupid) 173 { 174 struct rb_node *n = fs_info->qgroup_tree.rb_node; 175 struct btrfs_qgroup *qgroup; 176 177 while (n) { 178 qgroup = rb_entry(n, struct btrfs_qgroup, node); 179 if (qgroup->qgroupid < qgroupid) 180 n = n->rb_left; 181 else if (qgroup->qgroupid > qgroupid) 182 n = n->rb_right; 183 else 184 return qgroup; 185 } 186 return NULL; 187 } 188 189 /* must be called with qgroup_lock held */ 190 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 191 u64 qgroupid) 192 { 193 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 194 struct rb_node *parent = NULL; 195 struct btrfs_qgroup *qgroup; 196 197 while (*p) { 198 parent = *p; 199 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 200 201 if (qgroup->qgroupid < qgroupid) 202 p = &(*p)->rb_left; 203 else if (qgroup->qgroupid > qgroupid) 204 p = &(*p)->rb_right; 205 else 206 return qgroup; 207 } 208 209 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 210 if (!qgroup) 211 return ERR_PTR(-ENOMEM); 212 213 qgroup->qgroupid = qgroupid; 214 INIT_LIST_HEAD(&qgroup->groups); 215 INIT_LIST_HEAD(&qgroup->members); 216 INIT_LIST_HEAD(&qgroup->dirty); 217 218 rb_link_node(&qgroup->node, parent, p); 219 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 220 221 return qgroup; 222 } 223 224 static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, 225 struct btrfs_qgroup *qgroup) 226 { 227 struct btrfs_qgroup_list *list; 228 229 list_del(&qgroup->dirty); 230 while (!list_empty(&qgroup->groups)) { 231 list = list_first_entry(&qgroup->groups, 232 struct btrfs_qgroup_list, next_group); 233 list_del(&list->next_group); 234 list_del(&list->next_member); 235 kfree(list); 236 } 237 238 while (!list_empty(&qgroup->members)) { 239 list = list_first_entry(&qgroup->members, 240 struct btrfs_qgroup_list, next_member); 241 list_del(&list->next_group); 242 list_del(&list->next_member); 243 kfree(list); 244 } 245 } 246 247 /* must be called with qgroup_lock held */ 248 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 249 { 250 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 251 252 if (!qgroup) 253 return -ENOENT; 254 255 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 256 __del_qgroup_rb(fs_info, qgroup); 257 return 0; 258 } 259 260 /* must be called with qgroup_lock held */ 261 static int add_relation_rb(struct btrfs_fs_info *fs_info, 262 u64 memberid, u64 parentid) 263 { 264 struct btrfs_qgroup *member; 265 struct btrfs_qgroup *parent; 266 struct btrfs_qgroup_list *list; 267 268 member = 
find_qgroup_rb(fs_info, memberid); 269 parent = find_qgroup_rb(fs_info, parentid); 270 if (!member || !parent) 271 return -ENOENT; 272 273 list = kzalloc(sizeof(*list), GFP_ATOMIC); 274 if (!list) 275 return -ENOMEM; 276 277 list->group = parent; 278 list->member = member; 279 list_add_tail(&list->next_group, &member->groups); 280 list_add_tail(&list->next_member, &parent->members); 281 282 return 0; 283 } 284 285 /* must be called with qgroup_lock held */ 286 static int del_relation_rb(struct btrfs_fs_info *fs_info, 287 u64 memberid, u64 parentid) 288 { 289 struct btrfs_qgroup *member; 290 struct btrfs_qgroup *parent; 291 struct btrfs_qgroup_list *list; 292 293 member = find_qgroup_rb(fs_info, memberid); 294 parent = find_qgroup_rb(fs_info, parentid); 295 if (!member || !parent) 296 return -ENOENT; 297 298 list_for_each_entry(list, &member->groups, next_group) { 299 if (list->group == parent) { 300 list_del(&list->next_group); 301 list_del(&list->next_member); 302 kfree(list); 303 return 0; 304 } 305 } 306 return -ENOENT; 307 } 308 309 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 310 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 311 u64 rfer, u64 excl) 312 { 313 struct btrfs_qgroup *qgroup; 314 315 qgroup = find_qgroup_rb(fs_info, qgroupid); 316 if (!qgroup) 317 return -EINVAL; 318 if (qgroup->rfer != rfer || qgroup->excl != excl) 319 return -EINVAL; 320 return 0; 321 } 322 #endif 323 324 /* 325 * The full config is read in one go, only called from open_ctree() 326 * It doesn't use any locking, as at this point we're still single-threaded 327 */ 328 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 329 { 330 struct btrfs_key key; 331 struct btrfs_key found_key; 332 struct btrfs_root *quota_root = fs_info->quota_root; 333 struct btrfs_path *path = NULL; 334 struct extent_buffer *l; 335 int slot; 336 int ret = 0; 337 u64 flags = 0; 338 u64 rescan_progress = 0; 339 340 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 341 return 0; 342 343 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 344 if (!fs_info->qgroup_ulist) { 345 ret = -ENOMEM; 346 goto out; 347 } 348 349 path = btrfs_alloc_path(); 350 if (!path) { 351 ret = -ENOMEM; 352 goto out; 353 } 354 355 ret = btrfs_sysfs_add_qgroups(fs_info); 356 if (ret < 0) 357 goto out; 358 /* default this to quota off, in case no status key is found */ 359 fs_info->qgroup_flags = 0; 360 361 /* 362 * pass 1: read status, all qgroup infos and limits 363 */ 364 key.objectid = 0; 365 key.type = 0; 366 key.offset = 0; 367 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 368 if (ret) 369 goto out; 370 371 while (1) { 372 struct btrfs_qgroup *qgroup; 373 374 slot = path->slots[0]; 375 l = path->nodes[0]; 376 btrfs_item_key_to_cpu(l, &found_key, slot); 377 378 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 379 struct btrfs_qgroup_status_item *ptr; 380 381 ptr = btrfs_item_ptr(l, slot, 382 struct btrfs_qgroup_status_item); 383 384 if (btrfs_qgroup_status_version(l, ptr) != 385 BTRFS_QGROUP_STATUS_VERSION) { 386 btrfs_err(fs_info, 387 "old qgroup version, quota disabled"); 388 goto out; 389 } 390 if (btrfs_qgroup_status_generation(l, ptr) != 391 fs_info->generation) { 392 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 393 btrfs_err(fs_info, 394 "qgroup generation mismatch, marked as inconsistent"); 395 } 396 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 397 ptr); 398 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 399 goto next1; 400 } 401 402 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 
403 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 404 goto next1; 405 406 qgroup = find_qgroup_rb(fs_info, found_key.offset); 407 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 408 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 409 btrfs_err(fs_info, "inconsistent qgroup config"); 410 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 411 } 412 if (!qgroup) { 413 qgroup = add_qgroup_rb(fs_info, found_key.offset); 414 if (IS_ERR(qgroup)) { 415 ret = PTR_ERR(qgroup); 416 goto out; 417 } 418 } 419 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 420 if (ret < 0) 421 goto out; 422 423 switch (found_key.type) { 424 case BTRFS_QGROUP_INFO_KEY: { 425 struct btrfs_qgroup_info_item *ptr; 426 427 ptr = btrfs_item_ptr(l, slot, 428 struct btrfs_qgroup_info_item); 429 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 430 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 431 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 432 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 433 /* generation currently unused */ 434 break; 435 } 436 case BTRFS_QGROUP_LIMIT_KEY: { 437 struct btrfs_qgroup_limit_item *ptr; 438 439 ptr = btrfs_item_ptr(l, slot, 440 struct btrfs_qgroup_limit_item); 441 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 442 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 443 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 444 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 445 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 446 break; 447 } 448 } 449 next1: 450 ret = btrfs_next_item(quota_root, path); 451 if (ret < 0) 452 goto out; 453 if (ret) 454 break; 455 } 456 btrfs_release_path(path); 457 458 /* 459 * pass 2: read all qgroup relations 460 */ 461 key.objectid = 0; 462 key.type = BTRFS_QGROUP_RELATION_KEY; 463 key.offset = 0; 464 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 465 if (ret) 466 goto out; 467 while (1) { 468 slot = path->slots[0]; 469 l = path->nodes[0]; 470 btrfs_item_key_to_cpu(l, &found_key, slot); 471 472 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 473 goto next2; 474 475 if (found_key.objectid > found_key.offset) { 476 /* parent <- member, not needed to build config */ 477 /* FIXME should we omit the key completely? */ 478 goto next2; 479 } 480 481 ret = add_relation_rb(fs_info, found_key.objectid, 482 found_key.offset); 483 if (ret == -ENOENT) { 484 btrfs_warn(fs_info, 485 "orphan qgroup relation 0x%llx->0x%llx", 486 found_key.objectid, found_key.offset); 487 ret = 0; /* ignore the error */ 488 } 489 if (ret) 490 goto out; 491 next2: 492 ret = btrfs_next_item(quota_root, path); 493 if (ret < 0) 494 goto out; 495 if (ret) 496 break; 497 } 498 out: 499 btrfs_free_path(path); 500 fs_info->qgroup_flags |= flags; 501 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 502 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 503 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 504 ret >= 0) 505 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 506 507 if (ret < 0) { 508 ulist_free(fs_info->qgroup_ulist); 509 fs_info->qgroup_ulist = NULL; 510 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 511 btrfs_sysfs_del_qgroups(fs_info); 512 } 513 514 return ret < 0 ? ret : 0; 515 } 516 517 /* 518 * Called in close_ctree() when quota is still enabled. This verifies we don't 519 * leak some reserved space. 520 * 521 * Return false if no reserved space is left. 522 * Return true if some reserved space is leaked. 
523 */ 524 bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info) 525 { 526 struct rb_node *node; 527 bool ret = false; 528 529 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 530 return ret; 531 /* 532 * Since we're unmounting, there is no race and no need to grab the qgroup 533 * lock. And here we don't go post-order, to provide a more user-friendly 534 * sorted result. 535 */ 536 for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) { 537 struct btrfs_qgroup *qgroup; 538 int i; 539 540 qgroup = rb_entry(node, struct btrfs_qgroup, node); 541 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) { 542 if (qgroup->rsv.values[i]) { 543 ret = true; 544 btrfs_warn(fs_info, 545 "qgroup %hu/%llu has unreleased space, type %d rsv %llu", 546 btrfs_qgroup_level(qgroup->qgroupid), 547 btrfs_qgroup_subvolid(qgroup->qgroupid), 548 i, qgroup->rsv.values[i]); 549 } 550 } 551 } 552 return ret; 553 } 554 555 /* 556 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(); 557 * the first two are single-threaded paths. And for the third one, we have 558 * already set quota_root to NULL with qgroup_lock held, so it is safe to clean 559 * up the in-memory structures without holding qgroup_lock. 560 */ 561 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 562 { 563 struct rb_node *n; 564 struct btrfs_qgroup *qgroup; 565 566 while ((n = rb_first(&fs_info->qgroup_tree))) { 567 qgroup = rb_entry(n, struct btrfs_qgroup, node); 568 rb_erase(n, &fs_info->qgroup_tree); 569 __del_qgroup_rb(fs_info, qgroup); 570 btrfs_sysfs_del_one_qgroup(fs_info, qgroup); 571 kfree(qgroup); 572 } 573 /* 574 * We call btrfs_free_qgroup_config() both when unmounting the 575 * filesystem and when disabling quota, so we set qgroup_ulist 576 * to NULL here to avoid a double free. 
577 */ 578 ulist_free(fs_info->qgroup_ulist); 579 fs_info->qgroup_ulist = NULL; 580 btrfs_sysfs_del_qgroups(fs_info); 581 } 582 583 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 584 u64 dst) 585 { 586 int ret; 587 struct btrfs_root *quota_root = trans->fs_info->quota_root; 588 struct btrfs_path *path; 589 struct btrfs_key key; 590 591 path = btrfs_alloc_path(); 592 if (!path) 593 return -ENOMEM; 594 595 key.objectid = src; 596 key.type = BTRFS_QGROUP_RELATION_KEY; 597 key.offset = dst; 598 599 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 600 601 btrfs_mark_buffer_dirty(path->nodes[0]); 602 603 btrfs_free_path(path); 604 return ret; 605 } 606 607 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src, 608 u64 dst) 609 { 610 int ret; 611 struct btrfs_root *quota_root = trans->fs_info->quota_root; 612 struct btrfs_path *path; 613 struct btrfs_key key; 614 615 path = btrfs_alloc_path(); 616 if (!path) 617 return -ENOMEM; 618 619 key.objectid = src; 620 key.type = BTRFS_QGROUP_RELATION_KEY; 621 key.offset = dst; 622 623 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 624 if (ret < 0) 625 goto out; 626 627 if (ret > 0) { 628 ret = -ENOENT; 629 goto out; 630 } 631 632 ret = btrfs_del_item(trans, quota_root, path); 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int add_qgroup_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *quota_root, u64 qgroupid) 640 { 641 int ret; 642 struct btrfs_path *path; 643 struct btrfs_qgroup_info_item *qgroup_info; 644 struct btrfs_qgroup_limit_item *qgroup_limit; 645 struct extent_buffer *leaf; 646 struct btrfs_key key; 647 648 if (btrfs_is_testing(quota_root->fs_info)) 649 return 0; 650 651 path = btrfs_alloc_path(); 652 if (!path) 653 return -ENOMEM; 654 655 key.objectid = 0; 656 key.type = BTRFS_QGROUP_INFO_KEY; 657 key.offset = qgroupid; 658 659 /* 660 * Avoid a transaction abort by catching -EEXIST here. In that 661 * case, we proceed by re-initializing the existing structure 662 * on disk. 
663 */ 664 665 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 666 sizeof(*qgroup_info)); 667 if (ret && ret != -EEXIST) 668 goto out; 669 670 leaf = path->nodes[0]; 671 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 672 struct btrfs_qgroup_info_item); 673 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 674 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 675 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 676 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 677 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 678 679 btrfs_mark_buffer_dirty(leaf); 680 681 btrfs_release_path(path); 682 683 key.type = BTRFS_QGROUP_LIMIT_KEY; 684 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 685 sizeof(*qgroup_limit)); 686 if (ret && ret != -EEXIST) 687 goto out; 688 689 leaf = path->nodes[0]; 690 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 691 struct btrfs_qgroup_limit_item); 692 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 693 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 694 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 695 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 696 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 697 698 btrfs_mark_buffer_dirty(leaf); 699 700 ret = 0; 701 out: 702 btrfs_free_path(path); 703 return ret; 704 } 705 706 static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid) 707 { 708 int ret; 709 struct btrfs_root *quota_root = trans->fs_info->quota_root; 710 struct btrfs_path *path; 711 struct btrfs_key key; 712 713 path = btrfs_alloc_path(); 714 if (!path) 715 return -ENOMEM; 716 717 key.objectid = 0; 718 key.type = BTRFS_QGROUP_INFO_KEY; 719 key.offset = qgroupid; 720 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 721 if (ret < 0) 722 goto out; 723 724 if (ret > 0) { 725 ret = -ENOENT; 726 goto out; 727 } 728 729 ret = btrfs_del_item(trans, quota_root, path); 730 if (ret) 731 goto out; 732 733 btrfs_release_path(path); 734 735 key.type = BTRFS_QGROUP_LIMIT_KEY; 736 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 737 if (ret < 0) 738 goto out; 739 740 if (ret > 0) { 741 ret = -ENOENT; 742 goto out; 743 } 744 745 ret = btrfs_del_item(trans, quota_root, path); 746 747 out: 748 btrfs_free_path(path); 749 return ret; 750 } 751 752 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 753 struct btrfs_qgroup *qgroup) 754 { 755 struct btrfs_root *quota_root = trans->fs_info->quota_root; 756 struct btrfs_path *path; 757 struct btrfs_key key; 758 struct extent_buffer *l; 759 struct btrfs_qgroup_limit_item *qgroup_limit; 760 int ret; 761 int slot; 762 763 key.objectid = 0; 764 key.type = BTRFS_QGROUP_LIMIT_KEY; 765 key.offset = qgroup->qgroupid; 766 767 path = btrfs_alloc_path(); 768 if (!path) 769 return -ENOMEM; 770 771 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 772 if (ret > 0) 773 ret = -ENOENT; 774 775 if (ret) 776 goto out; 777 778 l = path->nodes[0]; 779 slot = path->slots[0]; 780 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 781 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 782 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 783 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 784 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 785 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 786 787 btrfs_mark_buffer_dirty(l); 788 789 out: 790 btrfs_free_path(path); 791 
return ret; 792 } 793 794 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 795 struct btrfs_qgroup *qgroup) 796 { 797 struct btrfs_fs_info *fs_info = trans->fs_info; 798 struct btrfs_root *quota_root = fs_info->quota_root; 799 struct btrfs_path *path; 800 struct btrfs_key key; 801 struct extent_buffer *l; 802 struct btrfs_qgroup_info_item *qgroup_info; 803 int ret; 804 int slot; 805 806 if (btrfs_is_testing(fs_info)) 807 return 0; 808 809 key.objectid = 0; 810 key.type = BTRFS_QGROUP_INFO_KEY; 811 key.offset = qgroup->qgroupid; 812 813 path = btrfs_alloc_path(); 814 if (!path) 815 return -ENOMEM; 816 817 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 818 if (ret > 0) 819 ret = -ENOENT; 820 821 if (ret) 822 goto out; 823 824 l = path->nodes[0]; 825 slot = path->slots[0]; 826 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 827 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 828 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 829 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 830 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 831 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 832 833 btrfs_mark_buffer_dirty(l); 834 835 out: 836 btrfs_free_path(path); 837 return ret; 838 } 839 840 static int update_qgroup_status_item(struct btrfs_trans_handle *trans) 841 { 842 struct btrfs_fs_info *fs_info = trans->fs_info; 843 struct btrfs_root *quota_root = fs_info->quota_root; 844 struct btrfs_path *path; 845 struct btrfs_key key; 846 struct extent_buffer *l; 847 struct btrfs_qgroup_status_item *ptr; 848 int ret; 849 int slot; 850 851 key.objectid = 0; 852 key.type = BTRFS_QGROUP_STATUS_KEY; 853 key.offset = 0; 854 855 path = btrfs_alloc_path(); 856 if (!path) 857 return -ENOMEM; 858 859 ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1); 860 if (ret > 0) 861 ret = -ENOENT; 862 863 if (ret) 864 goto out; 865 866 l = path->nodes[0]; 867 slot = path->slots[0]; 868 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 869 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 870 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 871 btrfs_set_qgroup_status_rescan(l, ptr, 872 fs_info->qgroup_rescan_progress.objectid); 873 874 btrfs_mark_buffer_dirty(l); 875 876 out: 877 btrfs_free_path(path); 878 return ret; 879 } 880 881 /* 882 * called with qgroup_lock held 883 */ 884 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 885 struct btrfs_root *root) 886 { 887 struct btrfs_path *path; 888 struct btrfs_key key; 889 struct extent_buffer *leaf = NULL; 890 int ret; 891 int nr = 0; 892 893 path = btrfs_alloc_path(); 894 if (!path) 895 return -ENOMEM; 896 897 key.objectid = 0; 898 key.offset = 0; 899 key.type = 0; 900 901 while (1) { 902 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 903 if (ret < 0) 904 goto out; 905 leaf = path->nodes[0]; 906 nr = btrfs_header_nritems(leaf); 907 if (!nr) 908 break; 909 /* 910 * delete the leaf one by one 911 * since the whole tree is going 912 * to be deleted. 
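* Deleting all items in a leaf allows the leaf itself to be freed, and the
* next iteration searches again from the start of the (now smaller) tree.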
913 */ 914 path->slots[0] = 0; 915 ret = btrfs_del_items(trans, root, path, 0, nr); 916 if (ret) 917 goto out; 918 919 btrfs_release_path(path); 920 } 921 ret = 0; 922 out: 923 btrfs_free_path(path); 924 return ret; 925 } 926 927 int btrfs_quota_enable(struct btrfs_fs_info *fs_info) 928 { 929 struct btrfs_root *quota_root; 930 struct btrfs_root *tree_root = fs_info->tree_root; 931 struct btrfs_path *path = NULL; 932 struct btrfs_qgroup_status_item *ptr; 933 struct extent_buffer *leaf; 934 struct btrfs_key key; 935 struct btrfs_key found_key; 936 struct btrfs_qgroup *qgroup = NULL; 937 struct btrfs_trans_handle *trans = NULL; 938 struct ulist *ulist = NULL; 939 int ret = 0; 940 int slot; 941 942 mutex_lock(&fs_info->qgroup_ioctl_lock); 943 if (fs_info->quota_root) 944 goto out; 945 946 ulist = ulist_alloc(GFP_KERNEL); 947 if (!ulist) { 948 ret = -ENOMEM; 949 goto out; 950 } 951 952 ret = btrfs_sysfs_add_qgroups(fs_info); 953 if (ret < 0) 954 goto out; 955 956 /* 957 * Unlock qgroup_ioctl_lock before starting the transaction. This is to 958 * avoid lock acquisition inversion problems (reported by lockdep) between 959 * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we 960 * start a transaction. 961 * After we started the transaction lock qgroup_ioctl_lock again and 962 * check if someone else created the quota root in the meanwhile. If so, 963 * just return success and release the transaction handle. 964 * 965 * Also we don't need to worry about someone else calling 966 * btrfs_sysfs_add_qgroups() after we unlock and getting an error because 967 * that function returns 0 (success) when the sysfs entries already exist. 968 */ 969 mutex_unlock(&fs_info->qgroup_ioctl_lock); 970 971 /* 972 * 1 for quota root item 973 * 1 for BTRFS_QGROUP_STATUS item 974 * 975 * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items 976 * per subvolume. However those are not currently reserved since it 977 * would be a lot of overkill. 
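* The 2 units passed to btrfs_start_transaction() below correspond to
* these two reserved items.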
978 */ 979 trans = btrfs_start_transaction(tree_root, 2); 980 981 mutex_lock(&fs_info->qgroup_ioctl_lock); 982 if (IS_ERR(trans)) { 983 ret = PTR_ERR(trans); 984 trans = NULL; 985 goto out; 986 } 987 988 if (fs_info->quota_root) 989 goto out; 990 991 fs_info->qgroup_ulist = ulist; 992 ulist = NULL; 993 994 /* 995 * initially create the quota tree 996 */ 997 quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID); 998 if (IS_ERR(quota_root)) { 999 ret = PTR_ERR(quota_root); 1000 btrfs_abort_transaction(trans, ret); 1001 goto out; 1002 } 1003 1004 path = btrfs_alloc_path(); 1005 if (!path) { 1006 ret = -ENOMEM; 1007 btrfs_abort_transaction(trans, ret); 1008 goto out_free_root; 1009 } 1010 1011 key.objectid = 0; 1012 key.type = BTRFS_QGROUP_STATUS_KEY; 1013 key.offset = 0; 1014 1015 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 1016 sizeof(*ptr)); 1017 if (ret) { 1018 btrfs_abort_transaction(trans, ret); 1019 goto out_free_path; 1020 } 1021 1022 leaf = path->nodes[0]; 1023 ptr = btrfs_item_ptr(leaf, path->slots[0], 1024 struct btrfs_qgroup_status_item); 1025 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 1026 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 1027 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 1028 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1029 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 1030 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 1031 1032 btrfs_mark_buffer_dirty(leaf); 1033 1034 key.objectid = 0; 1035 key.type = BTRFS_ROOT_REF_KEY; 1036 key.offset = 0; 1037 1038 btrfs_release_path(path); 1039 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 1040 if (ret > 0) 1041 goto out_add_root; 1042 if (ret < 0) { 1043 btrfs_abort_transaction(trans, ret); 1044 goto out_free_path; 1045 } 1046 1047 while (1) { 1048 slot = path->slots[0]; 1049 leaf = path->nodes[0]; 1050 btrfs_item_key_to_cpu(leaf, &found_key, slot); 1051 1052 if (found_key.type == BTRFS_ROOT_REF_KEY) { 1053 1054 /* Release locks on tree_root before we access quota_root */ 1055 btrfs_release_path(path); 1056 1057 ret = add_qgroup_item(trans, quota_root, 1058 found_key.offset); 1059 if (ret) { 1060 btrfs_abort_transaction(trans, ret); 1061 goto out_free_path; 1062 } 1063 1064 qgroup = add_qgroup_rb(fs_info, found_key.offset); 1065 if (IS_ERR(qgroup)) { 1066 ret = PTR_ERR(qgroup); 1067 btrfs_abort_transaction(trans, ret); 1068 goto out_free_path; 1069 } 1070 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1071 if (ret < 0) { 1072 btrfs_abort_transaction(trans, ret); 1073 goto out_free_path; 1074 } 1075 ret = btrfs_search_slot_for_read(tree_root, &found_key, 1076 path, 1, 0); 1077 if (ret < 0) { 1078 btrfs_abort_transaction(trans, ret); 1079 goto out_free_path; 1080 } 1081 if (ret > 0) { 1082 /* 1083 * Shouldn't happen, but in case it does we 1084 * don't need to do the btrfs_next_item, just 1085 * continue. 
1086 */ 1087 continue; 1088 } 1089 } 1090 ret = btrfs_next_item(tree_root, path); 1091 if (ret < 0) { 1092 btrfs_abort_transaction(trans, ret); 1093 goto out_free_path; 1094 } 1095 if (ret) 1096 break; 1097 } 1098 1099 out_add_root: 1100 btrfs_release_path(path); 1101 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 1102 if (ret) { 1103 btrfs_abort_transaction(trans, ret); 1104 goto out_free_path; 1105 } 1106 1107 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 1108 if (IS_ERR(qgroup)) { 1109 ret = PTR_ERR(qgroup); 1110 btrfs_abort_transaction(trans, ret); 1111 goto out_free_path; 1112 } 1113 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1114 if (ret < 0) { 1115 btrfs_abort_transaction(trans, ret); 1116 goto out_free_path; 1117 } 1118 1119 ret = btrfs_commit_transaction(trans); 1120 trans = NULL; 1121 if (ret) 1122 goto out_free_path; 1123 1124 /* 1125 * Set quota enabled flag after committing the transaction, to avoid 1126 * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot 1127 * creation. 1128 */ 1129 spin_lock(&fs_info->qgroup_lock); 1130 fs_info->quota_root = quota_root; 1131 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1132 spin_unlock(&fs_info->qgroup_lock); 1133 1134 ret = qgroup_rescan_init(fs_info, 0, 1); 1135 if (!ret) { 1136 qgroup_rescan_zero_tracking(fs_info); 1137 fs_info->qgroup_rescan_running = true; 1138 btrfs_queue_work(fs_info->qgroup_rescan_workers, 1139 &fs_info->qgroup_rescan_work); 1140 } 1141 1142 out_free_path: 1143 btrfs_free_path(path); 1144 out_free_root: 1145 if (ret) 1146 btrfs_put_root(quota_root); 1147 out: 1148 if (ret) { 1149 ulist_free(fs_info->qgroup_ulist); 1150 fs_info->qgroup_ulist = NULL; 1151 btrfs_sysfs_del_qgroups(fs_info); 1152 } 1153 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1154 if (ret && trans) 1155 btrfs_end_transaction(trans); 1156 else if (trans) 1157 ret = btrfs_end_transaction(trans); 1158 ulist_free(ulist); 1159 return ret; 1160 } 1161 1162 int btrfs_quota_disable(struct btrfs_fs_info *fs_info) 1163 { 1164 struct btrfs_root *quota_root; 1165 struct btrfs_trans_handle *trans = NULL; 1166 int ret = 0; 1167 1168 mutex_lock(&fs_info->qgroup_ioctl_lock); 1169 if (!fs_info->quota_root) 1170 goto out; 1171 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1172 1173 /* 1174 * 1 For the root item 1175 * 1176 * We should also reserve enough items for the quota tree deletion in 1177 * btrfs_clean_quota_tree but this is not done. 1178 * 1179 * Also, we must always start a transaction without holding the mutex 1180 * qgroup_ioctl_lock, see btrfs_quota_enable(). 
1181 */ 1182 trans = btrfs_start_transaction(fs_info->tree_root, 1); 1183 1184 mutex_lock(&fs_info->qgroup_ioctl_lock); 1185 if (IS_ERR(trans)) { 1186 ret = PTR_ERR(trans); 1187 trans = NULL; 1188 goto out; 1189 } 1190 1191 if (!fs_info->quota_root) 1192 goto out; 1193 1194 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1195 btrfs_qgroup_wait_for_completion(fs_info, false); 1196 spin_lock(&fs_info->qgroup_lock); 1197 quota_root = fs_info->quota_root; 1198 fs_info->quota_root = NULL; 1199 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 1200 spin_unlock(&fs_info->qgroup_lock); 1201 1202 btrfs_free_qgroup_config(fs_info); 1203 1204 ret = btrfs_clean_quota_tree(trans, quota_root); 1205 if (ret) { 1206 btrfs_abort_transaction(trans, ret); 1207 goto out; 1208 } 1209 1210 ret = btrfs_del_root(trans, "a_root->root_key); 1211 if (ret) { 1212 btrfs_abort_transaction(trans, ret); 1213 goto out; 1214 } 1215 1216 list_del("a_root->dirty_list); 1217 1218 btrfs_tree_lock(quota_root->node); 1219 btrfs_clean_tree_block(quota_root->node); 1220 btrfs_tree_unlock(quota_root->node); 1221 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 1222 1223 btrfs_put_root(quota_root); 1224 1225 out: 1226 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1227 if (ret && trans) 1228 btrfs_end_transaction(trans); 1229 else if (trans) 1230 ret = btrfs_end_transaction(trans); 1231 1232 return ret; 1233 } 1234 1235 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 1236 struct btrfs_qgroup *qgroup) 1237 { 1238 if (list_empty(&qgroup->dirty)) 1239 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1240 } 1241 1242 /* 1243 * The easy accounting, we're updating qgroup relationship whose child qgroup 1244 * only has exclusive extents. 1245 * 1246 * In this case, all exclusive extents will also be exclusive for parent, so 1247 * excl/rfer just get added/removed. 1248 * 1249 * So is qgroup reservation space, which should also be added/removed to 1250 * parent. 1251 * Or when child tries to release reservation space, parent will underflow its 1252 * reservation (for relationship adding case). 1253 * 1254 * Caller should hold fs_info->qgroup_lock. 
1255 */ 1256 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1257 struct ulist *tmp, u64 ref_root, 1258 struct btrfs_qgroup *src, int sign) 1259 { 1260 struct btrfs_qgroup *qgroup; 1261 struct btrfs_qgroup_list *glist; 1262 struct ulist_node *unode; 1263 struct ulist_iterator uiter; 1264 u64 num_bytes = src->excl; 1265 int ret = 0; 1266 1267 qgroup = find_qgroup_rb(fs_info, ref_root); 1268 if (!qgroup) 1269 goto out; 1270 1271 qgroup->rfer += sign * num_bytes; 1272 qgroup->rfer_cmpr += sign * num_bytes; 1273 1274 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1275 qgroup->excl += sign * num_bytes; 1276 qgroup->excl_cmpr += sign * num_bytes; 1277 1278 if (sign > 0) 1279 qgroup_rsv_add_by_qgroup(fs_info, qgroup, src); 1280 else 1281 qgroup_rsv_release_by_qgroup(fs_info, qgroup, src); 1282 1283 qgroup_dirty(fs_info, qgroup); 1284 1285 /* Get all of the parent groups that contain this qgroup */ 1286 list_for_each_entry(glist, &qgroup->groups, next_group) { 1287 ret = ulist_add(tmp, glist->group->qgroupid, 1288 qgroup_to_aux(glist->group), GFP_ATOMIC); 1289 if (ret < 0) 1290 goto out; 1291 } 1292 1293 /* Iterate all of the parents and adjust their reference counts */ 1294 ULIST_ITER_INIT(&uiter); 1295 while ((unode = ulist_next(tmp, &uiter))) { 1296 qgroup = unode_aux_to_qgroup(unode); 1297 qgroup->rfer += sign * num_bytes; 1298 qgroup->rfer_cmpr += sign * num_bytes; 1299 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1300 qgroup->excl += sign * num_bytes; 1301 if (sign > 0) 1302 qgroup_rsv_add_by_qgroup(fs_info, qgroup, src); 1303 else 1304 qgroup_rsv_release_by_qgroup(fs_info, qgroup, src); 1305 qgroup->excl_cmpr += sign * num_bytes; 1306 qgroup_dirty(fs_info, qgroup); 1307 1308 /* Add any parents of the parents */ 1309 list_for_each_entry(glist, &qgroup->groups, next_group) { 1310 ret = ulist_add(tmp, glist->group->qgroupid, 1311 qgroup_to_aux(glist->group), GFP_ATOMIC); 1312 if (ret < 0) 1313 goto out; 1314 } 1315 } 1316 ret = 0; 1317 out: 1318 return ret; 1319 } 1320 1321 1322 /* 1323 * Quick path for updating qgroup with only excl refs. 1324 * 1325 * In that case, just update all parent will be enough. 1326 * Or we needs to do a full rescan. 1327 * Caller should also hold fs_info->qgroup_lock. 1328 * 1329 * Return 0 for quick update, return >0 for need to full rescan 1330 * and mark INCONSISTENT flag. 1331 * Return < 0 for other error. 1332 */ 1333 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1334 struct ulist *tmp, u64 src, u64 dst, 1335 int sign) 1336 { 1337 struct btrfs_qgroup *qgroup; 1338 int ret = 1; 1339 int err = 0; 1340 1341 qgroup = find_qgroup_rb(fs_info, src); 1342 if (!qgroup) 1343 goto out; 1344 if (qgroup->excl == qgroup->rfer) { 1345 ret = 0; 1346 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1347 qgroup, sign); 1348 if (err < 0) { 1349 ret = err; 1350 goto out; 1351 } 1352 } 1353 out: 1354 if (ret) 1355 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1356 return ret; 1357 } 1358 1359 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1360 u64 dst) 1361 { 1362 struct btrfs_fs_info *fs_info = trans->fs_info; 1363 struct btrfs_qgroup *parent; 1364 struct btrfs_qgroup *member; 1365 struct btrfs_qgroup_list *list; 1366 struct ulist *tmp; 1367 unsigned int nofs_flag; 1368 int ret = 0; 1369 1370 /* Check the level of src and dst first */ 1371 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1372 return -EINVAL; 1373 1374 /* We hold a transaction handle open, must do a NOFS allocation. 
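Otherwise direct reclaim triggered by this allocation could recurse back into the filesystem and deadlock against the transaction we hold.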
*/ 1375 nofs_flag = memalloc_nofs_save(); 1376 tmp = ulist_alloc(GFP_KERNEL); 1377 memalloc_nofs_restore(nofs_flag); 1378 if (!tmp) 1379 return -ENOMEM; 1380 1381 mutex_lock(&fs_info->qgroup_ioctl_lock); 1382 if (!fs_info->quota_root) { 1383 ret = -ENOTCONN; 1384 goto out; 1385 } 1386 member = find_qgroup_rb(fs_info, src); 1387 parent = find_qgroup_rb(fs_info, dst); 1388 if (!member || !parent) { 1389 ret = -EINVAL; 1390 goto out; 1391 } 1392 1393 /* check if such qgroup relation exist firstly */ 1394 list_for_each_entry(list, &member->groups, next_group) { 1395 if (list->group == parent) { 1396 ret = -EEXIST; 1397 goto out; 1398 } 1399 } 1400 1401 ret = add_qgroup_relation_item(trans, src, dst); 1402 if (ret) 1403 goto out; 1404 1405 ret = add_qgroup_relation_item(trans, dst, src); 1406 if (ret) { 1407 del_qgroup_relation_item(trans, src, dst); 1408 goto out; 1409 } 1410 1411 spin_lock(&fs_info->qgroup_lock); 1412 ret = add_relation_rb(fs_info, src, dst); 1413 if (ret < 0) { 1414 spin_unlock(&fs_info->qgroup_lock); 1415 goto out; 1416 } 1417 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1418 spin_unlock(&fs_info->qgroup_lock); 1419 out: 1420 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1421 ulist_free(tmp); 1422 return ret; 1423 } 1424 1425 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1426 u64 dst) 1427 { 1428 struct btrfs_fs_info *fs_info = trans->fs_info; 1429 struct btrfs_qgroup *parent; 1430 struct btrfs_qgroup *member; 1431 struct btrfs_qgroup_list *list; 1432 struct ulist *tmp; 1433 bool found = false; 1434 unsigned int nofs_flag; 1435 int ret = 0; 1436 int ret2; 1437 1438 /* We hold a transaction handle open, must do a NOFS allocation. */ 1439 nofs_flag = memalloc_nofs_save(); 1440 tmp = ulist_alloc(GFP_KERNEL); 1441 memalloc_nofs_restore(nofs_flag); 1442 if (!tmp) 1443 return -ENOMEM; 1444 1445 if (!fs_info->quota_root) { 1446 ret = -ENOTCONN; 1447 goto out; 1448 } 1449 1450 member = find_qgroup_rb(fs_info, src); 1451 parent = find_qgroup_rb(fs_info, dst); 1452 /* 1453 * The parent/member pair doesn't exist, then try to delete the dead 1454 * relation items only. 
1455 */ 1456 if (!member || !parent) 1457 goto delete_item; 1458 1459 /* check if such qgroup relation exist firstly */ 1460 list_for_each_entry(list, &member->groups, next_group) { 1461 if (list->group == parent) { 1462 found = true; 1463 break; 1464 } 1465 } 1466 1467 delete_item: 1468 ret = del_qgroup_relation_item(trans, src, dst); 1469 if (ret < 0 && ret != -ENOENT) 1470 goto out; 1471 ret2 = del_qgroup_relation_item(trans, dst, src); 1472 if (ret2 < 0 && ret2 != -ENOENT) 1473 goto out; 1474 1475 /* At least one deletion succeeded, return 0 */ 1476 if (!ret || !ret2) 1477 ret = 0; 1478 1479 if (found) { 1480 spin_lock(&fs_info->qgroup_lock); 1481 del_relation_rb(fs_info, src, dst); 1482 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1483 spin_unlock(&fs_info->qgroup_lock); 1484 } 1485 out: 1486 ulist_free(tmp); 1487 return ret; 1488 } 1489 1490 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, 1491 u64 dst) 1492 { 1493 struct btrfs_fs_info *fs_info = trans->fs_info; 1494 int ret = 0; 1495 1496 mutex_lock(&fs_info->qgroup_ioctl_lock); 1497 ret = __del_qgroup_relation(trans, src, dst); 1498 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1499 1500 return ret; 1501 } 1502 1503 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1504 { 1505 struct btrfs_fs_info *fs_info = trans->fs_info; 1506 struct btrfs_root *quota_root; 1507 struct btrfs_qgroup *qgroup; 1508 int ret = 0; 1509 1510 mutex_lock(&fs_info->qgroup_ioctl_lock); 1511 if (!fs_info->quota_root) { 1512 ret = -ENOTCONN; 1513 goto out; 1514 } 1515 quota_root = fs_info->quota_root; 1516 qgroup = find_qgroup_rb(fs_info, qgroupid); 1517 if (qgroup) { 1518 ret = -EEXIST; 1519 goto out; 1520 } 1521 1522 ret = add_qgroup_item(trans, quota_root, qgroupid); 1523 if (ret) 1524 goto out; 1525 1526 spin_lock(&fs_info->qgroup_lock); 1527 qgroup = add_qgroup_rb(fs_info, qgroupid); 1528 spin_unlock(&fs_info->qgroup_lock); 1529 1530 if (IS_ERR(qgroup)) { 1531 ret = PTR_ERR(qgroup); 1532 goto out; 1533 } 1534 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); 1535 out: 1536 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1537 return ret; 1538 } 1539 1540 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) 1541 { 1542 struct btrfs_fs_info *fs_info = trans->fs_info; 1543 struct btrfs_qgroup *qgroup; 1544 struct btrfs_qgroup_list *list; 1545 int ret = 0; 1546 1547 mutex_lock(&fs_info->qgroup_ioctl_lock); 1548 if (!fs_info->quota_root) { 1549 ret = -ENOTCONN; 1550 goto out; 1551 } 1552 1553 qgroup = find_qgroup_rb(fs_info, qgroupid); 1554 if (!qgroup) { 1555 ret = -ENOENT; 1556 goto out; 1557 } 1558 1559 /* Check if there are no children of this qgroup */ 1560 if (!list_empty(&qgroup->members)) { 1561 ret = -EBUSY; 1562 goto out; 1563 } 1564 1565 ret = del_qgroup_item(trans, qgroupid); 1566 if (ret && ret != -ENOENT) 1567 goto out; 1568 1569 while (!list_empty(&qgroup->groups)) { 1570 list = list_first_entry(&qgroup->groups, 1571 struct btrfs_qgroup_list, next_group); 1572 ret = __del_qgroup_relation(trans, qgroupid, 1573 list->group->qgroupid); 1574 if (ret) 1575 goto out; 1576 } 1577 1578 spin_lock(&fs_info->qgroup_lock); 1579 del_qgroup_rb(fs_info, qgroupid); 1580 spin_unlock(&fs_info->qgroup_lock); 1581 1582 /* 1583 * Remove the qgroup from sysfs now without holding the qgroup_lock 1584 * spinlock, since the sysfs_remove_group() function needs to take 1585 * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). 
1586 */ 1587 btrfs_sysfs_del_one_qgroup(fs_info, qgroup); 1588 kfree(qgroup); 1589 out: 1590 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1591 return ret; 1592 } 1593 1594 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, 1595 struct btrfs_qgroup_limit *limit) 1596 { 1597 struct btrfs_fs_info *fs_info = trans->fs_info; 1598 struct btrfs_qgroup *qgroup; 1599 int ret = 0; 1600 /* Sometimes we would want to clear the limit on this qgroup. 1601 * To meet this requirement, we treat the -1 as a special value 1602 * which tell kernel to clear the limit on this qgroup. 1603 */ 1604 const u64 CLEAR_VALUE = -1; 1605 1606 mutex_lock(&fs_info->qgroup_ioctl_lock); 1607 if (!fs_info->quota_root) { 1608 ret = -ENOTCONN; 1609 goto out; 1610 } 1611 1612 qgroup = find_qgroup_rb(fs_info, qgroupid); 1613 if (!qgroup) { 1614 ret = -ENOENT; 1615 goto out; 1616 } 1617 1618 spin_lock(&fs_info->qgroup_lock); 1619 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1620 if (limit->max_rfer == CLEAR_VALUE) { 1621 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1622 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1623 qgroup->max_rfer = 0; 1624 } else { 1625 qgroup->max_rfer = limit->max_rfer; 1626 } 1627 } 1628 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1629 if (limit->max_excl == CLEAR_VALUE) { 1630 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1631 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1632 qgroup->max_excl = 0; 1633 } else { 1634 qgroup->max_excl = limit->max_excl; 1635 } 1636 } 1637 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1638 if (limit->rsv_rfer == CLEAR_VALUE) { 1639 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1640 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1641 qgroup->rsv_rfer = 0; 1642 } else { 1643 qgroup->rsv_rfer = limit->rsv_rfer; 1644 } 1645 } 1646 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1647 if (limit->rsv_excl == CLEAR_VALUE) { 1648 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1649 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1650 qgroup->rsv_excl = 0; 1651 } else { 1652 qgroup->rsv_excl = limit->rsv_excl; 1653 } 1654 } 1655 qgroup->lim_flags |= limit->flags; 1656 1657 spin_unlock(&fs_info->qgroup_lock); 1658 1659 ret = update_qgroup_limit_item(trans, qgroup); 1660 if (ret) { 1661 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1662 btrfs_info(fs_info, "unable to update quota limit for %llu", 1663 qgroupid); 1664 } 1665 1666 out: 1667 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1668 return ret; 1669 } 1670 1671 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1672 struct btrfs_delayed_ref_root *delayed_refs, 1673 struct btrfs_qgroup_extent_record *record) 1674 { 1675 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1676 struct rb_node *parent_node = NULL; 1677 struct btrfs_qgroup_extent_record *entry; 1678 u64 bytenr = record->bytenr; 1679 1680 lockdep_assert_held(&delayed_refs->lock); 1681 trace_btrfs_qgroup_trace_extent(fs_info, record); 1682 1683 while (*p) { 1684 parent_node = *p; 1685 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1686 node); 1687 if (bytenr < entry->bytenr) { 1688 p = &(*p)->rb_left; 1689 } else if (bytenr > entry->bytenr) { 1690 p = &(*p)->rb_right; 1691 } else { 1692 if (record->data_rsv && !entry->data_rsv) { 1693 entry->data_rsv = record->data_rsv; 1694 entry->data_rsv_refroot = 1695 record->data_rsv_refroot; 1696 } 1697 return 1; 1698 } 1699 } 1700 1701 rb_link_node(&record->node, parent_node, p); 1702 rb_insert_color(&record->node, 
&delayed_refs->dirty_extent_root); 1703 return 0; 1704 } 1705 1706 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1707 struct btrfs_qgroup_extent_record *qrecord) 1708 { 1709 struct ulist *old_root; 1710 u64 bytenr = qrecord->bytenr; 1711 int ret; 1712 1713 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); 1714 if (ret < 0) { 1715 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1716 btrfs_warn(fs_info, 1717 "error accounting new delayed refs extent (err code: %d), quota inconsistent", 1718 ret); 1719 return 0; 1720 } 1721 1722 /* 1723 * Here we don't need to get the lock of 1724 * trans->transaction->delayed_refs, since inserted qrecord won't 1725 * be deleted, only qrecord->node may be modified (new qrecord insert) 1726 * 1727 * So modifying qrecord->old_roots is safe here 1728 */ 1729 qrecord->old_roots = old_root; 1730 return 0; 1731 } 1732 1733 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, 1734 u64 num_bytes, gfp_t gfp_flag) 1735 { 1736 struct btrfs_fs_info *fs_info = trans->fs_info; 1737 struct btrfs_qgroup_extent_record *record; 1738 struct btrfs_delayed_ref_root *delayed_refs; 1739 int ret; 1740 1741 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1742 || bytenr == 0 || num_bytes == 0) 1743 return 0; 1744 record = kzalloc(sizeof(*record), gfp_flag); 1745 if (!record) 1746 return -ENOMEM; 1747 1748 delayed_refs = &trans->transaction->delayed_refs; 1749 record->bytenr = bytenr; 1750 record->num_bytes = num_bytes; 1751 record->old_roots = NULL; 1752 1753 spin_lock(&delayed_refs->lock); 1754 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1755 spin_unlock(&delayed_refs->lock); 1756 if (ret > 0) { 1757 kfree(record); 1758 return 0; 1759 } 1760 return btrfs_qgroup_trace_extent_post(fs_info, record); 1761 } 1762 1763 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1764 struct extent_buffer *eb) 1765 { 1766 struct btrfs_fs_info *fs_info = trans->fs_info; 1767 int nr = btrfs_header_nritems(eb); 1768 int i, extent_type, ret; 1769 struct btrfs_key key; 1770 struct btrfs_file_extent_item *fi; 1771 u64 bytenr, num_bytes; 1772 1773 /* We can be called directly from walk_up_proc() */ 1774 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1775 return 0; 1776 1777 for (i = 0; i < nr; i++) { 1778 btrfs_item_key_to_cpu(eb, &key, i); 1779 1780 if (key.type != BTRFS_EXTENT_DATA_KEY) 1781 continue; 1782 1783 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1784 /* filter out non qgroup-accountable extents */ 1785 extent_type = btrfs_file_extent_type(eb, fi); 1786 1787 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1788 continue; 1789 1790 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1791 if (!bytenr) 1792 continue; 1793 1794 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1795 1796 ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes, 1797 GFP_NOFS); 1798 if (ret) 1799 return ret; 1800 } 1801 cond_resched(); 1802 return 0; 1803 } 1804 1805 /* 1806 * Walk up the tree from the bottom, freeing leaves and any interior 1807 * nodes which have had all slots visited. If a node (leaf or 1808 * interior) is freed, the node above it will have it's slot 1809 * incremented. The root node will never be freed. 1810 * 1811 * At the end of this function, we should have a path which has all 1812 * slots incremented to the next position for a search. 
If we need to 1813 * read a new node it will be NULL and the node above it will have the 1814 * correct slot selected for a later read. 1815 * 1816 * If we increment the root nodes slot counter past the number of 1817 * elements, 1 is returned to signal completion of the search. 1818 */ 1819 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1820 { 1821 int level = 0; 1822 int nr, slot; 1823 struct extent_buffer *eb; 1824 1825 if (root_level == 0) 1826 return 1; 1827 1828 while (level <= root_level) { 1829 eb = path->nodes[level]; 1830 nr = btrfs_header_nritems(eb); 1831 path->slots[level]++; 1832 slot = path->slots[level]; 1833 if (slot >= nr || level == 0) { 1834 /* 1835 * Don't free the root - we will detect this 1836 * condition after our loop and return a 1837 * positive value for caller to stop walking the tree. 1838 */ 1839 if (level != root_level) { 1840 btrfs_tree_unlock_rw(eb, path->locks[level]); 1841 path->locks[level] = 0; 1842 1843 free_extent_buffer(eb); 1844 path->nodes[level] = NULL; 1845 path->slots[level] = 0; 1846 } 1847 } else { 1848 /* 1849 * We have a valid slot to walk back down 1850 * from. Stop here so caller can process these 1851 * new nodes. 1852 */ 1853 break; 1854 } 1855 1856 level++; 1857 } 1858 1859 eb = path->nodes[root_level]; 1860 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1861 return 1; 1862 1863 return 0; 1864 } 1865 1866 /* 1867 * Helper function to trace a subtree tree block swap. 1868 * 1869 * The swap will happen in highest tree block, but there may be a lot of 1870 * tree blocks involved. 1871 * 1872 * For example: 1873 * OO = Old tree blocks 1874 * NN = New tree blocks allocated during balance 1875 * 1876 * File tree (257) Reloc tree for 257 1877 * L2 OO NN 1878 * / \ / \ 1879 * L1 OO OO (a) OO NN (a) 1880 * / \ / \ / \ / \ 1881 * L0 OO OO OO OO OO OO NN NN 1882 * (b) (c) (b) (c) 1883 * 1884 * When calling qgroup_trace_extent_swap(), we will pass: 1885 * @src_eb = OO(a) 1886 * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ] 1887 * @dst_level = 0 1888 * @root_level = 1 1889 * 1890 * In that case, qgroup_trace_extent_swap() will search from OO(a) to 1891 * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty. 1892 * 1893 * The main work of qgroup_trace_extent_swap() can be split into 3 parts: 1894 * 1895 * 1) Tree search from @src_eb 1896 * It should acts as a simplified btrfs_search_slot(). 1897 * The key for search can be extracted from @dst_path->nodes[dst_level] 1898 * (first key). 1899 * 1900 * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty 1901 * NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty. 1902 * They should be marked during previous (@dst_level = 1) iteration. 1903 * 1904 * 3) Mark file extents in leaves dirty 1905 * We don't have good way to pick out new file extents only. 1906 * So we still follow the old method by scanning all file extents in 1907 * the leave. 1908 * 1909 * This function can free us from keeping two paths, thus later we only need 1910 * to care about how to iterate all new tree blocks in reloc tree. 
1911 */ 1912 static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, 1913 struct extent_buffer *src_eb, 1914 struct btrfs_path *dst_path, 1915 int dst_level, int root_level, 1916 bool trace_leaf) 1917 { 1918 struct btrfs_key key; 1919 struct btrfs_path *src_path; 1920 struct btrfs_fs_info *fs_info = trans->fs_info; 1921 u32 nodesize = fs_info->nodesize; 1922 int cur_level = root_level; 1923 int ret; 1924 1925 BUG_ON(dst_level > root_level); 1926 /* Level mismatch */ 1927 if (btrfs_header_level(src_eb) != root_level) 1928 return -EINVAL; 1929 1930 src_path = btrfs_alloc_path(); 1931 if (!src_path) { 1932 ret = -ENOMEM; 1933 goto out; 1934 } 1935 1936 if (dst_level) 1937 btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0); 1938 else 1939 btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); 1940 1941 /* For src_path */ 1942 atomic_inc(&src_eb->refs); 1943 src_path->nodes[root_level] = src_eb; 1944 src_path->slots[root_level] = dst_path->slots[root_level]; 1945 src_path->locks[root_level] = 0; 1946 1947 /* A simplified version of btrfs_search_slot() */ 1948 while (cur_level >= dst_level) { 1949 struct btrfs_key src_key; 1950 struct btrfs_key dst_key; 1951 1952 if (src_path->nodes[cur_level] == NULL) { 1953 struct extent_buffer *eb; 1954 int parent_slot; 1955 1956 eb = src_path->nodes[cur_level + 1]; 1957 parent_slot = src_path->slots[cur_level + 1]; 1958 1959 eb = btrfs_read_node_slot(eb, parent_slot); 1960 if (IS_ERR(eb)) { 1961 ret = PTR_ERR(eb); 1962 goto out; 1963 } 1964 1965 src_path->nodes[cur_level] = eb; 1966 1967 btrfs_tree_read_lock(eb); 1968 src_path->locks[cur_level] = BTRFS_READ_LOCK; 1969 } 1970 1971 src_path->slots[cur_level] = dst_path->slots[cur_level]; 1972 if (cur_level) { 1973 btrfs_node_key_to_cpu(dst_path->nodes[cur_level], 1974 &dst_key, dst_path->slots[cur_level]); 1975 btrfs_node_key_to_cpu(src_path->nodes[cur_level], 1976 &src_key, src_path->slots[cur_level]); 1977 } else { 1978 btrfs_item_key_to_cpu(dst_path->nodes[cur_level], 1979 &dst_key, dst_path->slots[cur_level]); 1980 btrfs_item_key_to_cpu(src_path->nodes[cur_level], 1981 &src_key, src_path->slots[cur_level]); 1982 } 1983 /* Content mismatch, something went wrong */ 1984 if (btrfs_comp_cpu_keys(&dst_key, &src_key)) { 1985 ret = -ENOENT; 1986 goto out; 1987 } 1988 cur_level--; 1989 } 1990 1991 /* 1992 * Now both @dst_path and @src_path have been populated, record the tree 1993 * blocks for qgroup accounting. 1994 */ 1995 ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start, 1996 nodesize, GFP_NOFS); 1997 if (ret < 0) 1998 goto out; 1999 ret = btrfs_qgroup_trace_extent(trans, 2000 dst_path->nodes[dst_level]->start, 2001 nodesize, GFP_NOFS); 2002 if (ret < 0) 2003 goto out; 2004 2005 /* Record leaf file extents */ 2006 if (dst_level == 0 && trace_leaf) { 2007 ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]); 2008 if (ret < 0) 2009 goto out; 2010 ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]); 2011 } 2012 out: 2013 btrfs_free_path(src_path); 2014 return ret; 2015 } 2016 2017 /* 2018 * Helper function to do recursive generation-aware depth-first search, to 2019 * locate all new tree blocks in a subtree of reloc tree. 2020 * 2021 * E.g. 
(OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
2022 * reloc tree
2023 * L2 NN (a)
2024 * / \
2025 * L1 OO NN (b)
2026 * / \ / \
2027 * L0 OO OO OO NN
2028 * (c) (d)
2029 * If we pass:
2030 * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
2031 * @cur_level = 1
2032 * @root_level = 1
2033 *
2034 * We will iterate through tree blocks NN(b), NN(d) and inform qgroup to trace
2035 * these tree blocks along with their counterparts in the file tree.
2036 * During the search, old tree blocks like OO(c) will be skipped, as the tree
2037 * block swap won't affect them.
2038 */
2039 static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle *trans,
2040 struct extent_buffer *src_eb,
2041 struct btrfs_path *dst_path,
2042 int cur_level, int root_level,
2043 u64 last_snapshot, bool trace_leaf)
2044 {
2045 struct btrfs_fs_info *fs_info = trans->fs_info;
2046 struct extent_buffer *eb;
2047 bool need_cleanup = false;
2048 int ret = 0;
2049 int i;
2050 
2051 /* Level sanity check */
2052 if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
2053 root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
2054 root_level < cur_level) {
2055 btrfs_err_rl(fs_info,
2056 "%s: bad levels, cur_level=%d root_level=%d",
2057 __func__, cur_level, root_level);
2058 return -EUCLEAN;
2059 }
2060 
2061 /* Read the tree block if needed */
2062 if (dst_path->nodes[cur_level] == NULL) {
2063 int parent_slot;
2064 u64 child_gen;
2065 
2066 /*
2067 * dst_path->nodes[root_level] must be initialized before
2068 * calling this function.
2069 */
2070 if (cur_level == root_level) {
2071 btrfs_err_rl(fs_info,
2072 "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
2073 __func__, root_level, root_level, cur_level);
2074 return -EUCLEAN;
2075 }
2076 
2077 /*
2078 * We need to get child blockptr/gen from parent before we can
2079 * read it.
2080 */ 2081 eb = dst_path->nodes[cur_level + 1]; 2082 parent_slot = dst_path->slots[cur_level + 1]; 2083 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 2084 2085 /* This node is old, no need to trace */ 2086 if (child_gen < last_snapshot) 2087 goto out; 2088 2089 eb = btrfs_read_node_slot(eb, parent_slot); 2090 if (IS_ERR(eb)) { 2091 ret = PTR_ERR(eb); 2092 goto out; 2093 } 2094 2095 dst_path->nodes[cur_level] = eb; 2096 dst_path->slots[cur_level] = 0; 2097 2098 btrfs_tree_read_lock(eb); 2099 dst_path->locks[cur_level] = BTRFS_READ_LOCK; 2100 need_cleanup = true; 2101 } 2102 2103 /* Now record this tree block and its counter part for qgroups */ 2104 ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level, 2105 root_level, trace_leaf); 2106 if (ret < 0) 2107 goto cleanup; 2108 2109 eb = dst_path->nodes[cur_level]; 2110 2111 if (cur_level > 0) { 2112 /* Iterate all child tree blocks */ 2113 for (i = 0; i < btrfs_header_nritems(eb); i++) { 2114 /* Skip old tree blocks as they won't be swapped */ 2115 if (btrfs_node_ptr_generation(eb, i) < last_snapshot) 2116 continue; 2117 dst_path->slots[cur_level] = i; 2118 2119 /* Recursive call (at most 7 times) */ 2120 ret = qgroup_trace_new_subtree_blocks(trans, src_eb, 2121 dst_path, cur_level - 1, root_level, 2122 last_snapshot, trace_leaf); 2123 if (ret < 0) 2124 goto cleanup; 2125 } 2126 } 2127 2128 cleanup: 2129 if (need_cleanup) { 2130 /* Clean up */ 2131 btrfs_tree_unlock_rw(dst_path->nodes[cur_level], 2132 dst_path->locks[cur_level]); 2133 free_extent_buffer(dst_path->nodes[cur_level]); 2134 dst_path->nodes[cur_level] = NULL; 2135 dst_path->slots[cur_level] = 0; 2136 dst_path->locks[cur_level] = 0; 2137 } 2138 out: 2139 return ret; 2140 } 2141 2142 static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, 2143 struct extent_buffer *src_eb, 2144 struct extent_buffer *dst_eb, 2145 u64 last_snapshot, bool trace_leaf) 2146 { 2147 struct btrfs_fs_info *fs_info = trans->fs_info; 2148 struct btrfs_path *dst_path = NULL; 2149 int level; 2150 int ret; 2151 2152 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2153 return 0; 2154 2155 /* Wrong parameter order */ 2156 if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) { 2157 btrfs_err_rl(fs_info, 2158 "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__, 2159 btrfs_header_generation(src_eb), 2160 btrfs_header_generation(dst_eb)); 2161 return -EUCLEAN; 2162 } 2163 2164 if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) { 2165 ret = -EIO; 2166 goto out; 2167 } 2168 2169 level = btrfs_header_level(dst_eb); 2170 dst_path = btrfs_alloc_path(); 2171 if (!dst_path) { 2172 ret = -ENOMEM; 2173 goto out; 2174 } 2175 /* For dst_path */ 2176 atomic_inc(&dst_eb->refs); 2177 dst_path->nodes[level] = dst_eb; 2178 dst_path->slots[level] = 0; 2179 dst_path->locks[level] = 0; 2180 2181 /* Do the generation aware breadth-first search */ 2182 ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level, 2183 level, last_snapshot, trace_leaf); 2184 if (ret < 0) 2185 goto out; 2186 ret = 0; 2187 2188 out: 2189 btrfs_free_path(dst_path); 2190 if (ret < 0) 2191 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2192 return ret; 2193 } 2194 2195 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 2196 struct extent_buffer *root_eb, 2197 u64 root_gen, int root_level) 2198 { 2199 struct btrfs_fs_info *fs_info = trans->fs_info; 2200 int ret = 0; 2201 int level; 2202 struct extent_buffer *eb = root_eb; 2203 
struct btrfs_path *path = NULL; 2204 2205 BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL); 2206 BUG_ON(root_eb == NULL); 2207 2208 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2209 return 0; 2210 2211 if (!extent_buffer_uptodate(root_eb)) { 2212 ret = btrfs_read_buffer(root_eb, root_gen, root_level, NULL); 2213 if (ret) 2214 goto out; 2215 } 2216 2217 if (root_level == 0) { 2218 ret = btrfs_qgroup_trace_leaf_items(trans, root_eb); 2219 goto out; 2220 } 2221 2222 path = btrfs_alloc_path(); 2223 if (!path) 2224 return -ENOMEM; 2225 2226 /* 2227 * Walk down the tree. Missing extent blocks are filled in as 2228 * we go. Metadata is accounted every time we read a new 2229 * extent block. 2230 * 2231 * When we reach a leaf, we account for file extent items in it, 2232 * walk back up the tree (adjusting slot pointers as we go) 2233 * and restart the search process. 2234 */ 2235 atomic_inc(&root_eb->refs); /* For path */ 2236 path->nodes[root_level] = root_eb; 2237 path->slots[root_level] = 0; 2238 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 2239 walk_down: 2240 level = root_level; 2241 while (level >= 0) { 2242 if (path->nodes[level] == NULL) { 2243 int parent_slot; 2244 u64 child_bytenr; 2245 2246 /* 2247 * We need to get child blockptr from parent before we 2248 * can read it. 2249 */ 2250 eb = path->nodes[level + 1]; 2251 parent_slot = path->slots[level + 1]; 2252 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 2253 2254 eb = btrfs_read_node_slot(eb, parent_slot); 2255 if (IS_ERR(eb)) { 2256 ret = PTR_ERR(eb); 2257 goto out; 2258 } 2259 2260 path->nodes[level] = eb; 2261 path->slots[level] = 0; 2262 2263 btrfs_tree_read_lock(eb); 2264 path->locks[level] = BTRFS_READ_LOCK; 2265 2266 ret = btrfs_qgroup_trace_extent(trans, child_bytenr, 2267 fs_info->nodesize, 2268 GFP_NOFS); 2269 if (ret) 2270 goto out; 2271 } 2272 2273 if (level == 0) { 2274 ret = btrfs_qgroup_trace_leaf_items(trans, 2275 path->nodes[level]); 2276 if (ret) 2277 goto out; 2278 2279 /* Nonzero return here means we completed our search */ 2280 ret = adjust_slots_upwards(path, root_level); 2281 if (ret) 2282 break; 2283 2284 /* Restart search with new slots */ 2285 goto walk_down; 2286 } 2287 2288 level--; 2289 } 2290 2291 ret = 0; 2292 out: 2293 btrfs_free_path(path); 2294 2295 return ret; 2296 } 2297 2298 #define UPDATE_NEW 0 2299 #define UPDATE_OLD 1 2300 /* 2301 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
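 *
 * For example (hypothetical ids): with qgroup 0/257 being a member of 1/100,
 * an extent referenced by subvolume 257 bumps the refcnt of 0/257 and, via
 * its next_group list, of 1/100 as well.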
2302 */
2303 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
2304 struct ulist *roots, struct ulist *tmp,
2305 struct ulist *qgroups, u64 seq, int update_old)
2306 {
2307 struct ulist_node *unode;
2308 struct ulist_iterator uiter;
2309 struct ulist_node *tmp_unode;
2310 struct ulist_iterator tmp_uiter;
2311 struct btrfs_qgroup *qg;
2312 int ret = 0;
2313 
2314 if (!roots)
2315 return 0;
2316 ULIST_ITER_INIT(&uiter);
2317 while ((unode = ulist_next(roots, &uiter))) {
2318 qg = find_qgroup_rb(fs_info, unode->val);
2319 if (!qg)
2320 continue;
2321 
2322 ulist_reinit(tmp);
2323 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
2324 GFP_ATOMIC);
2325 if (ret < 0)
2326 return ret;
2327 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
2328 if (ret < 0)
2329 return ret;
2330 ULIST_ITER_INIT(&tmp_uiter);
2331 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
2332 struct btrfs_qgroup_list *glist;
2333 
2334 qg = unode_aux_to_qgroup(tmp_unode);
2335 if (update_old)
2336 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
2337 else
2338 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
2339 list_for_each_entry(glist, &qg->groups, next_group) {
2340 ret = ulist_add(qgroups, glist->group->qgroupid,
2341 qgroup_to_aux(glist->group),
2342 GFP_ATOMIC);
2343 if (ret < 0)
2344 return ret;
2345 ret = ulist_add(tmp, glist->group->qgroupid,
2346 qgroup_to_aux(glist->group),
2347 GFP_ATOMIC);
2348 if (ret < 0)
2349 return ret;
2350 }
2351 }
2352 }
2353 return 0;
2354 }
2355 
2356 /*
2357 * Update qgroup rfer/excl counters.
2358 * Rfer update is easy, the code explains itself.
2359 *
2360 * Excl update is tricky, the update is split into 2 parts.
2361 * Part 1: Possible exclusive <-> sharing detection:
2362 * | A | !A |
2363 * -------------------------------------
2364 * B | * | - |
2365 * -------------------------------------
2366 * !B | + | ** |
2367 * -------------------------------------
2368 *
2369 * Conditions:
2370 * A: cur_old_roots < nr_old_roots (not exclusive before)
2371 * !A: cur_old_roots == nr_old_roots (possible exclusive before)
2372 * B: cur_new_roots < nr_new_roots (not exclusive now)
2373 * !B: cur_new_roots == nr_new_roots (possible exclusive now)
2374 *
2375 * Results:
2376 * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
2377 * *: Definitely not changed. **: Possible unchanged.
2378 *
2379 * For the !A and !B conditions, the exception is the cur_old/new_roots == 0 case.
2380 *
2381 * To make the logic clear, we first use conditions A and B to split the
2382 * combination into 4 results.
2383 *
2384 * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
2385 * those cases only one of the counts may be 0.
2386 *
2387 * Lastly, check result **. Since there both counts may be 0, split it
2388 * again (2x2).
2389 * But this time we don't need to consider other things; the code and logic
2390 * are easy to follow now.
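 *
 * Worked example (illustrative numbers): an extent is referenced by roots A
 * and B before the operation (nr_old_roots == 2) and only by A afterwards
 * (nr_new_roots == 1).
 * For A's qgroup: cur_old_count == 1 < 2 (A) and cur_new_count == 1 == 1 (!B),
 * so it hits "+" and excl grows by num_bytes, while rfer is unchanged.
 * For B's qgroup: cur_old_count == 1 < 2 (A) and cur_new_count == 0 < 1 (B),
 * so excl is untouched ("*"), while rfer drops by num_bytes because
 * cur_old_count > 0 and cur_new_count == 0.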
2391 */ 2392 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 2393 struct ulist *qgroups, 2394 u64 nr_old_roots, 2395 u64 nr_new_roots, 2396 u64 num_bytes, u64 seq) 2397 { 2398 struct ulist_node *unode; 2399 struct ulist_iterator uiter; 2400 struct btrfs_qgroup *qg; 2401 u64 cur_new_count, cur_old_count; 2402 2403 ULIST_ITER_INIT(&uiter); 2404 while ((unode = ulist_next(qgroups, &uiter))) { 2405 bool dirty = false; 2406 2407 qg = unode_aux_to_qgroup(unode); 2408 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 2409 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 2410 2411 trace_qgroup_update_counters(fs_info, qg, cur_old_count, 2412 cur_new_count); 2413 2414 /* Rfer update part */ 2415 if (cur_old_count == 0 && cur_new_count > 0) { 2416 qg->rfer += num_bytes; 2417 qg->rfer_cmpr += num_bytes; 2418 dirty = true; 2419 } 2420 if (cur_old_count > 0 && cur_new_count == 0) { 2421 qg->rfer -= num_bytes; 2422 qg->rfer_cmpr -= num_bytes; 2423 dirty = true; 2424 } 2425 2426 /* Excl update part */ 2427 /* Exclusive/none -> shared case */ 2428 if (cur_old_count == nr_old_roots && 2429 cur_new_count < nr_new_roots) { 2430 /* Exclusive -> shared */ 2431 if (cur_old_count != 0) { 2432 qg->excl -= num_bytes; 2433 qg->excl_cmpr -= num_bytes; 2434 dirty = true; 2435 } 2436 } 2437 2438 /* Shared -> exclusive/none case */ 2439 if (cur_old_count < nr_old_roots && 2440 cur_new_count == nr_new_roots) { 2441 /* Shared->exclusive */ 2442 if (cur_new_count != 0) { 2443 qg->excl += num_bytes; 2444 qg->excl_cmpr += num_bytes; 2445 dirty = true; 2446 } 2447 } 2448 2449 /* Exclusive/none -> exclusive/none case */ 2450 if (cur_old_count == nr_old_roots && 2451 cur_new_count == nr_new_roots) { 2452 if (cur_old_count == 0) { 2453 /* None -> exclusive/none */ 2454 2455 if (cur_new_count != 0) { 2456 /* None -> exclusive */ 2457 qg->excl += num_bytes; 2458 qg->excl_cmpr += num_bytes; 2459 dirty = true; 2460 } 2461 /* None -> none, nothing changed */ 2462 } else { 2463 /* Exclusive -> exclusive/none */ 2464 2465 if (cur_new_count == 0) { 2466 /* Exclusive -> none */ 2467 qg->excl -= num_bytes; 2468 qg->excl_cmpr -= num_bytes; 2469 dirty = true; 2470 } 2471 /* Exclusive -> exclusive, nothing changed */ 2472 } 2473 } 2474 2475 if (dirty) 2476 qgroup_dirty(fs_info, qg); 2477 } 2478 return 0; 2479 } 2480 2481 /* 2482 * Check if the @roots potentially is a list of fs tree roots 2483 * 2484 * Return 0 for definitely not a fs/subvol tree roots ulist 2485 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 2486 * one as well) 2487 */ 2488 static int maybe_fs_roots(struct ulist *roots) 2489 { 2490 struct ulist_node *unode; 2491 struct ulist_iterator uiter; 2492 2493 /* Empty one, still possible for fs roots */ 2494 if (!roots || roots->nnodes == 0) 2495 return 1; 2496 2497 ULIST_ITER_INIT(&uiter); 2498 unode = ulist_next(roots, &uiter); 2499 if (!unode) 2500 return 1; 2501 2502 /* 2503 * If it contains fs tree roots, then it must belong to fs/subvol 2504 * trees. 2505 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 
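 *
 * For example (assuming standard objectids): a ulist whose first entry is
 * BTRFS_FS_TREE_OBJECTID or a subvolume id >= BTRFS_FIRST_FREE_OBJECTID
 * passes the is_fstree() check below, while one starting with e.g. the
 * extent tree objectid does not.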
2506 */ 2507 return is_fstree(unode->val); 2508 } 2509 2510 int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, 2511 u64 num_bytes, struct ulist *old_roots, 2512 struct ulist *new_roots) 2513 { 2514 struct btrfs_fs_info *fs_info = trans->fs_info; 2515 struct ulist *qgroups = NULL; 2516 struct ulist *tmp = NULL; 2517 u64 seq; 2518 u64 nr_new_roots = 0; 2519 u64 nr_old_roots = 0; 2520 int ret = 0; 2521 2522 /* 2523 * If quotas get disabled meanwhile, the resouces need to be freed and 2524 * we can't just exit here. 2525 */ 2526 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2527 goto out_free; 2528 2529 if (new_roots) { 2530 if (!maybe_fs_roots(new_roots)) 2531 goto out_free; 2532 nr_new_roots = new_roots->nnodes; 2533 } 2534 if (old_roots) { 2535 if (!maybe_fs_roots(old_roots)) 2536 goto out_free; 2537 nr_old_roots = old_roots->nnodes; 2538 } 2539 2540 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 2541 if (nr_old_roots == 0 && nr_new_roots == 0) 2542 goto out_free; 2543 2544 BUG_ON(!fs_info->quota_root); 2545 2546 trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr, 2547 num_bytes, nr_old_roots, nr_new_roots); 2548 2549 qgroups = ulist_alloc(GFP_NOFS); 2550 if (!qgroups) { 2551 ret = -ENOMEM; 2552 goto out_free; 2553 } 2554 tmp = ulist_alloc(GFP_NOFS); 2555 if (!tmp) { 2556 ret = -ENOMEM; 2557 goto out_free; 2558 } 2559 2560 mutex_lock(&fs_info->qgroup_rescan_lock); 2561 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 2562 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 2563 mutex_unlock(&fs_info->qgroup_rescan_lock); 2564 ret = 0; 2565 goto out_free; 2566 } 2567 } 2568 mutex_unlock(&fs_info->qgroup_rescan_lock); 2569 2570 spin_lock(&fs_info->qgroup_lock); 2571 seq = fs_info->qgroup_seq; 2572 2573 /* Update old refcnts using old_roots */ 2574 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 2575 UPDATE_OLD); 2576 if (ret < 0) 2577 goto out; 2578 2579 /* Update new refcnts using new_roots */ 2580 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 2581 UPDATE_NEW); 2582 if (ret < 0) 2583 goto out; 2584 2585 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 2586 num_bytes, seq); 2587 2588 /* 2589 * Bump qgroup_seq to avoid seq overlap 2590 */ 2591 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 2592 out: 2593 spin_unlock(&fs_info->qgroup_lock); 2594 out_free: 2595 ulist_free(tmp); 2596 ulist_free(qgroups); 2597 ulist_free(old_roots); 2598 ulist_free(new_roots); 2599 return ret; 2600 } 2601 2602 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) 2603 { 2604 struct btrfs_fs_info *fs_info = trans->fs_info; 2605 struct btrfs_qgroup_extent_record *record; 2606 struct btrfs_delayed_ref_root *delayed_refs; 2607 struct ulist *new_roots = NULL; 2608 struct rb_node *node; 2609 u64 num_dirty_extents = 0; 2610 u64 qgroup_to_skip; 2611 int ret = 0; 2612 2613 delayed_refs = &trans->transaction->delayed_refs; 2614 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2615 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2616 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2617 node); 2618 2619 num_dirty_extents++; 2620 trace_btrfs_qgroup_account_extents(fs_info, record); 2621 2622 if (!ret) { 2623 /* 2624 * Old roots should be searched when inserting qgroup 2625 * extent record 2626 */ 2627 if (WARN_ON(!record->old_roots)) { 2628 /* Search commit root to find old_roots */ 2629 ret = btrfs_find_all_roots(NULL, 
fs_info, 2630 record->bytenr, 0, 2631 &record->old_roots, false); 2632 if (ret < 0) 2633 goto cleanup; 2634 } 2635 2636 /* Free the reserved data space */ 2637 btrfs_qgroup_free_refroot(fs_info, 2638 record->data_rsv_refroot, 2639 record->data_rsv, 2640 BTRFS_QGROUP_RSV_DATA); 2641 /* 2642 * Use SEQ_LAST as time_seq to do special search, which 2643 * doesn't lock tree or delayed_refs and search current 2644 * root. It's safe inside commit_transaction(). 2645 */ 2646 ret = btrfs_find_all_roots(trans, fs_info, 2647 record->bytenr, SEQ_LAST, &new_roots, false); 2648 if (ret < 0) 2649 goto cleanup; 2650 if (qgroup_to_skip) { 2651 ulist_del(new_roots, qgroup_to_skip, 0); 2652 ulist_del(record->old_roots, qgroup_to_skip, 2653 0); 2654 } 2655 ret = btrfs_qgroup_account_extent(trans, record->bytenr, 2656 record->num_bytes, 2657 record->old_roots, 2658 new_roots); 2659 record->old_roots = NULL; 2660 new_roots = NULL; 2661 } 2662 cleanup: 2663 ulist_free(record->old_roots); 2664 ulist_free(new_roots); 2665 new_roots = NULL; 2666 rb_erase(node, &delayed_refs->dirty_extent_root); 2667 kfree(record); 2668 2669 } 2670 trace_qgroup_num_dirty_extents(fs_info, trans->transid, 2671 num_dirty_extents); 2672 return ret; 2673 } 2674 2675 /* 2676 * called from commit_transaction. Writes all changed qgroups to disk. 2677 */ 2678 int btrfs_run_qgroups(struct btrfs_trans_handle *trans) 2679 { 2680 struct btrfs_fs_info *fs_info = trans->fs_info; 2681 int ret = 0; 2682 2683 if (!fs_info->quota_root) 2684 return ret; 2685 2686 spin_lock(&fs_info->qgroup_lock); 2687 while (!list_empty(&fs_info->dirty_qgroups)) { 2688 struct btrfs_qgroup *qgroup; 2689 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2690 struct btrfs_qgroup, dirty); 2691 list_del_init(&qgroup->dirty); 2692 spin_unlock(&fs_info->qgroup_lock); 2693 ret = update_qgroup_info_item(trans, qgroup); 2694 if (ret) 2695 fs_info->qgroup_flags |= 2696 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2697 ret = update_qgroup_limit_item(trans, qgroup); 2698 if (ret) 2699 fs_info->qgroup_flags |= 2700 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2701 spin_lock(&fs_info->qgroup_lock); 2702 } 2703 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2704 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2705 else 2706 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2707 spin_unlock(&fs_info->qgroup_lock); 2708 2709 ret = update_qgroup_status_item(trans); 2710 if (ret) 2711 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2712 2713 return ret; 2714 } 2715 2716 /* 2717 * Copy the accounting information between qgroups. This is necessary 2718 * when a snapshot or a subvolume is created. Throwing an error will 2719 * cause a transaction abort so we take extra care here to only error 2720 * when a readonly fs is a reasonable outcome. 2721 */ 2722 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, 2723 u64 objectid, struct btrfs_qgroup_inherit *inherit) 2724 { 2725 int ret = 0; 2726 int i; 2727 u64 *i_qgroups; 2728 bool committing = false; 2729 struct btrfs_fs_info *fs_info = trans->fs_info; 2730 struct btrfs_root *quota_root; 2731 struct btrfs_qgroup *srcgroup; 2732 struct btrfs_qgroup *dstgroup; 2733 bool need_rescan = false; 2734 u32 level_size = 0; 2735 u64 nums; 2736 2737 /* 2738 * There are only two callers of this function. 2739 * 2740 * One in create_subvol() in the ioctl context, which needs to hold 2741 * the qgroup_ioctl_lock. 
2742 * 2743 * The other one in create_pending_snapshot() where no other qgroup 2744 * code can modify the fs as they all need to either start a new trans 2745 * or hold a trans handler, thus we don't need to hold 2746 * qgroup_ioctl_lock. 2747 * This would avoid long and complex lock chain and make lockdep happy. 2748 */ 2749 spin_lock(&fs_info->trans_lock); 2750 if (trans->transaction->state == TRANS_STATE_COMMIT_DOING) 2751 committing = true; 2752 spin_unlock(&fs_info->trans_lock); 2753 2754 if (!committing) 2755 mutex_lock(&fs_info->qgroup_ioctl_lock); 2756 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2757 goto out; 2758 2759 quota_root = fs_info->quota_root; 2760 if (!quota_root) { 2761 ret = -EINVAL; 2762 goto out; 2763 } 2764 2765 if (inherit) { 2766 i_qgroups = (u64 *)(inherit + 1); 2767 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2768 2 * inherit->num_excl_copies; 2769 for (i = 0; i < nums; ++i) { 2770 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2771 2772 /* 2773 * Zero out invalid groups so we can ignore 2774 * them later. 2775 */ 2776 if (!srcgroup || 2777 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2778 *i_qgroups = 0ULL; 2779 2780 ++i_qgroups; 2781 } 2782 } 2783 2784 /* 2785 * create a tracking group for the subvol itself 2786 */ 2787 ret = add_qgroup_item(trans, quota_root, objectid); 2788 if (ret) 2789 goto out; 2790 2791 /* 2792 * add qgroup to all inherited groups 2793 */ 2794 if (inherit) { 2795 i_qgroups = (u64 *)(inherit + 1); 2796 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2797 if (*i_qgroups == 0) 2798 continue; 2799 ret = add_qgroup_relation_item(trans, objectid, 2800 *i_qgroups); 2801 if (ret && ret != -EEXIST) 2802 goto out; 2803 ret = add_qgroup_relation_item(trans, *i_qgroups, 2804 objectid); 2805 if (ret && ret != -EEXIST) 2806 goto out; 2807 } 2808 ret = 0; 2809 } 2810 2811 2812 spin_lock(&fs_info->qgroup_lock); 2813 2814 dstgroup = add_qgroup_rb(fs_info, objectid); 2815 if (IS_ERR(dstgroup)) { 2816 ret = PTR_ERR(dstgroup); 2817 goto unlock; 2818 } 2819 2820 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2821 dstgroup->lim_flags = inherit->lim.flags; 2822 dstgroup->max_rfer = inherit->lim.max_rfer; 2823 dstgroup->max_excl = inherit->lim.max_excl; 2824 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2825 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2826 2827 ret = update_qgroup_limit_item(trans, dstgroup); 2828 if (ret) { 2829 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2830 btrfs_info(fs_info, 2831 "unable to update quota limit for %llu", 2832 dstgroup->qgroupid); 2833 goto unlock; 2834 } 2835 } 2836 2837 if (srcid) { 2838 srcgroup = find_qgroup_rb(fs_info, srcid); 2839 if (!srcgroup) 2840 goto unlock; 2841 2842 /* 2843 * We call inherit after we clone the root in order to make sure 2844 * our counts don't go crazy, so at this point the only 2845 * difference between the two roots should be the root node. 
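 *
 * For example (illustrative): with a 16K nodesize the snapshot shares every
 * tree block with the source except its freshly COWed root node, so right
 * after creation both qgroups end up with excl == excl_cmpr == 16K while
 * rfer is copied over unchanged.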
2846 */ 2847 level_size = fs_info->nodesize; 2848 dstgroup->rfer = srcgroup->rfer; 2849 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2850 dstgroup->excl = level_size; 2851 dstgroup->excl_cmpr = level_size; 2852 srcgroup->excl = level_size; 2853 srcgroup->excl_cmpr = level_size; 2854 2855 /* inherit the limit info */ 2856 dstgroup->lim_flags = srcgroup->lim_flags; 2857 dstgroup->max_rfer = srcgroup->max_rfer; 2858 dstgroup->max_excl = srcgroup->max_excl; 2859 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2860 dstgroup->rsv_excl = srcgroup->rsv_excl; 2861 2862 qgroup_dirty(fs_info, dstgroup); 2863 qgroup_dirty(fs_info, srcgroup); 2864 } 2865 2866 if (!inherit) 2867 goto unlock; 2868 2869 i_qgroups = (u64 *)(inherit + 1); 2870 for (i = 0; i < inherit->num_qgroups; ++i) { 2871 if (*i_qgroups) { 2872 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2873 if (ret) 2874 goto unlock; 2875 } 2876 ++i_qgroups; 2877 2878 /* 2879 * If we're doing a snapshot, and adding the snapshot to a new 2880 * qgroup, the numbers are guaranteed to be incorrect. 2881 */ 2882 if (srcid) 2883 need_rescan = true; 2884 } 2885 2886 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2887 struct btrfs_qgroup *src; 2888 struct btrfs_qgroup *dst; 2889 2890 if (!i_qgroups[0] || !i_qgroups[1]) 2891 continue; 2892 2893 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2894 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2895 2896 if (!src || !dst) { 2897 ret = -EINVAL; 2898 goto unlock; 2899 } 2900 2901 dst->rfer = src->rfer - level_size; 2902 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2903 2904 /* Manually tweaking numbers certainly needs a rescan */ 2905 need_rescan = true; 2906 } 2907 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2908 struct btrfs_qgroup *src; 2909 struct btrfs_qgroup *dst; 2910 2911 if (!i_qgroups[0] || !i_qgroups[1]) 2912 continue; 2913 2914 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2915 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2916 2917 if (!src || !dst) { 2918 ret = -EINVAL; 2919 goto unlock; 2920 } 2921 2922 dst->excl = src->excl + level_size; 2923 dst->excl_cmpr = src->excl_cmpr + level_size; 2924 need_rescan = true; 2925 } 2926 2927 unlock: 2928 spin_unlock(&fs_info->qgroup_lock); 2929 if (!ret) 2930 ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup); 2931 out: 2932 if (!committing) 2933 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2934 if (need_rescan) 2935 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2936 return ret; 2937 } 2938 2939 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2940 { 2941 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2942 qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer) 2943 return false; 2944 2945 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2946 qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl) 2947 return false; 2948 2949 return true; 2950 } 2951 2952 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce, 2953 enum btrfs_qgroup_rsv_type type) 2954 { 2955 struct btrfs_qgroup *qgroup; 2956 struct btrfs_fs_info *fs_info = root->fs_info; 2957 u64 ref_root = root->root_key.objectid; 2958 int ret = 0; 2959 struct ulist_node *unode; 2960 struct ulist_iterator uiter; 2961 2962 if (!is_fstree(ref_root)) 2963 return 0; 2964 2965 if (num_bytes == 0) 2966 return 0; 2967 2968 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && 2969 capable(CAP_SYS_RESOURCE)) 2970 enforce = false; 2971 2972 spin_lock(&fs_info->qgroup_lock); 2973 if 
(!fs_info->quota_root) 2974 goto out; 2975 2976 qgroup = find_qgroup_rb(fs_info, ref_root); 2977 if (!qgroup) 2978 goto out; 2979 2980 /* 2981 * in a first step, we check all affected qgroups if any limits would 2982 * be exceeded 2983 */ 2984 ulist_reinit(fs_info->qgroup_ulist); 2985 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2986 qgroup_to_aux(qgroup), GFP_ATOMIC); 2987 if (ret < 0) 2988 goto out; 2989 ULIST_ITER_INIT(&uiter); 2990 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2991 struct btrfs_qgroup *qg; 2992 struct btrfs_qgroup_list *glist; 2993 2994 qg = unode_aux_to_qgroup(unode); 2995 2996 if (enforce && !qgroup_check_limits(qg, num_bytes)) { 2997 ret = -EDQUOT; 2998 goto out; 2999 } 3000 3001 list_for_each_entry(glist, &qg->groups, next_group) { 3002 ret = ulist_add(fs_info->qgroup_ulist, 3003 glist->group->qgroupid, 3004 qgroup_to_aux(glist->group), GFP_ATOMIC); 3005 if (ret < 0) 3006 goto out; 3007 } 3008 } 3009 ret = 0; 3010 /* 3011 * no limits exceeded, now record the reservation into all qgroups 3012 */ 3013 ULIST_ITER_INIT(&uiter); 3014 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 3015 struct btrfs_qgroup *qg; 3016 3017 qg = unode_aux_to_qgroup(unode); 3018 3019 qgroup_rsv_add(fs_info, qg, num_bytes, type); 3020 } 3021 3022 out: 3023 spin_unlock(&fs_info->qgroup_lock); 3024 return ret; 3025 } 3026 3027 /* 3028 * Free @num_bytes of reserved space with @type for qgroup. (Normally level 0 3029 * qgroup). 3030 * 3031 * Will handle all higher level qgroup too. 3032 * 3033 * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup. 3034 * This special case is only used for META_PERTRANS type. 3035 */ 3036 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 3037 u64 ref_root, u64 num_bytes, 3038 enum btrfs_qgroup_rsv_type type) 3039 { 3040 struct btrfs_qgroup *qgroup; 3041 struct ulist_node *unode; 3042 struct ulist_iterator uiter; 3043 int ret = 0; 3044 3045 if (!is_fstree(ref_root)) 3046 return; 3047 3048 if (num_bytes == 0) 3049 return; 3050 3051 if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) { 3052 WARN(1, "%s: Invalid type to free", __func__); 3053 return; 3054 } 3055 spin_lock(&fs_info->qgroup_lock); 3056 3057 if (!fs_info->quota_root) 3058 goto out; 3059 3060 qgroup = find_qgroup_rb(fs_info, ref_root); 3061 if (!qgroup) 3062 goto out; 3063 3064 if (num_bytes == (u64)-1) 3065 /* 3066 * We're freeing all pertrans rsv, get reserved value from 3067 * level 0 qgroup as real num_bytes to free. 3068 */ 3069 num_bytes = qgroup->rsv.values[type]; 3070 3071 ulist_reinit(fs_info->qgroup_ulist); 3072 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 3073 qgroup_to_aux(qgroup), GFP_ATOMIC); 3074 if (ret < 0) 3075 goto out; 3076 ULIST_ITER_INIT(&uiter); 3077 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 3078 struct btrfs_qgroup *qg; 3079 struct btrfs_qgroup_list *glist; 3080 3081 qg = unode_aux_to_qgroup(unode); 3082 3083 qgroup_rsv_release(fs_info, qg, num_bytes, type); 3084 3085 list_for_each_entry(glist, &qg->groups, next_group) { 3086 ret = ulist_add(fs_info->qgroup_ulist, 3087 glist->group->qgroupid, 3088 qgroup_to_aux(glist->group), GFP_ATOMIC); 3089 if (ret < 0) 3090 goto out; 3091 } 3092 } 3093 3094 out: 3095 spin_unlock(&fs_info->qgroup_lock); 3096 } 3097 3098 /* 3099 * Check if the leaf is the last leaf. Which means all node pointers 3100 * are at their last position. 
3101 */ 3102 static bool is_last_leaf(struct btrfs_path *path) 3103 { 3104 int i; 3105 3106 for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 3107 if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1) 3108 return false; 3109 } 3110 return true; 3111 } 3112 3113 /* 3114 * returns < 0 on error, 0 when more leafs are to be scanned. 3115 * returns 1 when done. 3116 */ 3117 static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, 3118 struct btrfs_path *path) 3119 { 3120 struct btrfs_fs_info *fs_info = trans->fs_info; 3121 struct btrfs_key found; 3122 struct extent_buffer *scratch_leaf = NULL; 3123 struct ulist *roots = NULL; 3124 u64 num_bytes; 3125 bool done; 3126 int slot; 3127 int ret; 3128 3129 mutex_lock(&fs_info->qgroup_rescan_lock); 3130 ret = btrfs_search_slot_for_read(fs_info->extent_root, 3131 &fs_info->qgroup_rescan_progress, 3132 path, 1, 0); 3133 3134 btrfs_debug(fs_info, 3135 "current progress key (%llu %u %llu), search_slot ret %d", 3136 fs_info->qgroup_rescan_progress.objectid, 3137 fs_info->qgroup_rescan_progress.type, 3138 fs_info->qgroup_rescan_progress.offset, ret); 3139 3140 if (ret) { 3141 /* 3142 * The rescan is about to end, we will not be scanning any 3143 * further blocks. We cannot unset the RESCAN flag here, because 3144 * we want to commit the transaction if everything went well. 3145 * To make the live accounting work in this phase, we set our 3146 * scan progress pointer such that every real extent objectid 3147 * will be smaller. 3148 */ 3149 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3150 btrfs_release_path(path); 3151 mutex_unlock(&fs_info->qgroup_rescan_lock); 3152 return ret; 3153 } 3154 done = is_last_leaf(path); 3155 3156 btrfs_item_key_to_cpu(path->nodes[0], &found, 3157 btrfs_header_nritems(path->nodes[0]) - 1); 3158 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 3159 3160 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 3161 if (!scratch_leaf) { 3162 ret = -ENOMEM; 3163 mutex_unlock(&fs_info->qgroup_rescan_lock); 3164 goto out; 3165 } 3166 slot = path->slots[0]; 3167 btrfs_release_path(path); 3168 mutex_unlock(&fs_info->qgroup_rescan_lock); 3169 3170 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 3171 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 3172 if (found.type != BTRFS_EXTENT_ITEM_KEY && 3173 found.type != BTRFS_METADATA_ITEM_KEY) 3174 continue; 3175 if (found.type == BTRFS_METADATA_ITEM_KEY) 3176 num_bytes = fs_info->nodesize; 3177 else 3178 num_bytes = found.offset; 3179 3180 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 3181 &roots, false); 3182 if (ret < 0) 3183 goto out; 3184 /* For rescan, just pass old_roots as NULL */ 3185 ret = btrfs_qgroup_account_extent(trans, found.objectid, 3186 num_bytes, NULL, roots); 3187 if (ret < 0) 3188 goto out; 3189 } 3190 out: 3191 if (scratch_leaf) 3192 free_extent_buffer(scratch_leaf); 3193 3194 if (done && !ret) { 3195 ret = 1; 3196 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 3197 } 3198 return ret; 3199 } 3200 3201 static bool rescan_should_stop(struct btrfs_fs_info *fs_info) 3202 { 3203 return btrfs_fs_closing(fs_info) || 3204 test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 3205 } 3206 3207 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 3208 { 3209 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 3210 qgroup_rescan_work); 3211 struct btrfs_path *path; 3212 struct btrfs_trans_handle *trans = NULL; 3213 int err = -ENOMEM; 3214 int ret = 0; 3215 bool stopped = 
false; 3216 3217 path = btrfs_alloc_path(); 3218 if (!path) 3219 goto out; 3220 /* 3221 * Rescan should only search for commit root, and any later difference 3222 * should be recorded by qgroup 3223 */ 3224 path->search_commit_root = 1; 3225 path->skip_locking = 1; 3226 3227 err = 0; 3228 while (!err && !(stopped = rescan_should_stop(fs_info))) { 3229 trans = btrfs_start_transaction(fs_info->fs_root, 0); 3230 if (IS_ERR(trans)) { 3231 err = PTR_ERR(trans); 3232 break; 3233 } 3234 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 3235 err = -EINTR; 3236 } else { 3237 err = qgroup_rescan_leaf(trans, path); 3238 } 3239 if (err > 0) 3240 btrfs_commit_transaction(trans); 3241 else 3242 btrfs_end_transaction(trans); 3243 } 3244 3245 out: 3246 btrfs_free_path(path); 3247 3248 mutex_lock(&fs_info->qgroup_rescan_lock); 3249 if (err > 0 && 3250 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 3251 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3252 } else if (err < 0) { 3253 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 3254 } 3255 mutex_unlock(&fs_info->qgroup_rescan_lock); 3256 3257 /* 3258 * only update status, since the previous part has already updated the 3259 * qgroup info. 3260 */ 3261 trans = btrfs_start_transaction(fs_info->quota_root, 1); 3262 if (IS_ERR(trans)) { 3263 err = PTR_ERR(trans); 3264 trans = NULL; 3265 btrfs_err(fs_info, 3266 "fail to start transaction for status update: %d", 3267 err); 3268 } 3269 3270 mutex_lock(&fs_info->qgroup_rescan_lock); 3271 if (!stopped) 3272 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3273 if (trans) { 3274 ret = update_qgroup_status_item(trans); 3275 if (ret < 0) { 3276 err = ret; 3277 btrfs_err(fs_info, "fail to update qgroup status: %d", 3278 err); 3279 } 3280 } 3281 fs_info->qgroup_rescan_running = false; 3282 complete_all(&fs_info->qgroup_rescan_completion); 3283 mutex_unlock(&fs_info->qgroup_rescan_lock); 3284 3285 if (!trans) 3286 return; 3287 3288 btrfs_end_transaction(trans); 3289 3290 if (stopped) { 3291 btrfs_info(fs_info, "qgroup scan paused"); 3292 } else if (err >= 0) { 3293 btrfs_info(fs_info, "qgroup scan completed%s", 3294 err > 0 ? " (inconsistency flag cleared)" : ""); 3295 } else { 3296 btrfs_err(fs_info, "qgroup scan failed with %d", err); 3297 } 3298 } 3299 3300 /* 3301 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 3302 * memory required for the rescan context. 
3303 */ 3304 static int 3305 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 3306 int init_flags) 3307 { 3308 int ret = 0; 3309 3310 if (!init_flags) { 3311 /* we're resuming qgroup rescan at mount time */ 3312 if (!(fs_info->qgroup_flags & 3313 BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { 3314 btrfs_warn(fs_info, 3315 "qgroup rescan init failed, qgroup rescan is not queued"); 3316 ret = -EINVAL; 3317 } else if (!(fs_info->qgroup_flags & 3318 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3319 btrfs_warn(fs_info, 3320 "qgroup rescan init failed, qgroup is not enabled"); 3321 ret = -EINVAL; 3322 } 3323 3324 if (ret) 3325 return ret; 3326 } 3327 3328 mutex_lock(&fs_info->qgroup_rescan_lock); 3329 3330 if (init_flags) { 3331 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 3332 btrfs_warn(fs_info, 3333 "qgroup rescan is already in progress"); 3334 ret = -EINPROGRESS; 3335 } else if (!(fs_info->qgroup_flags & 3336 BTRFS_QGROUP_STATUS_FLAG_ON)) { 3337 btrfs_warn(fs_info, 3338 "qgroup rescan init failed, qgroup is not enabled"); 3339 ret = -EINVAL; 3340 } 3341 3342 if (ret) { 3343 mutex_unlock(&fs_info->qgroup_rescan_lock); 3344 return ret; 3345 } 3346 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3347 } 3348 3349 memset(&fs_info->qgroup_rescan_progress, 0, 3350 sizeof(fs_info->qgroup_rescan_progress)); 3351 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 3352 init_completion(&fs_info->qgroup_rescan_completion); 3353 mutex_unlock(&fs_info->qgroup_rescan_lock); 3354 3355 btrfs_init_work(&fs_info->qgroup_rescan_work, 3356 btrfs_qgroup_rescan_worker, NULL, NULL); 3357 return 0; 3358 } 3359 3360 static void 3361 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 3362 { 3363 struct rb_node *n; 3364 struct btrfs_qgroup *qgroup; 3365 3366 spin_lock(&fs_info->qgroup_lock); 3367 /* clear all current qgroup tracking information */ 3368 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 3369 qgroup = rb_entry(n, struct btrfs_qgroup, node); 3370 qgroup->rfer = 0; 3371 qgroup->rfer_cmpr = 0; 3372 qgroup->excl = 0; 3373 qgroup->excl_cmpr = 0; 3374 qgroup_dirty(fs_info, qgroup); 3375 } 3376 spin_unlock(&fs_info->qgroup_lock); 3377 } 3378 3379 int 3380 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 3381 { 3382 int ret = 0; 3383 struct btrfs_trans_handle *trans; 3384 3385 ret = qgroup_rescan_init(fs_info, 0, 1); 3386 if (ret) 3387 return ret; 3388 3389 /* 3390 * We have set the rescan_progress to 0, which means no more 3391 * delayed refs will be accounted by btrfs_qgroup_account_ref. 3392 * However, btrfs_qgroup_account_ref may be right after its call 3393 * to btrfs_find_all_roots, in which case it would still do the 3394 * accounting. 3395 * To solve this, we're committing the transaction, which will 3396 * ensure we run all delayed refs and only after that, we are 3397 * going to clear all tracking information for a clean start. 
3398 */ 3399 3400 trans = btrfs_join_transaction(fs_info->fs_root); 3401 if (IS_ERR(trans)) { 3402 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3403 return PTR_ERR(trans); 3404 } 3405 ret = btrfs_commit_transaction(trans); 3406 if (ret) { 3407 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 3408 return ret; 3409 } 3410 3411 qgroup_rescan_zero_tracking(fs_info); 3412 3413 mutex_lock(&fs_info->qgroup_rescan_lock); 3414 fs_info->qgroup_rescan_running = true; 3415 btrfs_queue_work(fs_info->qgroup_rescan_workers, 3416 &fs_info->qgroup_rescan_work); 3417 mutex_unlock(&fs_info->qgroup_rescan_lock); 3418 3419 return 0; 3420 } 3421 3422 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 3423 bool interruptible) 3424 { 3425 int running; 3426 int ret = 0; 3427 3428 mutex_lock(&fs_info->qgroup_rescan_lock); 3429 running = fs_info->qgroup_rescan_running; 3430 mutex_unlock(&fs_info->qgroup_rescan_lock); 3431 3432 if (!running) 3433 return 0; 3434 3435 if (interruptible) 3436 ret = wait_for_completion_interruptible( 3437 &fs_info->qgroup_rescan_completion); 3438 else 3439 wait_for_completion(&fs_info->qgroup_rescan_completion); 3440 3441 return ret; 3442 } 3443 3444 /* 3445 * this is only called from open_ctree where we're still single threaded, thus 3446 * locking is omitted here. 3447 */ 3448 void 3449 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 3450 { 3451 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 3452 mutex_lock(&fs_info->qgroup_rescan_lock); 3453 fs_info->qgroup_rescan_running = true; 3454 btrfs_queue_work(fs_info->qgroup_rescan_workers, 3455 &fs_info->qgroup_rescan_work); 3456 mutex_unlock(&fs_info->qgroup_rescan_lock); 3457 } 3458 } 3459 3460 #define rbtree_iterate_from_safe(node, next, start) \ 3461 for (node = start; node && ({ next = rb_next(node); 1;}); node = next) 3462 3463 static int qgroup_unreserve_range(struct btrfs_inode *inode, 3464 struct extent_changeset *reserved, u64 start, 3465 u64 len) 3466 { 3467 struct rb_node *node; 3468 struct rb_node *next; 3469 struct ulist_node *entry; 3470 int ret = 0; 3471 3472 node = reserved->range_changed.root.rb_node; 3473 if (!node) 3474 return 0; 3475 while (node) { 3476 entry = rb_entry(node, struct ulist_node, rb_node); 3477 if (entry->val < start) 3478 node = node->rb_right; 3479 else 3480 node = node->rb_left; 3481 } 3482 3483 if (entry->val > start && rb_prev(&entry->rb_node)) 3484 entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node, 3485 rb_node); 3486 3487 rbtree_iterate_from_safe(node, next, &entry->rb_node) { 3488 u64 entry_start; 3489 u64 entry_end; 3490 u64 entry_len; 3491 int clear_ret; 3492 3493 entry = rb_entry(node, struct ulist_node, rb_node); 3494 entry_start = entry->val; 3495 entry_end = entry->aux; 3496 entry_len = entry_end - entry_start + 1; 3497 3498 if (entry_start >= start + len) 3499 break; 3500 if (entry_start + entry_len <= start) 3501 continue; 3502 /* 3503 * Now the entry is in [start, start + len), revert the 3504 * EXTENT_QGROUP_RESERVED bit. 
3505 */
3506 clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
3507 entry_end, EXTENT_QGROUP_RESERVED);
3508 if (!ret && clear_ret < 0)
3509 ret = clear_ret;
3510 
3511 ulist_del(&reserved->range_changed, entry->val, entry->aux);
3512 if (likely(reserved->bytes_changed >= entry_len)) {
3513 reserved->bytes_changed -= entry_len;
3514 } else {
3515 WARN_ON(1);
3516 reserved->bytes_changed = 0;
3517 }
3518 }
3519 
3520 return ret;
3521 }
3522 
3523 /*
3524 * Try to free some space for qgroup.
3525 *
3526 * For qgroup, there are only 3 ways to free qgroup space:
3527 * - Flush nodatacow write
3528 * Any nodatacow write will free its reserved data space at run_delalloc_range().
3529 * In theory, we should only flush nodatacow inodes, but it's not yet
3530 * possible, so we need to flush the whole root.
3531 *
3532 * - Wait for ordered extents
3533 * When ordered extents are finished, their reserved metadata is finally
3534 * converted to per_trans status, which can be freed by a later transaction
3535 * commit.
3536 *
3537 * - Commit transaction
3538 * This would free the meta_per_trans space.
3539 * In theory this shouldn't provide much space, but any extra qgroup space
3540 * helps.
3541 */
3542 static int try_flush_qgroup(struct btrfs_root *root)
3543 {
3544 struct btrfs_trans_handle *trans;
3545 int ret;
3546 bool can_commit = true;
3547 
3548 /*
3549 * If the current process holds a transaction, we shouldn't flush, as we
3550 * assume all space reservation happens before a transaction handle is
3551 * held.
3552 *
3553 * But there are cases like btrfs_delayed_item_reserve_metadata() where
3554 * we try to reserve space with a transaction handle already held.
3555 * In that case we can't commit the transaction, but at least try to end
3556 * it and hope the started data writes can free some space.
3557 */
3558 if (current->journal_info &&
3559 current->journal_info != BTRFS_SEND_TRANS_STUB)
3560 can_commit = false;
3561 
3562 /*
3563 * We don't want to run flush again and again, so if there is a running
3564 * one, we won't try to start a new flush, but exit directly.
3565 */
3566 if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
3567 /*
3568 * We are already holding a transaction, thus we can block other
3569 * threads from flushing. So exit right now. This increases
3570 * the chance of EDQUOT for heavy load and near limit cases.
3571 * But we can argue that if we're already near the limit, EDQUOT is
3572 * unavoidable anyway.
3573 */ 3574 if (!can_commit) 3575 return 0; 3576 3577 wait_event(root->qgroup_flush_wait, 3578 !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); 3579 return 0; 3580 } 3581 3582 ret = btrfs_start_delalloc_snapshot(root); 3583 if (ret < 0) 3584 goto out; 3585 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); 3586 3587 trans = btrfs_join_transaction(root); 3588 if (IS_ERR(trans)) { 3589 ret = PTR_ERR(trans); 3590 goto out; 3591 } 3592 3593 if (can_commit) 3594 ret = btrfs_commit_transaction(trans); 3595 else 3596 ret = btrfs_end_transaction(trans); 3597 out: 3598 clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); 3599 wake_up(&root->qgroup_flush_wait); 3600 return ret; 3601 } 3602 3603 static int qgroup_reserve_data(struct btrfs_inode *inode, 3604 struct extent_changeset **reserved_ret, u64 start, 3605 u64 len) 3606 { 3607 struct btrfs_root *root = inode->root; 3608 struct extent_changeset *reserved; 3609 bool new_reserved = false; 3610 u64 orig_reserved; 3611 u64 to_reserve; 3612 int ret; 3613 3614 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 3615 !is_fstree(root->root_key.objectid) || len == 0) 3616 return 0; 3617 3618 /* @reserved parameter is mandatory for qgroup */ 3619 if (WARN_ON(!reserved_ret)) 3620 return -EINVAL; 3621 if (!*reserved_ret) { 3622 new_reserved = true; 3623 *reserved_ret = extent_changeset_alloc(); 3624 if (!*reserved_ret) 3625 return -ENOMEM; 3626 } 3627 reserved = *reserved_ret; 3628 /* Record already reserved space */ 3629 orig_reserved = reserved->bytes_changed; 3630 ret = set_record_extent_bits(&inode->io_tree, start, 3631 start + len -1, EXTENT_QGROUP_RESERVED, reserved); 3632 3633 /* Newly reserved space */ 3634 to_reserve = reserved->bytes_changed - orig_reserved; 3635 trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len, 3636 to_reserve, QGROUP_RESERVE); 3637 if (ret < 0) 3638 goto out; 3639 ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA); 3640 if (ret < 0) 3641 goto cleanup; 3642 3643 return ret; 3644 3645 cleanup: 3646 qgroup_unreserve_range(inode, reserved, start, len); 3647 out: 3648 if (new_reserved) { 3649 extent_changeset_release(reserved); 3650 kfree(reserved); 3651 *reserved_ret = NULL; 3652 } 3653 return ret; 3654 } 3655 3656 /* 3657 * Reserve qgroup space for range [start, start + len). 3658 * 3659 * This function will either reserve space from related qgroups or do nothing 3660 * if the range is already reserved. 3661 * 3662 * Return 0 for successful reservation 3663 * Return <0 for error (including -EQUOT) 3664 * 3665 * NOTE: This function may sleep for memory allocation, dirty page flushing and 3666 * commit transaction. So caller should not hold any dirty page locked. 
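 *
 * Typical usage (sketch, not tied to a specific caller): a buffered write
 * path reserves the range with btrfs_qgroup_reserve_data() before dirtying
 * pages, then either calls btrfs_qgroup_release_data() once the file extent
 * item is written out, or btrfs_qgroup_free_data() on error or invalidation.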
3667 */
3668 int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
3669 struct extent_changeset **reserved_ret, u64 start,
3670 u64 len)
3671 {
3672 int ret;
3673 
3674 ret = qgroup_reserve_data(inode, reserved_ret, start, len);
3675 if (ret <= 0 && ret != -EDQUOT)
3676 return ret;
3677 
3678 ret = try_flush_qgroup(inode->root);
3679 if (ret < 0)
3680 return ret;
3681 return qgroup_reserve_data(inode, reserved_ret, start, len);
3682 }
3683 
3684 /* Free the ranges specified by @reserved, normally in the error path */
3685 static int qgroup_free_reserved_data(struct btrfs_inode *inode,
3686 struct extent_changeset *reserved, u64 start, u64 len)
3687 {
3688 struct btrfs_root *root = inode->root;
3689 struct ulist_node *unode;
3690 struct ulist_iterator uiter;
3691 struct extent_changeset changeset;
3692 int freed = 0;
3693 int ret;
3694 
3695 extent_changeset_init(&changeset);
3696 len = round_up(start + len, root->fs_info->sectorsize);
3697 start = round_down(start, root->fs_info->sectorsize);
3698 
3699 ULIST_ITER_INIT(&uiter);
3700 while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
3701 u64 range_start = unode->val;
3702 /* unode->aux is the inclusive end */
3703 u64 range_len = unode->aux - range_start + 1;
3704 u64 free_start;
3705 u64 free_len;
3706 
3707 extent_changeset_release(&changeset);
3708 
3709 /* Only free ranges within [start, start + len) */
3710 if (range_start >= start + len ||
3711 range_start + range_len <= start)
3712 continue;
3713 free_start = max(range_start, start);
3714 free_len = min(start + len, range_start + range_len) -
3715 free_start;
3716 /*
3717 * TODO: Also modify reserved->ranges_reserved to reflect
3718 * the modification.
3719 *
3720 * However, as long as we free qgroup reserved space according to
3721 * EXTENT_QGROUP_RESERVED, we won't double free.
3722 * So no need to rush.
3723 */ 3724 ret = clear_record_extent_bits(&inode->io_tree, free_start, 3725 free_start + free_len - 1, 3726 EXTENT_QGROUP_RESERVED, &changeset); 3727 if (ret < 0) 3728 goto out; 3729 freed += changeset.bytes_changed; 3730 } 3731 btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed, 3732 BTRFS_QGROUP_RSV_DATA); 3733 ret = freed; 3734 out: 3735 extent_changeset_release(&changeset); 3736 return ret; 3737 } 3738 3739 static int __btrfs_qgroup_release_data(struct btrfs_inode *inode, 3740 struct extent_changeset *reserved, u64 start, u64 len, 3741 int free) 3742 { 3743 struct extent_changeset changeset; 3744 int trace_op = QGROUP_RELEASE; 3745 int ret; 3746 3747 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags)) 3748 return 0; 3749 3750 /* In release case, we shouldn't have @reserved */ 3751 WARN_ON(!free && reserved); 3752 if (free && reserved) 3753 return qgroup_free_reserved_data(inode, reserved, start, len); 3754 extent_changeset_init(&changeset); 3755 ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1, 3756 EXTENT_QGROUP_RESERVED, &changeset); 3757 if (ret < 0) 3758 goto out; 3759 3760 if (free) 3761 trace_op = QGROUP_FREE; 3762 trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len, 3763 changeset.bytes_changed, trace_op); 3764 if (free) 3765 btrfs_qgroup_free_refroot(inode->root->fs_info, 3766 inode->root->root_key.objectid, 3767 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA); 3768 ret = changeset.bytes_changed; 3769 out: 3770 extent_changeset_release(&changeset); 3771 return ret; 3772 } 3773 3774 /* 3775 * Free a reserved space range from io_tree and related qgroups 3776 * 3777 * Should be called when a range of pages get invalidated before reaching disk. 3778 * Or for error cleanup case. 3779 * if @reserved is given, only reserved range in [@start, @start + @len) will 3780 * be freed. 3781 * 3782 * For data written to disk, use btrfs_qgroup_release_data(). 3783 * 3784 * NOTE: This function may sleep for memory allocation. 3785 */ 3786 int btrfs_qgroup_free_data(struct btrfs_inode *inode, 3787 struct extent_changeset *reserved, u64 start, u64 len) 3788 { 3789 return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); 3790 } 3791 3792 /* 3793 * Release a reserved space range from io_tree only. 3794 * 3795 * Should be called when a range of pages get written to disk and corresponding 3796 * FILE_EXTENT is inserted into corresponding root. 3797 * 3798 * Since new qgroup accounting framework will only update qgroup numbers at 3799 * commit_transaction() time, its reserved space shouldn't be freed from 3800 * related qgroups. 3801 * 3802 * But we should release the range from io_tree, to allow further write to be 3803 * COWed. 3804 * 3805 * NOTE: This function may sleep for memory allocation. 
3806 */ 3807 int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len) 3808 { 3809 return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); 3810 } 3811 3812 static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes, 3813 enum btrfs_qgroup_rsv_type type) 3814 { 3815 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 3816 type != BTRFS_QGROUP_RSV_META_PERTRANS) 3817 return; 3818 if (num_bytes == 0) 3819 return; 3820 3821 spin_lock(&root->qgroup_meta_rsv_lock); 3822 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) 3823 root->qgroup_meta_rsv_prealloc += num_bytes; 3824 else 3825 root->qgroup_meta_rsv_pertrans += num_bytes; 3826 spin_unlock(&root->qgroup_meta_rsv_lock); 3827 } 3828 3829 static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, 3830 enum btrfs_qgroup_rsv_type type) 3831 { 3832 if (type != BTRFS_QGROUP_RSV_META_PREALLOC && 3833 type != BTRFS_QGROUP_RSV_META_PERTRANS) 3834 return 0; 3835 if (num_bytes == 0) 3836 return 0; 3837 3838 spin_lock(&root->qgroup_meta_rsv_lock); 3839 if (type == BTRFS_QGROUP_RSV_META_PREALLOC) { 3840 num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc, 3841 num_bytes); 3842 root->qgroup_meta_rsv_prealloc -= num_bytes; 3843 } else { 3844 num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans, 3845 num_bytes); 3846 root->qgroup_meta_rsv_pertrans -= num_bytes; 3847 } 3848 spin_unlock(&root->qgroup_meta_rsv_lock); 3849 return num_bytes; 3850 } 3851 3852 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3853 enum btrfs_qgroup_rsv_type type, bool enforce) 3854 { 3855 struct btrfs_fs_info *fs_info = root->fs_info; 3856 int ret; 3857 3858 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3859 !is_fstree(root->root_key.objectid) || num_bytes == 0) 3860 return 0; 3861 3862 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3863 trace_qgroup_meta_reserve(root, (s64)num_bytes, type); 3864 ret = qgroup_reserve(root, num_bytes, enforce, type); 3865 if (ret < 0) 3866 return ret; 3867 /* 3868 * Record what we have reserved into root. 3869 * 3870 * To avoid quota disabled->enabled underflow. 3871 * In that case, we may try to free space we haven't reserved 3872 * (since quota was disabled), so record what we reserved into root. 3873 * And ensure later release won't underflow this number. 
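 *
 * Illustrative case: if quotas were off when a 16K META_PREALLOC reservation
 * was requested, nothing was recorded in root->qgroup_meta_rsv_prealloc; once
 * quotas are enabled, the matching free is capped to the recorded amount (0)
 * by sub_root_meta_rsv(), so the qgroup counters don't underflow.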
3874 */ 3875 add_root_meta_rsv(root, num_bytes, type); 3876 return ret; 3877 } 3878 3879 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3880 enum btrfs_qgroup_rsv_type type, bool enforce) 3881 { 3882 int ret; 3883 3884 ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); 3885 if (ret <= 0 && ret != -EDQUOT) 3886 return ret; 3887 3888 ret = try_flush_qgroup(root); 3889 if (ret < 0) 3890 return ret; 3891 return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); 3892 } 3893 3894 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) 3895 { 3896 struct btrfs_fs_info *fs_info = root->fs_info; 3897 3898 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3899 !is_fstree(root->root_key.objectid)) 3900 return; 3901 3902 /* TODO: Update trace point to handle such free */ 3903 trace_qgroup_meta_free_all_pertrans(root); 3904 /* Special value -1 means to free all reserved space */ 3905 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1, 3906 BTRFS_QGROUP_RSV_META_PERTRANS); 3907 } 3908 3909 void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes, 3910 enum btrfs_qgroup_rsv_type type) 3911 { 3912 struct btrfs_fs_info *fs_info = root->fs_info; 3913 3914 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3915 !is_fstree(root->root_key.objectid)) 3916 return; 3917 3918 /* 3919 * reservation for META_PREALLOC can happen before quota is enabled, 3920 * which can lead to underflow. 3921 * Here ensure we will only free what we really have reserved. 3922 */ 3923 num_bytes = sub_root_meta_rsv(root, num_bytes, type); 3924 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3925 trace_qgroup_meta_reserve(root, -(s64)num_bytes, type); 3926 btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, 3927 num_bytes, type); 3928 } 3929 3930 static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root, 3931 int num_bytes) 3932 { 3933 struct btrfs_qgroup *qgroup; 3934 struct ulist_node *unode; 3935 struct ulist_iterator uiter; 3936 int ret = 0; 3937 3938 if (num_bytes == 0) 3939 return; 3940 if (!fs_info->quota_root) 3941 return; 3942 3943 spin_lock(&fs_info->qgroup_lock); 3944 qgroup = find_qgroup_rb(fs_info, ref_root); 3945 if (!qgroup) 3946 goto out; 3947 ulist_reinit(fs_info->qgroup_ulist); 3948 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 3949 qgroup_to_aux(qgroup), GFP_ATOMIC); 3950 if (ret < 0) 3951 goto out; 3952 ULIST_ITER_INIT(&uiter); 3953 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 3954 struct btrfs_qgroup *qg; 3955 struct btrfs_qgroup_list *glist; 3956 3957 qg = unode_aux_to_qgroup(unode); 3958 3959 qgroup_rsv_release(fs_info, qg, num_bytes, 3960 BTRFS_QGROUP_RSV_META_PREALLOC); 3961 qgroup_rsv_add(fs_info, qg, num_bytes, 3962 BTRFS_QGROUP_RSV_META_PERTRANS); 3963 list_for_each_entry(glist, &qg->groups, next_group) { 3964 ret = ulist_add(fs_info->qgroup_ulist, 3965 glist->group->qgroupid, 3966 qgroup_to_aux(glist->group), GFP_ATOMIC); 3967 if (ret < 0) 3968 goto out; 3969 } 3970 } 3971 out: 3972 spin_unlock(&fs_info->qgroup_lock); 3973 } 3974 3975 void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes) 3976 { 3977 struct btrfs_fs_info *fs_info = root->fs_info; 3978 3979 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3980 !is_fstree(root->root_key.objectid)) 3981 return; 3982 /* Same as btrfs_qgroup_free_meta_prealloc() */ 3983 num_bytes = sub_root_meta_rsv(root, num_bytes, 3984 BTRFS_QGROUP_RSV_META_PREALLOC); 3985 
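/* Convert only the capped amount from META_PREALLOC to META_PERTRANS rsv */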
/*
 * Check for leaked qgroup reserved space, normally at inode destruction
 * time.
 */
void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
{
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator iter;
	int ret;

	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
				       EXTENT_QGROUP_RESERVED, &changeset);

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(inode->root->fs_info,
		"leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
				   btrfs_ino(inode), unode->val, unode->aux);
		}
		btrfs_qgroup_free_refroot(inode->root->fs_info,
				inode->root->root_key.objectid,
				changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
	}
	extent_changeset_release(&changeset);
}

void btrfs_qgroup_init_swapped_blocks(
	struct btrfs_qgroup_swapped_blocks *swapped_blocks)
{
	int i;

	spin_lock_init(&swapped_blocks->lock);
	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
		swapped_blocks->blocks[i] = RB_ROOT;
	swapped_blocks->swapped = false;
}

/*
 * Delete all swapped block records of @root.
 *
 * Every record here means we skipped a full subtree scan for qgroup.
 *
 * Gets called when committing one transaction.
 */
void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
{
	struct btrfs_qgroup_swapped_blocks *swapped_blocks;
	int i;

	swapped_blocks = &root->swapped_blocks;

	spin_lock(&swapped_blocks->lock);
	if (!swapped_blocks->swapped)
		goto out;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		struct rb_root *cur_root = &swapped_blocks->blocks[i];
		struct btrfs_qgroup_swapped_block *entry;
		struct btrfs_qgroup_swapped_block *next;

		rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
						     node)
			kfree(entry);
		swapped_blocks->blocks[i] = RB_ROOT;
	}
	swapped_blocks->swapped = false;
out:
	spin_unlock(&swapped_blocks->lock);
}
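/*
 * Lifecycle of these swapped-block records, as implemented in this file: a
 * record is added by btrfs_qgroup_add_swapped_blocks() when relocation swaps
 * a subtree root, consumed by btrfs_qgroup_trace_subtree_after_cow() the next
 * time the corresponding subvolume tree block gets COWed (triggering the
 * delayed subtree trace), and anything left over is dropped by
 * btrfs_qgroup_clean_swapped_blocks() above at transaction commit.
 */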
/*
 * Add subtree roots record into @subvol_root.
 *
 * @subvol_root:	tree root of the subvolume tree that gets swapped
 * @bg:			block group under balance
 * @subvol_parent/slot:	pointer to the subtree root in the subvolume tree
 * @reloc_parent/slot:	pointer to the subtree root in the reloc tree
 *			BOTH POINTERS ARE BEFORE TREE SWAP
 * @last_snapshot:	last snapshot generation of the subvolume tree
 */
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
		struct btrfs_root *subvol_root,
		struct btrfs_block_group *bg,
		struct extent_buffer *subvol_parent, int subvol_slot,
		struct extent_buffer *reloc_parent, int reloc_slot,
		u64 last_snapshot)
{
	struct btrfs_fs_info *fs_info = subvol_root->fs_info;
	struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
	struct btrfs_qgroup_swapped_block *block;
	struct rb_node **cur;
	struct rb_node *parent = NULL;
	int level = btrfs_header_level(subvol_parent) - 1;
	int ret = 0;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;

	if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
	    btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
		btrfs_err_rl(fs_info,
		"%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
			__func__,
			btrfs_node_ptr_generation(subvol_parent, subvol_slot),
			btrfs_node_ptr_generation(reloc_parent, reloc_slot));
		return -EUCLEAN;
	}

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * @reloc_parent/slot is still before swap, while @block is going to
	 * record the bytenr after swap, so we do the swap here.
	 */
	block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
	block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
							     reloc_slot);
	block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
	block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
							    subvol_slot);
	block->last_snapshot = last_snapshot;
	block->level = level;

	/*
	 * If we have bg == NULL, we're called from btrfs_recover_relocation(),
	 * no one else can modify tree blocks, thus the qgroup will not change
	 * no matter the value of trace_leaf.
	 */
	if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
		block->trace_leaf = true;
	else
		block->trace_leaf = false;
	btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);

	/* Insert @block into @blocks */
	spin_lock(&blocks->lock);
	cur = &blocks->blocks[level].rb_node;
	while (*cur) {
		struct btrfs_qgroup_swapped_block *entry;

		parent = *cur;
		entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
				 node);

		if (entry->subvol_bytenr < block->subvol_bytenr) {
			cur = &(*cur)->rb_left;
		} else if (entry->subvol_bytenr > block->subvol_bytenr) {
			cur = &(*cur)->rb_right;
		} else {
			if (entry->subvol_generation !=
					block->subvol_generation ||
			    entry->reloc_bytenr != block->reloc_bytenr ||
			    entry->reloc_generation !=
					block->reloc_generation) {
				/*
				 * Duplicated but mismatched entry found.
				 * Shouldn't happen.
				 *
				 * Marking qgroup inconsistent should be enough
				 * for end users.
				 */
				WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
				ret = -EEXIST;
			}
			kfree(block);
			goto out_unlock;
		}
	}
	rb_link_node(&block->node, parent, cur);
	rb_insert_color(&block->node, &blocks->blocks[level]);
	blocks->swapped = true;
out_unlock:
	spin_unlock(&blocks->lock);
out:
	if (ret < 0)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	return ret;
}

/*
 * Check if the tree block is a subtree root, and if so do the needed
 * delayed subtree trace for qgroup.
 *
 * This is called during btrfs_cow_block().
 */
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct extent_buffer *subvol_eb)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
	struct btrfs_qgroup_swapped_block *block;
	struct extent_buffer *reloc_eb = NULL;
	struct rb_node *node;
	bool found = false;
	bool swapped = false;
	int level = btrfs_header_level(subvol_eb);
	int ret = 0;
	int i;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;
	if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
		return 0;

	spin_lock(&blocks->lock);
	if (!blocks->swapped) {
		spin_unlock(&blocks->lock);
		return 0;
	}
	node = blocks->blocks[level].rb_node;

	while (node) {
		block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
		if (block->subvol_bytenr < subvol_eb->start) {
			node = node->rb_left;
		} else if (block->subvol_bytenr > subvol_eb->start) {
			node = node->rb_right;
		} else {
			found = true;
			break;
		}
	}
	if (!found) {
		spin_unlock(&blocks->lock);
		goto out;
	}
	/* Found one, remove it from @blocks first and update blocks->swapped */
	rb_erase(&block->node, &blocks->blocks[level]);
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (!RB_EMPTY_ROOT(&blocks->blocks[i])) {
			swapped = true;
			break;
		}
	}
	blocks->swapped = swapped;
	spin_unlock(&blocks->lock);

	/* Read out reloc subtree root */
	reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0,
				   block->reloc_generation, block->level,
				   &block->first_key);
	if (IS_ERR(reloc_eb)) {
		ret = PTR_ERR(reloc_eb);
		reloc_eb = NULL;
		goto free_out;
	}
	if (!extent_buffer_uptodate(reloc_eb)) {
		ret = -EIO;
		goto free_out;
	}

	ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
			block->last_snapshot, block->trace_leaf);
free_out:
	kfree(block);
	free_extent_buffer(reloc_eb);
out:
	if (ret < 0) {
		btrfs_err_rl(fs_info,
			     "failed to account subtree at bytenr %llu: %d",
			     subvol_eb->start, ret);
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	return ret;
}

void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
{
	struct btrfs_qgroup_extent_record *entry;
	struct btrfs_qgroup_extent_record *next;
	struct rb_root *root;

	root = &trans->delayed_refs.dirty_extent_root;
	rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
		ulist_free(entry->old_roots);
		kfree(entry);
	}
}