1 /* 2 * Copyright (C) 2011 STRATO. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include <linux/workqueue.h> 26 #include <linux/btrfs.h> 27 28 #include "ctree.h" 29 #include "transaction.h" 30 #include "disk-io.h" 31 #include "locking.h" 32 #include "ulist.h" 33 #include "backref.h" 34 #include "extent_io.h" 35 #include "qgroup.h" 36 37 38 /* TODO XXX FIXME 39 * - subvol delete -> delete when ref goes to 0? delete limits also? 
40 * - reorganize keys 41 * - compressed 42 * - sync 43 * - copy also limits on subvol creation 44 * - limit 45 * - caches fuer ulists 46 * - performance benchmarks 47 * - check all ioctl parameters 48 */ 49 50 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 51 int mod) 52 { 53 if (qg->old_refcnt < seq) 54 qg->old_refcnt = seq; 55 qg->old_refcnt += mod; 56 } 57 58 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 59 int mod) 60 { 61 if (qg->new_refcnt < seq) 62 qg->new_refcnt = seq; 63 qg->new_refcnt += mod; 64 } 65 66 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 67 { 68 if (qg->old_refcnt < seq) 69 return 0; 70 return qg->old_refcnt - seq; 71 } 72 73 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 74 { 75 if (qg->new_refcnt < seq) 76 return 0; 77 return qg->new_refcnt - seq; 78 } 79 80 /* 81 * glue structure to represent the relations between qgroups. 82 */ 83 struct btrfs_qgroup_list { 84 struct list_head next_group; 85 struct list_head next_member; 86 struct btrfs_qgroup *group; 87 struct btrfs_qgroup *member; 88 }; 89 90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 91 { 92 return (u64)(uintptr_t)qg; 93 } 94 95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 96 { 97 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 98 } 99 100 static int 101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 102 int init_flags); 103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 104 105 /* must be called with qgroup_ioctl_lock held */ 106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 107 u64 qgroupid) 108 { 109 struct rb_node *n = fs_info->qgroup_tree.rb_node; 110 struct btrfs_qgroup *qgroup; 111 112 while (n) { 113 qgroup = rb_entry(n, struct btrfs_qgroup, node); 114 if (qgroup->qgroupid < qgroupid) 115 n = n->rb_left; 116 else if 
(qgroup->qgroupid > qgroupid) 117 n = n->rb_right; 118 else 119 return qgroup; 120 } 121 return NULL; 122 } 123 124 /* must be called with qgroup_lock held */ 125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 126 u64 qgroupid) 127 { 128 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 129 struct rb_node *parent = NULL; 130 struct btrfs_qgroup *qgroup; 131 132 while (*p) { 133 parent = *p; 134 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 135 136 if (qgroup->qgroupid < qgroupid) 137 p = &(*p)->rb_left; 138 else if (qgroup->qgroupid > qgroupid) 139 p = &(*p)->rb_right; 140 else 141 return qgroup; 142 } 143 144 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 145 if (!qgroup) 146 return ERR_PTR(-ENOMEM); 147 148 qgroup->qgroupid = qgroupid; 149 INIT_LIST_HEAD(&qgroup->groups); 150 INIT_LIST_HEAD(&qgroup->members); 151 INIT_LIST_HEAD(&qgroup->dirty); 152 153 rb_link_node(&qgroup->node, parent, p); 154 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 155 156 return qgroup; 157 } 158 159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 160 { 161 struct btrfs_qgroup_list *list; 162 163 list_del(&qgroup->dirty); 164 while (!list_empty(&qgroup->groups)) { 165 list = list_first_entry(&qgroup->groups, 166 struct btrfs_qgroup_list, next_group); 167 list_del(&list->next_group); 168 list_del(&list->next_member); 169 kfree(list); 170 } 171 172 while (!list_empty(&qgroup->members)) { 173 list = list_first_entry(&qgroup->members, 174 struct btrfs_qgroup_list, next_member); 175 list_del(&list->next_group); 176 list_del(&list->next_member); 177 kfree(list); 178 } 179 kfree(qgroup); 180 } 181 182 /* must be called with qgroup_lock held */ 183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 184 { 185 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 186 187 if (!qgroup) 188 return -ENOENT; 189 190 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 191 __del_qgroup_rb(qgroup); 192 return 0; 193 } 194 195 
/* must be called with qgroup_lock held */ 196 static int add_relation_rb(struct btrfs_fs_info *fs_info, 197 u64 memberid, u64 parentid) 198 { 199 struct btrfs_qgroup *member; 200 struct btrfs_qgroup *parent; 201 struct btrfs_qgroup_list *list; 202 203 member = find_qgroup_rb(fs_info, memberid); 204 parent = find_qgroup_rb(fs_info, parentid); 205 if (!member || !parent) 206 return -ENOENT; 207 208 list = kzalloc(sizeof(*list), GFP_ATOMIC); 209 if (!list) 210 return -ENOMEM; 211 212 list->group = parent; 213 list->member = member; 214 list_add_tail(&list->next_group, &member->groups); 215 list_add_tail(&list->next_member, &parent->members); 216 217 return 0; 218 } 219 220 /* must be called with qgroup_lock held */ 221 static int del_relation_rb(struct btrfs_fs_info *fs_info, 222 u64 memberid, u64 parentid) 223 { 224 struct btrfs_qgroup *member; 225 struct btrfs_qgroup *parent; 226 struct btrfs_qgroup_list *list; 227 228 member = find_qgroup_rb(fs_info, memberid); 229 parent = find_qgroup_rb(fs_info, parentid); 230 if (!member || !parent) 231 return -ENOENT; 232 233 list_for_each_entry(list, &member->groups, next_group) { 234 if (list->group == parent) { 235 list_del(&list->next_group); 236 list_del(&list->next_member); 237 kfree(list); 238 return 0; 239 } 240 } 241 return -ENOENT; 242 } 243 244 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 245 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 246 u64 rfer, u64 excl) 247 { 248 struct btrfs_qgroup *qgroup; 249 250 qgroup = find_qgroup_rb(fs_info, qgroupid); 251 if (!qgroup) 252 return -EINVAL; 253 if (qgroup->rfer != rfer || qgroup->excl != excl) 254 return -EINVAL; 255 return 0; 256 } 257 #endif 258 259 /* 260 * The full config is read in one go, only called from open_ctree() 261 * It doesn't use any locking, as at this point we're still single-threaded 262 */ 263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 264 { 265 struct btrfs_key key; 266 struct btrfs_key found_key; 267 struct 
btrfs_root *quota_root = fs_info->quota_root; 268 struct btrfs_path *path = NULL; 269 struct extent_buffer *l; 270 int slot; 271 int ret = 0; 272 u64 flags = 0; 273 u64 rescan_progress = 0; 274 275 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 276 return 0; 277 278 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 279 if (!fs_info->qgroup_ulist) { 280 ret = -ENOMEM; 281 goto out; 282 } 283 284 path = btrfs_alloc_path(); 285 if (!path) { 286 ret = -ENOMEM; 287 goto out; 288 } 289 290 /* default this to quota off, in case no status key is found */ 291 fs_info->qgroup_flags = 0; 292 293 /* 294 * pass 1: read status, all qgroup infos and limits 295 */ 296 key.objectid = 0; 297 key.type = 0; 298 key.offset = 0; 299 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 300 if (ret) 301 goto out; 302 303 while (1) { 304 struct btrfs_qgroup *qgroup; 305 306 slot = path->slots[0]; 307 l = path->nodes[0]; 308 btrfs_item_key_to_cpu(l, &found_key, slot); 309 310 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 311 struct btrfs_qgroup_status_item *ptr; 312 313 ptr = btrfs_item_ptr(l, slot, 314 struct btrfs_qgroup_status_item); 315 316 if (btrfs_qgroup_status_version(l, ptr) != 317 BTRFS_QGROUP_STATUS_VERSION) { 318 btrfs_err(fs_info, 319 "old qgroup version, quota disabled"); 320 goto out; 321 } 322 if (btrfs_qgroup_status_generation(l, ptr) != 323 fs_info->generation) { 324 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 325 btrfs_err(fs_info, 326 "qgroup generation mismatch, marked as inconsistent"); 327 } 328 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 329 ptr); 330 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 331 goto next1; 332 } 333 334 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 335 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 336 goto next1; 337 338 qgroup = find_qgroup_rb(fs_info, found_key.offset); 339 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 340 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 341 
btrfs_err(fs_info, "inconsistent qgroup config"); 342 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 343 } 344 if (!qgroup) { 345 qgroup = add_qgroup_rb(fs_info, found_key.offset); 346 if (IS_ERR(qgroup)) { 347 ret = PTR_ERR(qgroup); 348 goto out; 349 } 350 } 351 switch (found_key.type) { 352 case BTRFS_QGROUP_INFO_KEY: { 353 struct btrfs_qgroup_info_item *ptr; 354 355 ptr = btrfs_item_ptr(l, slot, 356 struct btrfs_qgroup_info_item); 357 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 358 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 359 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 360 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 361 /* generation currently unused */ 362 break; 363 } 364 case BTRFS_QGROUP_LIMIT_KEY: { 365 struct btrfs_qgroup_limit_item *ptr; 366 367 ptr = btrfs_item_ptr(l, slot, 368 struct btrfs_qgroup_limit_item); 369 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 370 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 371 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 372 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 373 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 374 break; 375 } 376 } 377 next1: 378 ret = btrfs_next_item(quota_root, path); 379 if (ret < 0) 380 goto out; 381 if (ret) 382 break; 383 } 384 btrfs_release_path(path); 385 386 /* 387 * pass 2: read all qgroup relations 388 */ 389 key.objectid = 0; 390 key.type = BTRFS_QGROUP_RELATION_KEY; 391 key.offset = 0; 392 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 393 if (ret) 394 goto out; 395 while (1) { 396 slot = path->slots[0]; 397 l = path->nodes[0]; 398 btrfs_item_key_to_cpu(l, &found_key, slot); 399 400 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 401 goto next2; 402 403 if (found_key.objectid > found_key.offset) { 404 /* parent <- member, not needed to build config */ 405 /* FIXME should we omit the key completely? 
*/ 406 goto next2; 407 } 408 409 ret = add_relation_rb(fs_info, found_key.objectid, 410 found_key.offset); 411 if (ret == -ENOENT) { 412 btrfs_warn(fs_info, 413 "orphan qgroup relation 0x%llx->0x%llx", 414 found_key.objectid, found_key.offset); 415 ret = 0; /* ignore the error */ 416 } 417 if (ret) 418 goto out; 419 next2: 420 ret = btrfs_next_item(quota_root, path); 421 if (ret < 0) 422 goto out; 423 if (ret) 424 break; 425 } 426 out: 427 fs_info->qgroup_flags |= flags; 428 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 429 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 430 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 431 ret >= 0) 432 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 433 btrfs_free_path(path); 434 435 if (ret < 0) { 436 ulist_free(fs_info->qgroup_ulist); 437 fs_info->qgroup_ulist = NULL; 438 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 439 } 440 441 return ret < 0 ? ret : 0; 442 } 443 444 /* 445 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 446 * first two are in single-threaded paths.And for the third one, we have set 447 * quota_root to be null with qgroup_lock held before, so it is safe to clean 448 * up the in-memory structures without qgroup_lock held. 449 */ 450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 451 { 452 struct rb_node *n; 453 struct btrfs_qgroup *qgroup; 454 455 while ((n = rb_first(&fs_info->qgroup_tree))) { 456 qgroup = rb_entry(n, struct btrfs_qgroup, node); 457 rb_erase(n, &fs_info->qgroup_tree); 458 __del_qgroup_rb(qgroup); 459 } 460 /* 461 * we call btrfs_free_qgroup_config() when umounting 462 * filesystem and disabling quota, so we set qgroup_ulist 463 * to be null here to avoid double free. 
464 */ 465 ulist_free(fs_info->qgroup_ulist); 466 fs_info->qgroup_ulist = NULL; 467 } 468 469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 470 struct btrfs_root *quota_root, 471 u64 src, u64 dst) 472 { 473 int ret; 474 struct btrfs_path *path; 475 struct btrfs_key key; 476 477 path = btrfs_alloc_path(); 478 if (!path) 479 return -ENOMEM; 480 481 key.objectid = src; 482 key.type = BTRFS_QGROUP_RELATION_KEY; 483 key.offset = dst; 484 485 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 486 487 btrfs_mark_buffer_dirty(path->nodes[0]); 488 489 btrfs_free_path(path); 490 return ret; 491 } 492 493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 494 struct btrfs_root *quota_root, 495 u64 src, u64 dst) 496 { 497 int ret; 498 struct btrfs_path *path; 499 struct btrfs_key key; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return -ENOMEM; 504 505 key.objectid = src; 506 key.type = BTRFS_QGROUP_RELATION_KEY; 507 key.offset = dst; 508 509 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 510 if (ret < 0) 511 goto out; 512 513 if (ret > 0) { 514 ret = -ENOENT; 515 goto out; 516 } 517 518 ret = btrfs_del_item(trans, quota_root, path); 519 out: 520 btrfs_free_path(path); 521 return ret; 522 } 523 524 static int add_qgroup_item(struct btrfs_trans_handle *trans, 525 struct btrfs_root *quota_root, u64 qgroupid) 526 { 527 int ret; 528 struct btrfs_path *path; 529 struct btrfs_qgroup_info_item *qgroup_info; 530 struct btrfs_qgroup_limit_item *qgroup_limit; 531 struct extent_buffer *leaf; 532 struct btrfs_key key; 533 534 if (btrfs_is_testing(quota_root->fs_info)) 535 return 0; 536 537 path = btrfs_alloc_path(); 538 if (!path) 539 return -ENOMEM; 540 541 key.objectid = 0; 542 key.type = BTRFS_QGROUP_INFO_KEY; 543 key.offset = qgroupid; 544 545 /* 546 * Avoid a transaction abort by catching -EEXIST here. In that 547 * case, we proceed by re-initializing the existing structure 548 * on disk. 
549 */ 550 551 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 552 sizeof(*qgroup_info)); 553 if (ret && ret != -EEXIST) 554 goto out; 555 556 leaf = path->nodes[0]; 557 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 558 struct btrfs_qgroup_info_item); 559 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 560 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 561 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 562 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 563 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 564 565 btrfs_mark_buffer_dirty(leaf); 566 567 btrfs_release_path(path); 568 569 key.type = BTRFS_QGROUP_LIMIT_KEY; 570 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 571 sizeof(*qgroup_limit)); 572 if (ret && ret != -EEXIST) 573 goto out; 574 575 leaf = path->nodes[0]; 576 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 577 struct btrfs_qgroup_limit_item); 578 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 579 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 580 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 581 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 582 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 583 584 btrfs_mark_buffer_dirty(leaf); 585 586 ret = 0; 587 out: 588 btrfs_free_path(path); 589 return ret; 590 } 591 592 static int del_qgroup_item(struct btrfs_trans_handle *trans, 593 struct btrfs_root *quota_root, u64 qgroupid) 594 { 595 int ret; 596 struct btrfs_path *path; 597 struct btrfs_key key; 598 599 path = btrfs_alloc_path(); 600 if (!path) 601 return -ENOMEM; 602 603 key.objectid = 0; 604 key.type = BTRFS_QGROUP_INFO_KEY; 605 key.offset = qgroupid; 606 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 607 if (ret < 0) 608 goto out; 609 610 if (ret > 0) { 611 ret = -ENOENT; 612 goto out; 613 } 614 615 ret = btrfs_del_item(trans, quota_root, path); 616 if (ret) 617 goto out; 618 619 btrfs_release_path(path); 620 621 
key.type = BTRFS_QGROUP_LIMIT_KEY; 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *root, 640 struct btrfs_qgroup *qgroup) 641 { 642 struct btrfs_path *path; 643 struct btrfs_key key; 644 struct extent_buffer *l; 645 struct btrfs_qgroup_limit_item *qgroup_limit; 646 int ret; 647 int slot; 648 649 key.objectid = 0; 650 key.type = BTRFS_QGROUP_LIMIT_KEY; 651 key.offset = qgroup->qgroupid; 652 653 path = btrfs_alloc_path(); 654 if (!path) 655 return -ENOMEM; 656 657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 658 if (ret > 0) 659 ret = -ENOENT; 660 661 if (ret) 662 goto out; 663 664 l = path->nodes[0]; 665 slot = path->slots[0]; 666 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 667 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 668 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 669 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 670 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 671 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 672 673 btrfs_mark_buffer_dirty(l); 674 675 out: 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 681 struct btrfs_root *root, 682 struct btrfs_qgroup *qgroup) 683 { 684 struct btrfs_path *path; 685 struct btrfs_key key; 686 struct extent_buffer *l; 687 struct btrfs_qgroup_info_item *qgroup_info; 688 int ret; 689 int slot; 690 691 if (btrfs_is_testing(root->fs_info)) 692 return 0; 693 694 key.objectid = 0; 695 key.type = BTRFS_QGROUP_INFO_KEY; 696 key.offset = qgroup->qgroupid; 697 698 path = 
btrfs_alloc_path(); 699 if (!path) 700 return -ENOMEM; 701 702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 703 if (ret > 0) 704 ret = -ENOENT; 705 706 if (ret) 707 goto out; 708 709 l = path->nodes[0]; 710 slot = path->slots[0]; 711 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 712 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 713 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 714 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 715 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 716 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 717 718 btrfs_mark_buffer_dirty(l); 719 720 out: 721 btrfs_free_path(path); 722 return ret; 723 } 724 725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 726 struct btrfs_fs_info *fs_info, 727 struct btrfs_root *root) 728 { 729 struct btrfs_path *path; 730 struct btrfs_key key; 731 struct extent_buffer *l; 732 struct btrfs_qgroup_status_item *ptr; 733 int ret; 734 int slot; 735 736 key.objectid = 0; 737 key.type = BTRFS_QGROUP_STATUS_KEY; 738 key.offset = 0; 739 740 path = btrfs_alloc_path(); 741 if (!path) 742 return -ENOMEM; 743 744 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 745 if (ret > 0) 746 ret = -ENOENT; 747 748 if (ret) 749 goto out; 750 751 l = path->nodes[0]; 752 slot = path->slots[0]; 753 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 754 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 755 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 756 btrfs_set_qgroup_status_rescan(l, ptr, 757 fs_info->qgroup_rescan_progress.objectid); 758 759 btrfs_mark_buffer_dirty(l); 760 761 out: 762 btrfs_free_path(path); 763 return ret; 764 } 765 766 /* 767 * called with qgroup_lock held 768 */ 769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 770 struct btrfs_root *root) 771 { 772 struct btrfs_path *path; 773 struct btrfs_key key; 774 
struct extent_buffer *leaf = NULL; 775 int ret; 776 int nr = 0; 777 778 path = btrfs_alloc_path(); 779 if (!path) 780 return -ENOMEM; 781 782 path->leave_spinning = 1; 783 784 key.objectid = 0; 785 key.offset = 0; 786 key.type = 0; 787 788 while (1) { 789 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 790 if (ret < 0) 791 goto out; 792 leaf = path->nodes[0]; 793 nr = btrfs_header_nritems(leaf); 794 if (!nr) 795 break; 796 /* 797 * delete the leaf one by one 798 * since the whole tree is going 799 * to be deleted. 800 */ 801 path->slots[0] = 0; 802 ret = btrfs_del_items(trans, root, path, 0, nr); 803 if (ret) 804 goto out; 805 806 btrfs_release_path(path); 807 } 808 ret = 0; 809 out: 810 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 811 btrfs_free_path(path); 812 return ret; 813 } 814 815 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 816 struct btrfs_fs_info *fs_info) 817 { 818 struct btrfs_root *quota_root; 819 struct btrfs_root *tree_root = fs_info->tree_root; 820 struct btrfs_path *path = NULL; 821 struct btrfs_qgroup_status_item *ptr; 822 struct extent_buffer *leaf; 823 struct btrfs_key key; 824 struct btrfs_key found_key; 825 struct btrfs_qgroup *qgroup = NULL; 826 int ret = 0; 827 int slot; 828 829 mutex_lock(&fs_info->qgroup_ioctl_lock); 830 if (fs_info->quota_root) { 831 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 832 goto out; 833 } 834 835 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 836 if (!fs_info->qgroup_ulist) { 837 ret = -ENOMEM; 838 goto out; 839 } 840 841 /* 842 * initially create the quota tree 843 */ 844 quota_root = btrfs_create_tree(trans, fs_info, 845 BTRFS_QUOTA_TREE_OBJECTID); 846 if (IS_ERR(quota_root)) { 847 ret = PTR_ERR(quota_root); 848 goto out; 849 } 850 851 path = btrfs_alloc_path(); 852 if (!path) { 853 ret = -ENOMEM; 854 goto out_free_root; 855 } 856 857 key.objectid = 0; 858 key.type = BTRFS_QGROUP_STATUS_KEY; 859 key.offset = 0; 860 861 ret = btrfs_insert_empty_item(trans, quota_root, 
path, &key, 862 sizeof(*ptr)); 863 if (ret) 864 goto out_free_path; 865 866 leaf = path->nodes[0]; 867 ptr = btrfs_item_ptr(leaf, path->slots[0], 868 struct btrfs_qgroup_status_item); 869 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 870 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 871 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 872 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 873 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 874 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 875 876 btrfs_mark_buffer_dirty(leaf); 877 878 key.objectid = 0; 879 key.type = BTRFS_ROOT_REF_KEY; 880 key.offset = 0; 881 882 btrfs_release_path(path); 883 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 884 if (ret > 0) 885 goto out_add_root; 886 if (ret < 0) 887 goto out_free_path; 888 889 890 while (1) { 891 slot = path->slots[0]; 892 leaf = path->nodes[0]; 893 btrfs_item_key_to_cpu(leaf, &found_key, slot); 894 895 if (found_key.type == BTRFS_ROOT_REF_KEY) { 896 ret = add_qgroup_item(trans, quota_root, 897 found_key.offset); 898 if (ret) 899 goto out_free_path; 900 901 qgroup = add_qgroup_rb(fs_info, found_key.offset); 902 if (IS_ERR(qgroup)) { 903 ret = PTR_ERR(qgroup); 904 goto out_free_path; 905 } 906 } 907 ret = btrfs_next_item(tree_root, path); 908 if (ret < 0) 909 goto out_free_path; 910 if (ret) 911 break; 912 } 913 914 out_add_root: 915 btrfs_release_path(path); 916 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 917 if (ret) 918 goto out_free_path; 919 920 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 921 if (IS_ERR(qgroup)) { 922 ret = PTR_ERR(qgroup); 923 goto out_free_path; 924 } 925 spin_lock(&fs_info->qgroup_lock); 926 fs_info->quota_root = quota_root; 927 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 928 spin_unlock(&fs_info->qgroup_lock); 929 out_free_path: 930 btrfs_free_path(path); 931 out_free_root: 932 if (ret) { 933 free_extent_buffer(quota_root->node); 
934 free_extent_buffer(quota_root->commit_root); 935 kfree(quota_root); 936 } 937 out: 938 if (ret) { 939 ulist_free(fs_info->qgroup_ulist); 940 fs_info->qgroup_ulist = NULL; 941 } 942 mutex_unlock(&fs_info->qgroup_ioctl_lock); 943 return ret; 944 } 945 946 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 947 struct btrfs_fs_info *fs_info) 948 { 949 struct btrfs_root *tree_root = fs_info->tree_root; 950 struct btrfs_root *quota_root; 951 int ret = 0; 952 953 mutex_lock(&fs_info->qgroup_ioctl_lock); 954 if (!fs_info->quota_root) 955 goto out; 956 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 957 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 958 btrfs_qgroup_wait_for_completion(fs_info, false); 959 spin_lock(&fs_info->qgroup_lock); 960 quota_root = fs_info->quota_root; 961 fs_info->quota_root = NULL; 962 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 963 spin_unlock(&fs_info->qgroup_lock); 964 965 btrfs_free_qgroup_config(fs_info); 966 967 ret = btrfs_clean_quota_tree(trans, quota_root); 968 if (ret) 969 goto out; 970 971 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 972 if (ret) 973 goto out; 974 975 list_del("a_root->dirty_list); 976 977 btrfs_tree_lock(quota_root->node); 978 clean_tree_block(fs_info, quota_root->node); 979 btrfs_tree_unlock(quota_root->node); 980 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 981 982 free_extent_buffer(quota_root->node); 983 free_extent_buffer(quota_root->commit_root); 984 kfree(quota_root); 985 out: 986 mutex_unlock(&fs_info->qgroup_ioctl_lock); 987 return ret; 988 } 989 990 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 991 struct btrfs_qgroup *qgroup) 992 { 993 if (list_empty(&qgroup->dirty)) 994 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 995 } 996 997 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 998 struct btrfs_qgroup *qgroup, 999 u64 num_bytes) 1000 { 1001 #ifdef CONFIG_BTRFS_DEBUG 1002 WARN_ON(qgroup->reserved < num_bytes); 
1003 btrfs_debug(fs_info, 1004 "qgroup %llu reserved space underflow, have: %llu, to free: %llu", 1005 qgroup->qgroupid, qgroup->reserved, num_bytes); 1006 #endif 1007 qgroup->reserved = 0; 1008 } 1009 /* 1010 * The easy accounting, if we are adding/removing the only ref for an extent 1011 * then this qgroup and all of the parent qgroups get their reference and 1012 * exclusive counts adjusted. 1013 * 1014 * Caller should hold fs_info->qgroup_lock. 1015 */ 1016 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1017 struct ulist *tmp, u64 ref_root, 1018 u64 num_bytes, int sign) 1019 { 1020 struct btrfs_qgroup *qgroup; 1021 struct btrfs_qgroup_list *glist; 1022 struct ulist_node *unode; 1023 struct ulist_iterator uiter; 1024 int ret = 0; 1025 1026 qgroup = find_qgroup_rb(fs_info, ref_root); 1027 if (!qgroup) 1028 goto out; 1029 1030 qgroup->rfer += sign * num_bytes; 1031 qgroup->rfer_cmpr += sign * num_bytes; 1032 1033 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1034 qgroup->excl += sign * num_bytes; 1035 qgroup->excl_cmpr += sign * num_bytes; 1036 if (sign > 0) { 1037 trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes); 1038 if (qgroup->reserved < num_bytes) 1039 report_reserved_underflow(fs_info, qgroup, num_bytes); 1040 else 1041 qgroup->reserved -= num_bytes; 1042 } 1043 1044 qgroup_dirty(fs_info, qgroup); 1045 1046 /* Get all of the parent groups that contain this qgroup */ 1047 list_for_each_entry(glist, &qgroup->groups, next_group) { 1048 ret = ulist_add(tmp, glist->group->qgroupid, 1049 qgroup_to_aux(glist->group), GFP_ATOMIC); 1050 if (ret < 0) 1051 goto out; 1052 } 1053 1054 /* Iterate all of the parents and adjust their reference counts */ 1055 ULIST_ITER_INIT(&uiter); 1056 while ((unode = ulist_next(tmp, &uiter))) { 1057 qgroup = unode_aux_to_qgroup(unode); 1058 qgroup->rfer += sign * num_bytes; 1059 qgroup->rfer_cmpr += sign * num_bytes; 1060 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1061 qgroup->excl += sign * 
num_bytes; 1062 if (sign > 0) { 1063 trace_qgroup_update_reserve(fs_info, qgroup, 1064 -(s64)num_bytes); 1065 if (qgroup->reserved < num_bytes) 1066 report_reserved_underflow(fs_info, qgroup, 1067 num_bytes); 1068 else 1069 qgroup->reserved -= num_bytes; 1070 } 1071 qgroup->excl_cmpr += sign * num_bytes; 1072 qgroup_dirty(fs_info, qgroup); 1073 1074 /* Add any parents of the parents */ 1075 list_for_each_entry(glist, &qgroup->groups, next_group) { 1076 ret = ulist_add(tmp, glist->group->qgroupid, 1077 qgroup_to_aux(glist->group), GFP_ATOMIC); 1078 if (ret < 0) 1079 goto out; 1080 } 1081 } 1082 ret = 0; 1083 out: 1084 return ret; 1085 } 1086 1087 1088 /* 1089 * Quick path for updating qgroup with only excl refs. 1090 * 1091 * In that case, just update all parent will be enough. 1092 * Or we needs to do a full rescan. 1093 * Caller should also hold fs_info->qgroup_lock. 1094 * 1095 * Return 0 for quick update, return >0 for need to full rescan 1096 * and mark INCONSISTENT flag. 1097 * Return < 0 for other error. 
1098 */ 1099 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1100 struct ulist *tmp, u64 src, u64 dst, 1101 int sign) 1102 { 1103 struct btrfs_qgroup *qgroup; 1104 int ret = 1; 1105 int err = 0; 1106 1107 qgroup = find_qgroup_rb(fs_info, src); 1108 if (!qgroup) 1109 goto out; 1110 if (qgroup->excl == qgroup->rfer) { 1111 ret = 0; 1112 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1113 qgroup->excl, sign); 1114 if (err < 0) { 1115 ret = err; 1116 goto out; 1117 } 1118 } 1119 out: 1120 if (ret) 1121 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1122 return ret; 1123 } 1124 1125 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1126 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1127 { 1128 struct btrfs_root *quota_root; 1129 struct btrfs_qgroup *parent; 1130 struct btrfs_qgroup *member; 1131 struct btrfs_qgroup_list *list; 1132 struct ulist *tmp; 1133 int ret = 0; 1134 1135 /* Check the level of src and dst first */ 1136 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1137 return -EINVAL; 1138 1139 tmp = ulist_alloc(GFP_KERNEL); 1140 if (!tmp) 1141 return -ENOMEM; 1142 1143 mutex_lock(&fs_info->qgroup_ioctl_lock); 1144 quota_root = fs_info->quota_root; 1145 if (!quota_root) { 1146 ret = -EINVAL; 1147 goto out; 1148 } 1149 member = find_qgroup_rb(fs_info, src); 1150 parent = find_qgroup_rb(fs_info, dst); 1151 if (!member || !parent) { 1152 ret = -EINVAL; 1153 goto out; 1154 } 1155 1156 /* check if such qgroup relation exist firstly */ 1157 list_for_each_entry(list, &member->groups, next_group) { 1158 if (list->group == parent) { 1159 ret = -EEXIST; 1160 goto out; 1161 } 1162 } 1163 1164 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1165 if (ret) 1166 goto out; 1167 1168 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1169 if (ret) { 1170 del_qgroup_relation_item(trans, quota_root, src, dst); 1171 goto out; 1172 } 1173 1174 spin_lock(&fs_info->qgroup_lock); 1175 ret = 
add_relation_rb(fs_info, src, dst); 1176 if (ret < 0) { 1177 spin_unlock(&fs_info->qgroup_lock); 1178 goto out; 1179 } 1180 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1181 spin_unlock(&fs_info->qgroup_lock); 1182 out: 1183 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1184 ulist_free(tmp); 1185 return ret; 1186 } 1187 1188 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1189 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1190 { 1191 struct btrfs_root *quota_root; 1192 struct btrfs_qgroup *parent; 1193 struct btrfs_qgroup *member; 1194 struct btrfs_qgroup_list *list; 1195 struct ulist *tmp; 1196 int ret = 0; 1197 int err; 1198 1199 tmp = ulist_alloc(GFP_KERNEL); 1200 if (!tmp) 1201 return -ENOMEM; 1202 1203 quota_root = fs_info->quota_root; 1204 if (!quota_root) { 1205 ret = -EINVAL; 1206 goto out; 1207 } 1208 1209 member = find_qgroup_rb(fs_info, src); 1210 parent = find_qgroup_rb(fs_info, dst); 1211 if (!member || !parent) { 1212 ret = -EINVAL; 1213 goto out; 1214 } 1215 1216 /* check if such qgroup relation exist firstly */ 1217 list_for_each_entry(list, &member->groups, next_group) { 1218 if (list->group == parent) 1219 goto exist; 1220 } 1221 ret = -ENOENT; 1222 goto out; 1223 exist: 1224 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1225 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1226 if (err && !ret) 1227 ret = err; 1228 1229 spin_lock(&fs_info->qgroup_lock); 1230 del_relation_rb(fs_info, src, dst); 1231 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1232 spin_unlock(&fs_info->qgroup_lock); 1233 out: 1234 ulist_free(tmp); 1235 return ret; 1236 } 1237 1238 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1239 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1240 { 1241 int ret = 0; 1242 1243 mutex_lock(&fs_info->qgroup_ioctl_lock); 1244 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1245 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1246 1247 return ret; 1248 } 
1249 1250 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1251 struct btrfs_fs_info *fs_info, u64 qgroupid) 1252 { 1253 struct btrfs_root *quota_root; 1254 struct btrfs_qgroup *qgroup; 1255 int ret = 0; 1256 1257 mutex_lock(&fs_info->qgroup_ioctl_lock); 1258 quota_root = fs_info->quota_root; 1259 if (!quota_root) { 1260 ret = -EINVAL; 1261 goto out; 1262 } 1263 qgroup = find_qgroup_rb(fs_info, qgroupid); 1264 if (qgroup) { 1265 ret = -EEXIST; 1266 goto out; 1267 } 1268 1269 ret = add_qgroup_item(trans, quota_root, qgroupid); 1270 if (ret) 1271 goto out; 1272 1273 spin_lock(&fs_info->qgroup_lock); 1274 qgroup = add_qgroup_rb(fs_info, qgroupid); 1275 spin_unlock(&fs_info->qgroup_lock); 1276 1277 if (IS_ERR(qgroup)) 1278 ret = PTR_ERR(qgroup); 1279 out: 1280 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1281 return ret; 1282 } 1283 1284 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1285 struct btrfs_fs_info *fs_info, u64 qgroupid) 1286 { 1287 struct btrfs_root *quota_root; 1288 struct btrfs_qgroup *qgroup; 1289 struct btrfs_qgroup_list *list; 1290 int ret = 0; 1291 1292 mutex_lock(&fs_info->qgroup_ioctl_lock); 1293 quota_root = fs_info->quota_root; 1294 if (!quota_root) { 1295 ret = -EINVAL; 1296 goto out; 1297 } 1298 1299 qgroup = find_qgroup_rb(fs_info, qgroupid); 1300 if (!qgroup) { 1301 ret = -ENOENT; 1302 goto out; 1303 } else { 1304 /* check if there are no children of this qgroup */ 1305 if (!list_empty(&qgroup->members)) { 1306 ret = -EBUSY; 1307 goto out; 1308 } 1309 } 1310 ret = del_qgroup_item(trans, quota_root, qgroupid); 1311 1312 while (!list_empty(&qgroup->groups)) { 1313 list = list_first_entry(&qgroup->groups, 1314 struct btrfs_qgroup_list, next_group); 1315 ret = __del_qgroup_relation(trans, fs_info, 1316 qgroupid, 1317 list->group->qgroupid); 1318 if (ret) 1319 goto out; 1320 } 1321 1322 spin_lock(&fs_info->qgroup_lock); 1323 del_qgroup_rb(fs_info, qgroupid); 1324 spin_unlock(&fs_info->qgroup_lock); 1325 out: 1326 
mutex_unlock(&fs_info->qgroup_ioctl_lock); 1327 return ret; 1328 } 1329 1330 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1331 struct btrfs_fs_info *fs_info, u64 qgroupid, 1332 struct btrfs_qgroup_limit *limit) 1333 { 1334 struct btrfs_root *quota_root; 1335 struct btrfs_qgroup *qgroup; 1336 int ret = 0; 1337 /* Sometimes we would want to clear the limit on this qgroup. 1338 * To meet this requirement, we treat the -1 as a special value 1339 * which tell kernel to clear the limit on this qgroup. 1340 */ 1341 const u64 CLEAR_VALUE = -1; 1342 1343 mutex_lock(&fs_info->qgroup_ioctl_lock); 1344 quota_root = fs_info->quota_root; 1345 if (!quota_root) { 1346 ret = -EINVAL; 1347 goto out; 1348 } 1349 1350 qgroup = find_qgroup_rb(fs_info, qgroupid); 1351 if (!qgroup) { 1352 ret = -ENOENT; 1353 goto out; 1354 } 1355 1356 spin_lock(&fs_info->qgroup_lock); 1357 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1358 if (limit->max_rfer == CLEAR_VALUE) { 1359 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1360 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1361 qgroup->max_rfer = 0; 1362 } else { 1363 qgroup->max_rfer = limit->max_rfer; 1364 } 1365 } 1366 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1367 if (limit->max_excl == CLEAR_VALUE) { 1368 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1369 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1370 qgroup->max_excl = 0; 1371 } else { 1372 qgroup->max_excl = limit->max_excl; 1373 } 1374 } 1375 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1376 if (limit->rsv_rfer == CLEAR_VALUE) { 1377 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1378 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1379 qgroup->rsv_rfer = 0; 1380 } else { 1381 qgroup->rsv_rfer = limit->rsv_rfer; 1382 } 1383 } 1384 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1385 if (limit->rsv_excl == CLEAR_VALUE) { 1386 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1387 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1388 qgroup->rsv_excl 
= 0; 1389 } else { 1390 qgroup->rsv_excl = limit->rsv_excl; 1391 } 1392 } 1393 qgroup->lim_flags |= limit->flags; 1394 1395 spin_unlock(&fs_info->qgroup_lock); 1396 1397 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1398 if (ret) { 1399 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1400 btrfs_info(fs_info, "unable to update quota limit for %llu", 1401 qgroupid); 1402 } 1403 1404 out: 1405 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1406 return ret; 1407 } 1408 1409 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1410 struct btrfs_delayed_ref_root *delayed_refs, 1411 struct btrfs_qgroup_extent_record *record) 1412 { 1413 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1414 struct rb_node *parent_node = NULL; 1415 struct btrfs_qgroup_extent_record *entry; 1416 u64 bytenr = record->bytenr; 1417 1418 assert_spin_locked(&delayed_refs->lock); 1419 trace_btrfs_qgroup_trace_extent(fs_info, record); 1420 1421 while (*p) { 1422 parent_node = *p; 1423 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1424 node); 1425 if (bytenr < entry->bytenr) 1426 p = &(*p)->rb_left; 1427 else if (bytenr > entry->bytenr) 1428 p = &(*p)->rb_right; 1429 else 1430 return 1; 1431 } 1432 1433 rb_link_node(&record->node, parent_node, p); 1434 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1435 return 0; 1436 } 1437 1438 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1439 struct btrfs_qgroup_extent_record *qrecord) 1440 { 1441 struct ulist *old_root; 1442 u64 bytenr = qrecord->bytenr; 1443 int ret; 1444 1445 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); 1446 if (ret < 0) 1447 return ret; 1448 1449 /* 1450 * Here we don't need to get the lock of 1451 * trans->transaction->delayed_refs, since inserted qrecord won't 1452 * be deleted, only qrecord->node may be modified (new qrecord insert) 1453 * 1454 * So modifying qrecord->old_roots is safe here 1455 */ 1456 
qrecord->old_roots = old_root; 1457 return 0; 1458 } 1459 1460 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1461 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1462 gfp_t gfp_flag) 1463 { 1464 struct btrfs_qgroup_extent_record *record; 1465 struct btrfs_delayed_ref_root *delayed_refs; 1466 int ret; 1467 1468 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1469 || bytenr == 0 || num_bytes == 0) 1470 return 0; 1471 if (WARN_ON(trans == NULL)) 1472 return -EINVAL; 1473 record = kmalloc(sizeof(*record), gfp_flag); 1474 if (!record) 1475 return -ENOMEM; 1476 1477 delayed_refs = &trans->transaction->delayed_refs; 1478 record->bytenr = bytenr; 1479 record->num_bytes = num_bytes; 1480 record->old_roots = NULL; 1481 1482 spin_lock(&delayed_refs->lock); 1483 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1484 spin_unlock(&delayed_refs->lock); 1485 if (ret > 0) { 1486 kfree(record); 1487 return 0; 1488 } 1489 return btrfs_qgroup_trace_extent_post(fs_info, record); 1490 } 1491 1492 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1493 struct btrfs_fs_info *fs_info, 1494 struct extent_buffer *eb) 1495 { 1496 int nr = btrfs_header_nritems(eb); 1497 int i, extent_type, ret; 1498 struct btrfs_key key; 1499 struct btrfs_file_extent_item *fi; 1500 u64 bytenr, num_bytes; 1501 1502 /* We can be called directly from walk_up_proc() */ 1503 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1504 return 0; 1505 1506 for (i = 0; i < nr; i++) { 1507 btrfs_item_key_to_cpu(eb, &key, i); 1508 1509 if (key.type != BTRFS_EXTENT_DATA_KEY) 1510 continue; 1511 1512 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1513 /* filter out non qgroup-accountable extents */ 1514 extent_type = btrfs_file_extent_type(eb, fi); 1515 1516 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1517 continue; 1518 1519 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1520 if (!bytenr) 1521 continue; 1522 1523 num_bytes = 
btrfs_file_extent_disk_num_bytes(eb, fi); 1524 1525 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1526 num_bytes, GFP_NOFS); 1527 if (ret) 1528 return ret; 1529 } 1530 cond_resched(); 1531 return 0; 1532 } 1533 1534 /* 1535 * Walk up the tree from the bottom, freeing leaves and any interior 1536 * nodes which have had all slots visited. If a node (leaf or 1537 * interior) is freed, the node above it will have it's slot 1538 * incremented. The root node will never be freed. 1539 * 1540 * At the end of this function, we should have a path which has all 1541 * slots incremented to the next position for a search. If we need to 1542 * read a new node it will be NULL and the node above it will have the 1543 * correct slot selected for a later read. 1544 * 1545 * If we increment the root nodes slot counter past the number of 1546 * elements, 1 is returned to signal completion of the search. 1547 */ 1548 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1549 { 1550 int level = 0; 1551 int nr, slot; 1552 struct extent_buffer *eb; 1553 1554 if (root_level == 0) 1555 return 1; 1556 1557 while (level <= root_level) { 1558 eb = path->nodes[level]; 1559 nr = btrfs_header_nritems(eb); 1560 path->slots[level]++; 1561 slot = path->slots[level]; 1562 if (slot >= nr || level == 0) { 1563 /* 1564 * Don't free the root - we will detect this 1565 * condition after our loop and return a 1566 * positive value for caller to stop walking the tree. 1567 */ 1568 if (level != root_level) { 1569 btrfs_tree_unlock_rw(eb, path->locks[level]); 1570 path->locks[level] = 0; 1571 1572 free_extent_buffer(eb); 1573 path->nodes[level] = NULL; 1574 path->slots[level] = 0; 1575 } 1576 } else { 1577 /* 1578 * We have a valid slot to walk back down 1579 * from. Stop here so caller can process these 1580 * new nodes. 
1581 */ 1582 break; 1583 } 1584 1585 level++; 1586 } 1587 1588 eb = path->nodes[root_level]; 1589 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1590 return 1; 1591 1592 return 0; 1593 } 1594 1595 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1596 struct btrfs_root *root, 1597 struct extent_buffer *root_eb, 1598 u64 root_gen, int root_level) 1599 { 1600 struct btrfs_fs_info *fs_info = root->fs_info; 1601 int ret = 0; 1602 int level; 1603 struct extent_buffer *eb = root_eb; 1604 struct btrfs_path *path = NULL; 1605 1606 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 1607 BUG_ON(root_eb == NULL); 1608 1609 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1610 return 0; 1611 1612 if (!extent_buffer_uptodate(root_eb)) { 1613 ret = btrfs_read_buffer(root_eb, root_gen); 1614 if (ret) 1615 goto out; 1616 } 1617 1618 if (root_level == 0) { 1619 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1620 goto out; 1621 } 1622 1623 path = btrfs_alloc_path(); 1624 if (!path) 1625 return -ENOMEM; 1626 1627 /* 1628 * Walk down the tree. Missing extent blocks are filled in as 1629 * we go. Metadata is accounted every time we read a new 1630 * extent block. 1631 * 1632 * When we reach a leaf, we account for file extent items in it, 1633 * walk back up the tree (adjusting slot pointers as we go) 1634 * and restart the search process. 1635 */ 1636 extent_buffer_get(root_eb); /* For path */ 1637 path->nodes[root_level] = root_eb; 1638 path->slots[root_level] = 0; 1639 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1640 walk_down: 1641 level = root_level; 1642 while (level >= 0) { 1643 if (path->nodes[level] == NULL) { 1644 int parent_slot; 1645 u64 child_gen; 1646 u64 child_bytenr; 1647 1648 /* 1649 * We need to get child blockptr/gen from parent before 1650 * we can read it. 
1651 */ 1652 eb = path->nodes[level + 1]; 1653 parent_slot = path->slots[level + 1]; 1654 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1655 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1656 1657 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1658 if (IS_ERR(eb)) { 1659 ret = PTR_ERR(eb); 1660 goto out; 1661 } else if (!extent_buffer_uptodate(eb)) { 1662 free_extent_buffer(eb); 1663 ret = -EIO; 1664 goto out; 1665 } 1666 1667 path->nodes[level] = eb; 1668 path->slots[level] = 0; 1669 1670 btrfs_tree_read_lock(eb); 1671 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1672 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1673 1674 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1675 child_bytenr, 1676 fs_info->nodesize, 1677 GFP_NOFS); 1678 if (ret) 1679 goto out; 1680 } 1681 1682 if (level == 0) { 1683 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1684 path->nodes[level]); 1685 if (ret) 1686 goto out; 1687 1688 /* Nonzero return here means we completed our search */ 1689 ret = adjust_slots_upwards(path, root_level); 1690 if (ret) 1691 break; 1692 1693 /* Restart search with new slots */ 1694 goto walk_down; 1695 } 1696 1697 level--; 1698 } 1699 1700 ret = 0; 1701 out: 1702 btrfs_free_path(path); 1703 1704 return ret; 1705 } 1706 1707 #define UPDATE_NEW 0 1708 #define UPDATE_OLD 1 1709 /* 1710 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
1711 */ 1712 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1713 struct ulist *roots, struct ulist *tmp, 1714 struct ulist *qgroups, u64 seq, int update_old) 1715 { 1716 struct ulist_node *unode; 1717 struct ulist_iterator uiter; 1718 struct ulist_node *tmp_unode; 1719 struct ulist_iterator tmp_uiter; 1720 struct btrfs_qgroup *qg; 1721 int ret = 0; 1722 1723 if (!roots) 1724 return 0; 1725 ULIST_ITER_INIT(&uiter); 1726 while ((unode = ulist_next(roots, &uiter))) { 1727 qg = find_qgroup_rb(fs_info, unode->val); 1728 if (!qg) 1729 continue; 1730 1731 ulist_reinit(tmp); 1732 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1733 GFP_ATOMIC); 1734 if (ret < 0) 1735 return ret; 1736 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1737 if (ret < 0) 1738 return ret; 1739 ULIST_ITER_INIT(&tmp_uiter); 1740 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1741 struct btrfs_qgroup_list *glist; 1742 1743 qg = unode_aux_to_qgroup(tmp_unode); 1744 if (update_old) 1745 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1746 else 1747 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1748 list_for_each_entry(glist, &qg->groups, next_group) { 1749 ret = ulist_add(qgroups, glist->group->qgroupid, 1750 qgroup_to_aux(glist->group), 1751 GFP_ATOMIC); 1752 if (ret < 0) 1753 return ret; 1754 ret = ulist_add(tmp, glist->group->qgroupid, 1755 qgroup_to_aux(glist->group), 1756 GFP_ATOMIC); 1757 if (ret < 0) 1758 return ret; 1759 } 1760 } 1761 } 1762 return 0; 1763 } 1764 1765 /* 1766 * Update qgroup rfer/excl counters. 1767 * Rfer update is easy, codes can explain themselves. 1768 * 1769 * Excl update is tricky, the update is split into 2 part. 
1770 * Part 1: Possible exclusive <-> sharing detect: 1771 * | A | !A | 1772 * ------------------------------------- 1773 * B | * | - | 1774 * ------------------------------------- 1775 * !B | + | ** | 1776 * ------------------------------------- 1777 * 1778 * Conditions: 1779 * A: cur_old_roots < nr_old_roots (not exclusive before) 1780 * !A: cur_old_roots == nr_old_roots (possible exclusive before) 1781 * B: cur_new_roots < nr_new_roots (not exclusive now) 1782 * !B: cur_new_roots == nr_new_roots (possible exclusive now) 1783 * 1784 * Results: 1785 * +: Possible sharing -> exclusive -: Possible exclusive -> sharing 1786 * *: Definitely not changed. **: Possible unchanged. 1787 * 1788 * For !A and !B condition, the exception is cur_old/new_roots == 0 case. 1789 * 1790 * To make the logic clear, we first use condition A and B to split 1791 * combination into 4 results. 1792 * 1793 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them 1794 * only on variant maybe 0. 1795 * 1796 * Lastly, check result **, since there are 2 variants maybe 0, split them 1797 * again(2x2). 1798 * But this time we don't need to consider other things, the codes and logic 1799 * is easy to understand now. 
1800 */ 1801 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1802 struct ulist *qgroups, 1803 u64 nr_old_roots, 1804 u64 nr_new_roots, 1805 u64 num_bytes, u64 seq) 1806 { 1807 struct ulist_node *unode; 1808 struct ulist_iterator uiter; 1809 struct btrfs_qgroup *qg; 1810 u64 cur_new_count, cur_old_count; 1811 1812 ULIST_ITER_INIT(&uiter); 1813 while ((unode = ulist_next(qgroups, &uiter))) { 1814 bool dirty = false; 1815 1816 qg = unode_aux_to_qgroup(unode); 1817 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1818 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1819 1820 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1821 cur_old_count, cur_new_count); 1822 1823 /* Rfer update part */ 1824 if (cur_old_count == 0 && cur_new_count > 0) { 1825 qg->rfer += num_bytes; 1826 qg->rfer_cmpr += num_bytes; 1827 dirty = true; 1828 } 1829 if (cur_old_count > 0 && cur_new_count == 0) { 1830 qg->rfer -= num_bytes; 1831 qg->rfer_cmpr -= num_bytes; 1832 dirty = true; 1833 } 1834 1835 /* Excl update part */ 1836 /* Exclusive/none -> shared case */ 1837 if (cur_old_count == nr_old_roots && 1838 cur_new_count < nr_new_roots) { 1839 /* Exclusive -> shared */ 1840 if (cur_old_count != 0) { 1841 qg->excl -= num_bytes; 1842 qg->excl_cmpr -= num_bytes; 1843 dirty = true; 1844 } 1845 } 1846 1847 /* Shared -> exclusive/none case */ 1848 if (cur_old_count < nr_old_roots && 1849 cur_new_count == nr_new_roots) { 1850 /* Shared->exclusive */ 1851 if (cur_new_count != 0) { 1852 qg->excl += num_bytes; 1853 qg->excl_cmpr += num_bytes; 1854 dirty = true; 1855 } 1856 } 1857 1858 /* Exclusive/none -> exclusive/none case */ 1859 if (cur_old_count == nr_old_roots && 1860 cur_new_count == nr_new_roots) { 1861 if (cur_old_count == 0) { 1862 /* None -> exclusive/none */ 1863 1864 if (cur_new_count != 0) { 1865 /* None -> exclusive */ 1866 qg->excl += num_bytes; 1867 qg->excl_cmpr += num_bytes; 1868 dirty = true; 1869 } 1870 /* None -> none, nothing changed */ 1871 } else { 
1872 /* Exclusive -> exclusive/none */ 1873 1874 if (cur_new_count == 0) { 1875 /* Exclusive -> none */ 1876 qg->excl -= num_bytes; 1877 qg->excl_cmpr -= num_bytes; 1878 dirty = true; 1879 } 1880 /* Exclusive -> exclusive, nothing changed */ 1881 } 1882 } 1883 1884 if (dirty) 1885 qgroup_dirty(fs_info, qg); 1886 } 1887 return 0; 1888 } 1889 1890 /* 1891 * Check if the @roots potentially is a list of fs tree roots 1892 * 1893 * Return 0 for definitely not a fs/subvol tree roots ulist 1894 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 1895 * one as well) 1896 */ 1897 static int maybe_fs_roots(struct ulist *roots) 1898 { 1899 struct ulist_node *unode; 1900 struct ulist_iterator uiter; 1901 1902 /* Empty one, still possible for fs roots */ 1903 if (!roots || roots->nnodes == 0) 1904 return 1; 1905 1906 ULIST_ITER_INIT(&uiter); 1907 unode = ulist_next(roots, &uiter); 1908 if (!unode) 1909 return 1; 1910 1911 /* 1912 * If it contains fs tree roots, then it must belong to fs/subvol 1913 * trees. 1914 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 
1915 */ 1916 return is_fstree(unode->val); 1917 } 1918 1919 int 1920 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1921 struct btrfs_fs_info *fs_info, 1922 u64 bytenr, u64 num_bytes, 1923 struct ulist *old_roots, struct ulist *new_roots) 1924 { 1925 struct ulist *qgroups = NULL; 1926 struct ulist *tmp = NULL; 1927 u64 seq; 1928 u64 nr_new_roots = 0; 1929 u64 nr_old_roots = 0; 1930 int ret = 0; 1931 1932 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1933 return 0; 1934 1935 if (new_roots) { 1936 if (!maybe_fs_roots(new_roots)) 1937 goto out_free; 1938 nr_new_roots = new_roots->nnodes; 1939 } 1940 if (old_roots) { 1941 if (!maybe_fs_roots(old_roots)) 1942 goto out_free; 1943 nr_old_roots = old_roots->nnodes; 1944 } 1945 1946 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 1947 if (nr_old_roots == 0 && nr_new_roots == 0) 1948 goto out_free; 1949 1950 BUG_ON(!fs_info->quota_root); 1951 1952 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1953 nr_old_roots, nr_new_roots); 1954 1955 qgroups = ulist_alloc(GFP_NOFS); 1956 if (!qgroups) { 1957 ret = -ENOMEM; 1958 goto out_free; 1959 } 1960 tmp = ulist_alloc(GFP_NOFS); 1961 if (!tmp) { 1962 ret = -ENOMEM; 1963 goto out_free; 1964 } 1965 1966 mutex_lock(&fs_info->qgroup_rescan_lock); 1967 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1968 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1969 mutex_unlock(&fs_info->qgroup_rescan_lock); 1970 ret = 0; 1971 goto out_free; 1972 } 1973 } 1974 mutex_unlock(&fs_info->qgroup_rescan_lock); 1975 1976 spin_lock(&fs_info->qgroup_lock); 1977 seq = fs_info->qgroup_seq; 1978 1979 /* Update old refcnts using old_roots */ 1980 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1981 UPDATE_OLD); 1982 if (ret < 0) 1983 goto out; 1984 1985 /* Update new refcnts using new_roots */ 1986 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1987 UPDATE_NEW); 1988 if (ret < 0) 1989 goto 
out; 1990 1991 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1992 num_bytes, seq); 1993 1994 /* 1995 * Bump qgroup_seq to avoid seq overlap 1996 */ 1997 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 1998 out: 1999 spin_unlock(&fs_info->qgroup_lock); 2000 out_free: 2001 ulist_free(tmp); 2002 ulist_free(qgroups); 2003 ulist_free(old_roots); 2004 ulist_free(new_roots); 2005 return ret; 2006 } 2007 2008 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2009 struct btrfs_fs_info *fs_info) 2010 { 2011 struct btrfs_qgroup_extent_record *record; 2012 struct btrfs_delayed_ref_root *delayed_refs; 2013 struct ulist *new_roots = NULL; 2014 struct rb_node *node; 2015 u64 qgroup_to_skip; 2016 int ret = 0; 2017 2018 delayed_refs = &trans->transaction->delayed_refs; 2019 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2020 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2021 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2022 node); 2023 2024 trace_btrfs_qgroup_account_extents(fs_info, record); 2025 2026 if (!ret) { 2027 /* 2028 * Old roots should be searched when inserting qgroup 2029 * extent record 2030 */ 2031 if (WARN_ON(!record->old_roots)) { 2032 /* Search commit root to find old_roots */ 2033 ret = btrfs_find_all_roots(NULL, fs_info, 2034 record->bytenr, 0, 2035 &record->old_roots); 2036 if (ret < 0) 2037 goto cleanup; 2038 } 2039 2040 /* 2041 * Use SEQ_LAST as time_seq to do special search, which 2042 * doesn't lock tree or delayed_refs and search current 2043 * root. It's safe inside commit_transaction(). 
2044 */ 2045 ret = btrfs_find_all_roots(trans, fs_info, 2046 record->bytenr, SEQ_LAST, &new_roots); 2047 if (ret < 0) 2048 goto cleanup; 2049 if (qgroup_to_skip) { 2050 ulist_del(new_roots, qgroup_to_skip, 0); 2051 ulist_del(record->old_roots, qgroup_to_skip, 2052 0); 2053 } 2054 ret = btrfs_qgroup_account_extent(trans, fs_info, 2055 record->bytenr, record->num_bytes, 2056 record->old_roots, new_roots); 2057 record->old_roots = NULL; 2058 new_roots = NULL; 2059 } 2060 cleanup: 2061 ulist_free(record->old_roots); 2062 ulist_free(new_roots); 2063 new_roots = NULL; 2064 rb_erase(node, &delayed_refs->dirty_extent_root); 2065 kfree(record); 2066 2067 } 2068 return ret; 2069 } 2070 2071 /* 2072 * called from commit_transaction. Writes all changed qgroups to disk. 2073 */ 2074 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2075 struct btrfs_fs_info *fs_info) 2076 { 2077 struct btrfs_root *quota_root = fs_info->quota_root; 2078 int ret = 0; 2079 int start_rescan_worker = 0; 2080 2081 if (!quota_root) 2082 goto out; 2083 2084 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2085 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2086 start_rescan_worker = 1; 2087 2088 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2089 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2090 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2091 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2092 2093 spin_lock(&fs_info->qgroup_lock); 2094 while (!list_empty(&fs_info->dirty_qgroups)) { 2095 struct btrfs_qgroup *qgroup; 2096 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2097 struct btrfs_qgroup, dirty); 2098 list_del_init(&qgroup->dirty); 2099 spin_unlock(&fs_info->qgroup_lock); 2100 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2101 if (ret) 2102 fs_info->qgroup_flags |= 2103 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2104 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2105 if (ret) 2106 fs_info->qgroup_flags 
|= 2107 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2108 spin_lock(&fs_info->qgroup_lock); 2109 } 2110 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2111 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2112 else 2113 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2114 spin_unlock(&fs_info->qgroup_lock); 2115 2116 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2117 if (ret) 2118 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2119 2120 if (!ret && start_rescan_worker) { 2121 ret = qgroup_rescan_init(fs_info, 0, 1); 2122 if (!ret) { 2123 qgroup_rescan_zero_tracking(fs_info); 2124 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2125 &fs_info->qgroup_rescan_work); 2126 } 2127 ret = 0; 2128 } 2129 2130 out: 2131 2132 return ret; 2133 } 2134 2135 /* 2136 * Copy the accounting information between qgroups. This is necessary 2137 * when a snapshot or a subvolume is created. Throwing an error will 2138 * cause a transaction abort so we take extra care here to only error 2139 * when a readonly fs is a reasonable outcome. 
2140 */ 2141 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2142 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2143 struct btrfs_qgroup_inherit *inherit) 2144 { 2145 int ret = 0; 2146 int i; 2147 u64 *i_qgroups; 2148 struct btrfs_root *quota_root = fs_info->quota_root; 2149 struct btrfs_qgroup *srcgroup; 2150 struct btrfs_qgroup *dstgroup; 2151 u32 level_size = 0; 2152 u64 nums; 2153 2154 mutex_lock(&fs_info->qgroup_ioctl_lock); 2155 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2156 goto out; 2157 2158 if (!quota_root) { 2159 ret = -EINVAL; 2160 goto out; 2161 } 2162 2163 if (inherit) { 2164 i_qgroups = (u64 *)(inherit + 1); 2165 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2166 2 * inherit->num_excl_copies; 2167 for (i = 0; i < nums; ++i) { 2168 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2169 2170 /* 2171 * Zero out invalid groups so we can ignore 2172 * them later. 2173 */ 2174 if (!srcgroup || 2175 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2176 *i_qgroups = 0ULL; 2177 2178 ++i_qgroups; 2179 } 2180 } 2181 2182 /* 2183 * create a tracking group for the subvol itself 2184 */ 2185 ret = add_qgroup_item(trans, quota_root, objectid); 2186 if (ret) 2187 goto out; 2188 2189 if (srcid) { 2190 struct btrfs_root *srcroot; 2191 struct btrfs_key srckey; 2192 2193 srckey.objectid = srcid; 2194 srckey.type = BTRFS_ROOT_ITEM_KEY; 2195 srckey.offset = (u64)-1; 2196 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2197 if (IS_ERR(srcroot)) { 2198 ret = PTR_ERR(srcroot); 2199 goto out; 2200 } 2201 2202 level_size = fs_info->nodesize; 2203 } 2204 2205 /* 2206 * add qgroup to all inherited groups 2207 */ 2208 if (inherit) { 2209 i_qgroups = (u64 *)(inherit + 1); 2210 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2211 if (*i_qgroups == 0) 2212 continue; 2213 ret = add_qgroup_relation_item(trans, quota_root, 2214 objectid, *i_qgroups); 2215 if (ret && ret != -EEXIST) 2216 goto out; 2217 ret = 
add_qgroup_relation_item(trans, quota_root, 2218 *i_qgroups, objectid); 2219 if (ret && ret != -EEXIST) 2220 goto out; 2221 } 2222 ret = 0; 2223 } 2224 2225 2226 spin_lock(&fs_info->qgroup_lock); 2227 2228 dstgroup = add_qgroup_rb(fs_info, objectid); 2229 if (IS_ERR(dstgroup)) { 2230 ret = PTR_ERR(dstgroup); 2231 goto unlock; 2232 } 2233 2234 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2235 dstgroup->lim_flags = inherit->lim.flags; 2236 dstgroup->max_rfer = inherit->lim.max_rfer; 2237 dstgroup->max_excl = inherit->lim.max_excl; 2238 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2239 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2240 2241 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2242 if (ret) { 2243 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2244 btrfs_info(fs_info, 2245 "unable to update quota limit for %llu", 2246 dstgroup->qgroupid); 2247 goto unlock; 2248 } 2249 } 2250 2251 if (srcid) { 2252 srcgroup = find_qgroup_rb(fs_info, srcid); 2253 if (!srcgroup) 2254 goto unlock; 2255 2256 /* 2257 * We call inherit after we clone the root in order to make sure 2258 * our counts don't go crazy, so at this point the only 2259 * difference between the two roots should be the root node. 
2260 */ 2261 dstgroup->rfer = srcgroup->rfer; 2262 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2263 dstgroup->excl = level_size; 2264 dstgroup->excl_cmpr = level_size; 2265 srcgroup->excl = level_size; 2266 srcgroup->excl_cmpr = level_size; 2267 2268 /* inherit the limit info */ 2269 dstgroup->lim_flags = srcgroup->lim_flags; 2270 dstgroup->max_rfer = srcgroup->max_rfer; 2271 dstgroup->max_excl = srcgroup->max_excl; 2272 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2273 dstgroup->rsv_excl = srcgroup->rsv_excl; 2274 2275 qgroup_dirty(fs_info, dstgroup); 2276 qgroup_dirty(fs_info, srcgroup); 2277 } 2278 2279 if (!inherit) 2280 goto unlock; 2281 2282 i_qgroups = (u64 *)(inherit + 1); 2283 for (i = 0; i < inherit->num_qgroups; ++i) { 2284 if (*i_qgroups) { 2285 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2286 if (ret) 2287 goto unlock; 2288 } 2289 ++i_qgroups; 2290 } 2291 2292 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2293 struct btrfs_qgroup *src; 2294 struct btrfs_qgroup *dst; 2295 2296 if (!i_qgroups[0] || !i_qgroups[1]) 2297 continue; 2298 2299 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2300 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2301 2302 if (!src || !dst) { 2303 ret = -EINVAL; 2304 goto unlock; 2305 } 2306 2307 dst->rfer = src->rfer - level_size; 2308 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2309 } 2310 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2311 struct btrfs_qgroup *src; 2312 struct btrfs_qgroup *dst; 2313 2314 if (!i_qgroups[0] || !i_qgroups[1]) 2315 continue; 2316 2317 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2318 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2319 2320 if (!src || !dst) { 2321 ret = -EINVAL; 2322 goto unlock; 2323 } 2324 2325 dst->excl = src->excl + level_size; 2326 dst->excl_cmpr = src->excl_cmpr + level_size; 2327 } 2328 2329 unlock: 2330 spin_unlock(&fs_info->qgroup_lock); 2331 out: 2332 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2333 return ret; 2334 } 2335 2336 static 
bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
	/*
	 * Returns true when adding @num_bytes to what @qg already has
	 * reserved keeps it within every limit enabled in qg->lim_flags,
	 * false as soon as the referenced (MAX_RFER) or exclusive (MAX_EXCL)
	 * limit would be exceeded. Limits whose flag is not set are ignored.
	 */
	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
	    qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
		return false;

	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
	    qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
		return false;

	return true;
}

/*
 * Reserve @num_bytes in the qgroup of @root and in every qgroup reachable
 * from it through the ->groups lists.
 *
 * The walk is done twice over fs_info->qgroup_ulist: the first pass only
 * checks limits (when @enforce is set), the second pass adds @num_bytes to
 * ->reserved of every qgroup collected by the first pass.
 *
 * Returns 0 when there is nothing to do (@root is not an fs tree,
 * @num_bytes is 0, there is no quota root or no qgroup for @root) or when
 * the reservation has been recorded, -EDQUOT when a limit would be
 * exceeded, or the negative error returned by ulist_add() or by one of the
 * flush/commit helpers used on the retry path.
 */
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	int retried = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	/* a CAP_SYS_RESOURCE task may bypass limits if the override bit is set */
	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
	    capable(CAP_SYS_RESOURCE))
		enforce = false;

retry:
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
			/*
			 * Commit the tree and retry, since we may have
			 * deletions which would free up space.
			 *
			 * This is attempted at most once (@retried) and only
			 * if the qgroup has outstanding reservations. The
			 * spinlock is dropped before the flush/commit calls;
			 * the error returns below therefore leave without
			 * going through the "out" label.
			 */
			if (!retried && qg->reserved > 0) {
				struct btrfs_trans_handle *trans;

				spin_unlock(&fs_info->qgroup_lock);
				ret = btrfs_start_delalloc_inodes(root, 0);
				if (ret)
					return ret;
				btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					return PTR_ERR(trans);
				ret = btrfs_commit_transaction(trans);
				if (ret)
					return ret;
				retried++;
				goto retry;
			}
			ret = -EDQUOT;
			goto out;
		}

		/* queue the parent groups so the loop visits them as well */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	/* discard the non-negative ulist_add() result left in ret */
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}

/*
 * Give back @num_bytes of reservation for the qgroup of @ref_root and for
 * every qgroup reachable from it through the ->groups lists.
 *
 * Does nothing if @ref_root is not an fs tree, @num_bytes is 0, there is no
 * quota root or no qgroup exists for @ref_root. A qgroup that has less than
 * @num_bytes reserved is reported through report_reserved_underflow() and
 * its ->reserved is left untouched. A ulist_add() failure stops the walk
 * early; the function returns void, so that error is not propagated.
 */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
		if (qg->reserved < num_bytes)
			report_reserved_underflow(fs_info, qg, num_bytes);
		else
			qg->reserved -= num_bytes;

		/* queue the parent groups so the loop visits them as well */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}

/*
 * Account the extents of one extent-tree leaf, starting at
 * fs_info->qgroup_rescan_progress.
 *
 * The leaf is cloned while qgroup_rescan_lock is held, the progress key is
 * advanced past the last item of that leaf, and the items are then processed
 * from the private copy with the path released and the lock dropped.
 *
 * returns < 0 on error, 0 when more leafs are to be scanned.
 * returns 1 when done.
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans)
{
	struct btrfs_key found;
	struct extent_buffer *scratch_leaf = NULL;
	struct ulist *roots = NULL;
	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
	u64 num_bytes;
	int slot;
	int ret;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	btrfs_debug(fs_info,
		"current progress key (%llu %u %llu), search_slot ret %d",
		fs_info->qgroup_rescan_progress.objectid,
		fs_info->qgroup_rescan_progress.type,
		fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* next call resumes after the last key of the leaf found above */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!scratch_leaf) {
		/* @path is left as is here; only the mutex is dropped */
		ret = -ENOMEM;
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		goto out;
	}
	extent_buffer_get(scratch_leaf);
	btrfs_tree_read_lock(scratch_leaf);
	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		/* only extent and metadata items are accounted */
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		/* metadata items count as one node; data items use the key offset */
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->nodesize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
					   &roots);
		if (ret < 0)
			goto out;
		/* For rescan, just pass old_roots as NULL */
		ret = btrfs_qgroup_account_extent(trans, fs_info,
				found.objectid, num_bytes, NULL, roots);
		if (ret < 0)
			goto out;
	}
out:
	if (scratch_leaf) {
		btrfs_tree_read_unlock_blocking(scratch_leaf);
		free_extent_buffer(scratch_leaf);
	}
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}

/*
 * Work function of the qgroup rescan.
 *
 * Calls qgroup_rescan_leaf() in a loop, one transaction per leaf, until it
 * returns non-zero, the filesystem is closing, or quota gets disabled
 * (-EINTR). Afterwards the RESCAN and INCONSISTENT status flags are updated,
 * the status item is written, the outcome is logged, and waiters on
 * qgroup_rescan_completion are woken.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	int err = -ENOMEM;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	err = 0;
	while (!err && !btrfs_fs_closing(fs_info)) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans);
		}
		/*
		 * err > 0 means the scan is finished: commit. The return
		 * value of the commit is not checked.
		 */
		if (err > 0)
			btrfs_commit_transaction(trans);
		else
			btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	/* keep the RESCAN flag set when the filesystem is closing */
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	/* a finished scan clears INCONSISTENT, a failed one sets it */
	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * only update status, since the previous part has already updated the
	 * qgroup info.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		btrfs_err(fs_info,
			  "fail to start transaction for status update: %d\n",
			  err);
		goto done;
	}
	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	if (ret < 0) {
		err = ret;
		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
	}
	btrfs_end_transaction(trans);

	if (btrfs_fs_closing(fs_info)) {
		btrfs_info(fs_info, "qgroup scan paused");
	} else if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err > 0 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_rescan_running = false;
	mutex_unlock(&fs_info->qgroup_rescan_lock);
	complete_all(&fs_info->qgroup_rescan_completion);
}

/*
 * Prepare the rescan state in @fs_info: progress key, completion, running
 * flag and the work item.
 *
 * With @init_flags set this starts a new rescan: it fails with -EINPROGRESS
 * if the RESCAN flag is already set and with -EINVAL if quota is not ON,
 * otherwise it sets the RESCAN flag. With @init_flags clear it fails with
 * -EINVAL unless both the RESCAN and ON flags are already set; that first
 * check is done before any lock is taken.
 *
 * Returns 0 on success or the negative error described above.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);

	/*
	 * ret is always 0 when execution falls through to here; the block
	 * below is only entered through the "err" label.
	 */
	if (ret) {
err:
		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
		return ret;
	}

	return 0;
}

/*
 * Reset the referenced/exclusive counters (plain and compressed) of every
 * qgroup in fs_info->qgroup_tree to 0, under qgroup_lock.
 */
static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}

/*
 * Start a full qgroup rescan: initialise the rescan state, commit the
 * current transaction, zero all tracking counters and queue the rescan
 * worker.
 *
 * Returns 0 once the worker is queued, or the negative error from
 * qgroup_rescan_init(), btrfs_join_transaction() or
 * btrfs_commit_transaction().
 */
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	/*
	 * NOTE(review): the two failure paths below clear the RESCAN flag
	 * without taking a lock and do not reset qgroup_rescan_running,
	 * which qgroup_rescan_init() set to true - confirm that
	 * btrfs_qgroup_wait_for_completion() callers cannot block on it.
	 */
	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}

/*
 * Wait until a running rescan has signalled qgroup_rescan_completion.
 *
 * Returns 0 immediately when qgroup_rescan_running is not set. Otherwise
 * returns the result of wait_for_completion_interruptible() when
 * @interruptible is true, or 0 after an uninterruptible wait.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	/* sample the flag under both locks, then wait without holding them */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}

/*
 * Queue the rescan worker again if the RESCAN status flag is set.
 *
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}

/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or do
 * nothing if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EDQUOT)
 *
 * NOTE: this function may sleep for memory allocation.
2838 * if btrfs_qgroup_reserve_data() is called multiple times with 2839 * same @reserved, caller must ensure when error happens it's OK 2840 * to free *ALL* reserved space. 2841 */ 2842 int btrfs_qgroup_reserve_data(struct inode *inode, 2843 struct extent_changeset **reserved_ret, u64 start, 2844 u64 len) 2845 { 2846 struct btrfs_root *root = BTRFS_I(inode)->root; 2847 struct ulist_node *unode; 2848 struct ulist_iterator uiter; 2849 struct extent_changeset *reserved; 2850 u64 orig_reserved; 2851 u64 to_reserve; 2852 int ret; 2853 2854 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 2855 !is_fstree(root->objectid) || len == 0) 2856 return 0; 2857 2858 /* @reserved parameter is mandatory for qgroup */ 2859 if (WARN_ON(!reserved_ret)) 2860 return -EINVAL; 2861 if (!*reserved_ret) { 2862 *reserved_ret = extent_changeset_alloc(); 2863 if (!*reserved_ret) 2864 return -ENOMEM; 2865 } 2866 reserved = *reserved_ret; 2867 /* Record already reserved space */ 2868 orig_reserved = reserved->bytes_changed; 2869 ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2870 start + len -1, EXTENT_QGROUP_RESERVED, reserved); 2871 2872 /* Newly reserved space */ 2873 to_reserve = reserved->bytes_changed - orig_reserved; 2874 trace_btrfs_qgroup_reserve_data(inode, start, len, 2875 to_reserve, QGROUP_RESERVE); 2876 if (ret < 0) 2877 goto cleanup; 2878 ret = qgroup_reserve(root, to_reserve, true); 2879 if (ret < 0) 2880 goto cleanup; 2881 2882 return ret; 2883 2884 cleanup: 2885 /* cleanup *ALL* already reserved ranges */ 2886 ULIST_ITER_INIT(&uiter); 2887 while ((unode = ulist_next(&reserved->range_changed, &uiter))) 2888 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, 2889 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, 2890 GFP_NOFS); 2891 extent_changeset_release(reserved); 2892 return ret; 2893 } 2894 2895 /* Free ranges specified by @reserved, normally in error path */ 2896 static int qgroup_free_reserved_data(struct inode *inode, 2897 struct 
extent_changeset *reserved, u64 start, u64 len) 2898 { 2899 struct btrfs_root *root = BTRFS_I(inode)->root; 2900 struct ulist_node *unode; 2901 struct ulist_iterator uiter; 2902 struct extent_changeset changeset; 2903 int freed = 0; 2904 int ret; 2905 2906 extent_changeset_init(&changeset); 2907 len = round_up(start + len, root->fs_info->sectorsize); 2908 start = round_down(start, root->fs_info->sectorsize); 2909 2910 ULIST_ITER_INIT(&uiter); 2911 while ((unode = ulist_next(&reserved->range_changed, &uiter))) { 2912 u64 range_start = unode->val; 2913 /* unode->aux is the inclusive end */ 2914 u64 range_len = unode->aux - range_start + 1; 2915 u64 free_start; 2916 u64 free_len; 2917 2918 extent_changeset_release(&changeset); 2919 2920 /* Only free range in range [start, start + len) */ 2921 if (range_start >= start + len || 2922 range_start + range_len <= start) 2923 continue; 2924 free_start = max(range_start, start); 2925 free_len = min(start + len, range_start + range_len) - 2926 free_start; 2927 /* 2928 * TODO: To also modify reserved->ranges_reserved to reflect 2929 * the modification. 2930 * 2931 * However as long as we free qgroup reserved according to 2932 * EXTENT_QGROUP_RESERVED, we won't double free. 2933 * So not need to rush. 
2934 */ 2935 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, 2936 free_start, free_start + free_len - 1, 2937 EXTENT_QGROUP_RESERVED, &changeset); 2938 if (ret < 0) 2939 goto out; 2940 freed += changeset.bytes_changed; 2941 } 2942 btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed); 2943 ret = freed; 2944 out: 2945 extent_changeset_release(&changeset); 2946 return ret; 2947 } 2948 2949 static int __btrfs_qgroup_release_data(struct inode *inode, 2950 struct extent_changeset *reserved, u64 start, u64 len, 2951 int free) 2952 { 2953 struct extent_changeset changeset; 2954 int trace_op = QGROUP_RELEASE; 2955 int ret; 2956 2957 /* In release case, we shouldn't have @reserved */ 2958 WARN_ON(!free && reserved); 2959 if (free && reserved) 2960 return qgroup_free_reserved_data(inode, reserved, start, len); 2961 extent_changeset_init(&changeset); 2962 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2963 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2964 if (ret < 0) 2965 goto out; 2966 2967 if (free) 2968 trace_op = QGROUP_FREE; 2969 trace_btrfs_qgroup_release_data(inode, start, len, 2970 changeset.bytes_changed, trace_op); 2971 if (free) 2972 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 2973 BTRFS_I(inode)->root->objectid, 2974 changeset.bytes_changed); 2975 ret = changeset.bytes_changed; 2976 out: 2977 extent_changeset_release(&changeset); 2978 return ret; 2979 } 2980 2981 /* 2982 * Free a reserved space range from io_tree and related qgroups 2983 * 2984 * Should be called when a range of pages get invalidated before reaching disk. 2985 * Or for error cleanup case. 2986 * if @reserved is given, only reserved range in [@start, @start + @len) will 2987 * be freed. 2988 * 2989 * For data written to disk, use btrfs_qgroup_release_data(). 2990 * 2991 * NOTE: This function may sleep for memory allocation. 
2992 */ 2993 int btrfs_qgroup_free_data(struct inode *inode, 2994 struct extent_changeset *reserved, u64 start, u64 len) 2995 { 2996 return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); 2997 } 2998 2999 /* 3000 * Release a reserved space range from io_tree only. 3001 * 3002 * Should be called when a range of pages get written to disk and corresponding 3003 * FILE_EXTENT is inserted into corresponding root. 3004 * 3005 * Since new qgroup accounting framework will only update qgroup numbers at 3006 * commit_transaction() time, its reserved space shouldn't be freed from 3007 * related qgroups. 3008 * 3009 * But we should release the range from io_tree, to allow further write to be 3010 * COWed. 3011 * 3012 * NOTE: This function may sleep for memory allocation. 3013 */ 3014 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) 3015 { 3016 return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); 3017 } 3018 3019 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3020 bool enforce) 3021 { 3022 struct btrfs_fs_info *fs_info = root->fs_info; 3023 int ret; 3024 3025 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3026 !is_fstree(root->objectid) || num_bytes == 0) 3027 return 0; 3028 3029 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3030 trace_qgroup_meta_reserve(root, (s64)num_bytes); 3031 ret = qgroup_reserve(root, num_bytes, enforce); 3032 if (ret < 0) 3033 return ret; 3034 atomic64_add(num_bytes, &root->qgroup_meta_rsv); 3035 return ret; 3036 } 3037 3038 void btrfs_qgroup_free_meta_all(struct btrfs_root *root) 3039 { 3040 struct btrfs_fs_info *fs_info = root->fs_info; 3041 u64 reserved; 3042 3043 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3044 !is_fstree(root->objectid)) 3045 return; 3046 3047 reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); 3048 if (reserved == 0) 3049 return; 3050 trace_qgroup_meta_reserve(root, -(s64)reserved); 3051 
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); 3052 } 3053 3054 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) 3055 { 3056 struct btrfs_fs_info *fs_info = root->fs_info; 3057 3058 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3059 !is_fstree(root->objectid)) 3060 return; 3061 3062 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3063 WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); 3064 atomic64_sub(num_bytes, &root->qgroup_meta_rsv); 3065 trace_qgroup_meta_reserve(root, -(s64)num_bytes); 3066 btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); 3067 } 3068 3069 /* 3070 * Check qgroup reserved space leaking, normally at destroy inode 3071 * time 3072 */ 3073 void btrfs_qgroup_check_reserved_leak(struct inode *inode) 3074 { 3075 struct extent_changeset changeset; 3076 struct ulist_node *unode; 3077 struct ulist_iterator iter; 3078 int ret; 3079 3080 extent_changeset_init(&changeset); 3081 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 3082 EXTENT_QGROUP_RESERVED, &changeset); 3083 3084 WARN_ON(ret < 0); 3085 if (WARN_ON(changeset.bytes_changed)) { 3086 ULIST_ITER_INIT(&iter); 3087 while ((unode = ulist_next(&changeset.range_changed, &iter))) { 3088 btrfs_warn(BTRFS_I(inode)->root->fs_info, 3089 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", 3090 inode->i_ino, unode->val, unode->aux); 3091 } 3092 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 3093 BTRFS_I(inode)->root->objectid, 3094 changeset.bytes_changed); 3095 3096 } 3097 extent_changeset_release(&changeset); 3098 } 3099