/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"


/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
52 */ 53 struct btrfs_qgroup { 54 u64 qgroupid; 55 56 /* 57 * state 58 */ 59 u64 rfer; /* referenced */ 60 u64 rfer_cmpr; /* referenced compressed */ 61 u64 excl; /* exclusive */ 62 u64 excl_cmpr; /* exclusive compressed */ 63 64 /* 65 * limits 66 */ 67 u64 lim_flags; /* which limits are set */ 68 u64 max_rfer; 69 u64 max_excl; 70 u64 rsv_rfer; 71 u64 rsv_excl; 72 73 /* 74 * reservation tracking 75 */ 76 u64 reserved; 77 78 /* 79 * lists 80 */ 81 struct list_head groups; /* groups this group is member of */ 82 struct list_head members; /* groups that are members of this group */ 83 struct list_head dirty; /* dirty groups */ 84 struct rb_node node; /* tree of qgroups */ 85 86 /* 87 * temp variables for accounting operations 88 * Refer to qgroup_shared_accounting() for details. 89 */ 90 u64 old_refcnt; 91 u64 new_refcnt; 92 }; 93 94 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 95 int mod) 96 { 97 if (qg->old_refcnt < seq) 98 qg->old_refcnt = seq; 99 qg->old_refcnt += mod; 100 } 101 102 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 103 int mod) 104 { 105 if (qg->new_refcnt < seq) 106 qg->new_refcnt = seq; 107 qg->new_refcnt += mod; 108 } 109 110 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 111 { 112 if (qg->old_refcnt < seq) 113 return 0; 114 return qg->old_refcnt - seq; 115 } 116 117 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 118 { 119 if (qg->new_refcnt < seq) 120 return 0; 121 return qg->new_refcnt - seq; 122 } 123 124 /* 125 * glue structure to represent the relations between qgroups. 
126 */ 127 struct btrfs_qgroup_list { 128 struct list_head next_group; 129 struct list_head next_member; 130 struct btrfs_qgroup *group; 131 struct btrfs_qgroup *member; 132 }; 133 134 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 135 { 136 return (u64)(uintptr_t)qg; 137 } 138 139 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 140 { 141 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 142 } 143 144 static int 145 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 146 int init_flags); 147 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 148 149 /* must be called with qgroup_ioctl_lock held */ 150 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 151 u64 qgroupid) 152 { 153 struct rb_node *n = fs_info->qgroup_tree.rb_node; 154 struct btrfs_qgroup *qgroup; 155 156 while (n) { 157 qgroup = rb_entry(n, struct btrfs_qgroup, node); 158 if (qgroup->qgroupid < qgroupid) 159 n = n->rb_left; 160 else if (qgroup->qgroupid > qgroupid) 161 n = n->rb_right; 162 else 163 return qgroup; 164 } 165 return NULL; 166 } 167 168 /* must be called with qgroup_lock held */ 169 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 170 u64 qgroupid) 171 { 172 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 173 struct rb_node *parent = NULL; 174 struct btrfs_qgroup *qgroup; 175 176 while (*p) { 177 parent = *p; 178 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 179 180 if (qgroup->qgroupid < qgroupid) 181 p = &(*p)->rb_left; 182 else if (qgroup->qgroupid > qgroupid) 183 p = &(*p)->rb_right; 184 else 185 return qgroup; 186 } 187 188 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 189 if (!qgroup) 190 return ERR_PTR(-ENOMEM); 191 192 qgroup->qgroupid = qgroupid; 193 INIT_LIST_HEAD(&qgroup->groups); 194 INIT_LIST_HEAD(&qgroup->members); 195 INIT_LIST_HEAD(&qgroup->dirty); 196 197 rb_link_node(&qgroup->node, parent, p); 198 rb_insert_color(&qgroup->node, 
&fs_info->qgroup_tree); 199 200 return qgroup; 201 } 202 203 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 204 { 205 struct btrfs_qgroup_list *list; 206 207 list_del(&qgroup->dirty); 208 while (!list_empty(&qgroup->groups)) { 209 list = list_first_entry(&qgroup->groups, 210 struct btrfs_qgroup_list, next_group); 211 list_del(&list->next_group); 212 list_del(&list->next_member); 213 kfree(list); 214 } 215 216 while (!list_empty(&qgroup->members)) { 217 list = list_first_entry(&qgroup->members, 218 struct btrfs_qgroup_list, next_member); 219 list_del(&list->next_group); 220 list_del(&list->next_member); 221 kfree(list); 222 } 223 kfree(qgroup); 224 } 225 226 /* must be called with qgroup_lock held */ 227 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 228 { 229 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 230 231 if (!qgroup) 232 return -ENOENT; 233 234 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 235 __del_qgroup_rb(qgroup); 236 return 0; 237 } 238 239 /* must be called with qgroup_lock held */ 240 static int add_relation_rb(struct btrfs_fs_info *fs_info, 241 u64 memberid, u64 parentid) 242 { 243 struct btrfs_qgroup *member; 244 struct btrfs_qgroup *parent; 245 struct btrfs_qgroup_list *list; 246 247 member = find_qgroup_rb(fs_info, memberid); 248 parent = find_qgroup_rb(fs_info, parentid); 249 if (!member || !parent) 250 return -ENOENT; 251 252 list = kzalloc(sizeof(*list), GFP_ATOMIC); 253 if (!list) 254 return -ENOMEM; 255 256 list->group = parent; 257 list->member = member; 258 list_add_tail(&list->next_group, &member->groups); 259 list_add_tail(&list->next_member, &parent->members); 260 261 return 0; 262 } 263 264 /* must be called with qgroup_lock held */ 265 static int del_relation_rb(struct btrfs_fs_info *fs_info, 266 u64 memberid, u64 parentid) 267 { 268 struct btrfs_qgroup *member; 269 struct btrfs_qgroup *parent; 270 struct btrfs_qgroup_list *list; 271 272 member = find_qgroup_rb(fs_info, memberid); 
273 parent = find_qgroup_rb(fs_info, parentid); 274 if (!member || !parent) 275 return -ENOENT; 276 277 list_for_each_entry(list, &member->groups, next_group) { 278 if (list->group == parent) { 279 list_del(&list->next_group); 280 list_del(&list->next_member); 281 kfree(list); 282 return 0; 283 } 284 } 285 return -ENOENT; 286 } 287 288 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 289 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 290 u64 rfer, u64 excl) 291 { 292 struct btrfs_qgroup *qgroup; 293 294 qgroup = find_qgroup_rb(fs_info, qgroupid); 295 if (!qgroup) 296 return -EINVAL; 297 if (qgroup->rfer != rfer || qgroup->excl != excl) 298 return -EINVAL; 299 return 0; 300 } 301 #endif 302 303 /* 304 * The full config is read in one go, only called from open_ctree() 305 * It doesn't use any locking, as at this point we're still single-threaded 306 */ 307 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 308 { 309 struct btrfs_key key; 310 struct btrfs_key found_key; 311 struct btrfs_root *quota_root = fs_info->quota_root; 312 struct btrfs_path *path = NULL; 313 struct extent_buffer *l; 314 int slot; 315 int ret = 0; 316 u64 flags = 0; 317 u64 rescan_progress = 0; 318 319 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 320 return 0; 321 322 fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); 323 if (!fs_info->qgroup_ulist) { 324 ret = -ENOMEM; 325 goto out; 326 } 327 328 path = btrfs_alloc_path(); 329 if (!path) { 330 ret = -ENOMEM; 331 goto out; 332 } 333 334 /* default this to quota off, in case no status key is found */ 335 fs_info->qgroup_flags = 0; 336 337 /* 338 * pass 1: read status, all qgroup infos and limits 339 */ 340 key.objectid = 0; 341 key.type = 0; 342 key.offset = 0; 343 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 344 if (ret) 345 goto out; 346 347 while (1) { 348 struct btrfs_qgroup *qgroup; 349 350 slot = path->slots[0]; 351 l = path->nodes[0]; 352 btrfs_item_key_to_cpu(l, &found_key, slot); 353 
354 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 355 struct btrfs_qgroup_status_item *ptr; 356 357 ptr = btrfs_item_ptr(l, slot, 358 struct btrfs_qgroup_status_item); 359 360 if (btrfs_qgroup_status_version(l, ptr) != 361 BTRFS_QGROUP_STATUS_VERSION) { 362 btrfs_err(fs_info, 363 "old qgroup version, quota disabled"); 364 goto out; 365 } 366 if (btrfs_qgroup_status_generation(l, ptr) != 367 fs_info->generation) { 368 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 369 btrfs_err(fs_info, 370 "qgroup generation mismatch, marked as inconsistent"); 371 } 372 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 373 ptr); 374 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 375 goto next1; 376 } 377 378 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 379 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 380 goto next1; 381 382 qgroup = find_qgroup_rb(fs_info, found_key.offset); 383 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 384 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 385 btrfs_err(fs_info, "inconsistent qgroup config"); 386 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 387 } 388 if (!qgroup) { 389 qgroup = add_qgroup_rb(fs_info, found_key.offset); 390 if (IS_ERR(qgroup)) { 391 ret = PTR_ERR(qgroup); 392 goto out; 393 } 394 } 395 switch (found_key.type) { 396 case BTRFS_QGROUP_INFO_KEY: { 397 struct btrfs_qgroup_info_item *ptr; 398 399 ptr = btrfs_item_ptr(l, slot, 400 struct btrfs_qgroup_info_item); 401 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 402 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 403 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 404 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 405 /* generation currently unused */ 406 break; 407 } 408 case BTRFS_QGROUP_LIMIT_KEY: { 409 struct btrfs_qgroup_limit_item *ptr; 410 411 ptr = btrfs_item_ptr(l, slot, 412 struct btrfs_qgroup_limit_item); 413 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 414 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 
415 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 416 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 417 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 418 break; 419 } 420 } 421 next1: 422 ret = btrfs_next_item(quota_root, path); 423 if (ret < 0) 424 goto out; 425 if (ret) 426 break; 427 } 428 btrfs_release_path(path); 429 430 /* 431 * pass 2: read all qgroup relations 432 */ 433 key.objectid = 0; 434 key.type = BTRFS_QGROUP_RELATION_KEY; 435 key.offset = 0; 436 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 437 if (ret) 438 goto out; 439 while (1) { 440 slot = path->slots[0]; 441 l = path->nodes[0]; 442 btrfs_item_key_to_cpu(l, &found_key, slot); 443 444 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 445 goto next2; 446 447 if (found_key.objectid > found_key.offset) { 448 /* parent <- member, not needed to build config */ 449 /* FIXME should we omit the key completely? */ 450 goto next2; 451 } 452 453 ret = add_relation_rb(fs_info, found_key.objectid, 454 found_key.offset); 455 if (ret == -ENOENT) { 456 btrfs_warn(fs_info, 457 "orphan qgroup relation 0x%llx->0x%llx", 458 found_key.objectid, found_key.offset); 459 ret = 0; /* ignore the error */ 460 } 461 if (ret) 462 goto out; 463 next2: 464 ret = btrfs_next_item(quota_root, path); 465 if (ret < 0) 466 goto out; 467 if (ret) 468 break; 469 } 470 out: 471 fs_info->qgroup_flags |= flags; 472 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 473 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 474 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 475 ret >= 0) 476 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 477 btrfs_free_path(path); 478 479 if (ret < 0) { 480 ulist_free(fs_info->qgroup_ulist); 481 fs_info->qgroup_ulist = NULL; 482 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 483 } 484 485 return ret < 0 ? 
ret : 0; 486 } 487 488 /* 489 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 490 * first two are in single-threaded paths.And for the third one, we have set 491 * quota_root to be null with qgroup_lock held before, so it is safe to clean 492 * up the in-memory structures without qgroup_lock held. 493 */ 494 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 495 { 496 struct rb_node *n; 497 struct btrfs_qgroup *qgroup; 498 499 while ((n = rb_first(&fs_info->qgroup_tree))) { 500 qgroup = rb_entry(n, struct btrfs_qgroup, node); 501 rb_erase(n, &fs_info->qgroup_tree); 502 __del_qgroup_rb(qgroup); 503 } 504 /* 505 * we call btrfs_free_qgroup_config() when umounting 506 * filesystem and disabling quota, so we set qgroup_ulist 507 * to be null here to avoid double free. 508 */ 509 ulist_free(fs_info->qgroup_ulist); 510 fs_info->qgroup_ulist = NULL; 511 } 512 513 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 514 struct btrfs_root *quota_root, 515 u64 src, u64 dst) 516 { 517 int ret; 518 struct btrfs_path *path; 519 struct btrfs_key key; 520 521 path = btrfs_alloc_path(); 522 if (!path) 523 return -ENOMEM; 524 525 key.objectid = src; 526 key.type = BTRFS_QGROUP_RELATION_KEY; 527 key.offset = dst; 528 529 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 530 531 btrfs_mark_buffer_dirty(path->nodes[0]); 532 533 btrfs_free_path(path); 534 return ret; 535 } 536 537 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 538 struct btrfs_root *quota_root, 539 u64 src, u64 dst) 540 { 541 int ret; 542 struct btrfs_path *path; 543 struct btrfs_key key; 544 545 path = btrfs_alloc_path(); 546 if (!path) 547 return -ENOMEM; 548 549 key.objectid = src; 550 key.type = BTRFS_QGROUP_RELATION_KEY; 551 key.offset = dst; 552 553 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 554 if (ret < 0) 555 goto out; 556 557 if (ret > 0) { 558 ret = -ENOENT; 559 goto out; 560 } 561 562 ret = 
btrfs_del_item(trans, quota_root, path); 563 out: 564 btrfs_free_path(path); 565 return ret; 566 } 567 568 static int add_qgroup_item(struct btrfs_trans_handle *trans, 569 struct btrfs_root *quota_root, u64 qgroupid) 570 { 571 int ret; 572 struct btrfs_path *path; 573 struct btrfs_qgroup_info_item *qgroup_info; 574 struct btrfs_qgroup_limit_item *qgroup_limit; 575 struct extent_buffer *leaf; 576 struct btrfs_key key; 577 578 if (btrfs_is_testing(quota_root->fs_info)) 579 return 0; 580 581 path = btrfs_alloc_path(); 582 if (!path) 583 return -ENOMEM; 584 585 key.objectid = 0; 586 key.type = BTRFS_QGROUP_INFO_KEY; 587 key.offset = qgroupid; 588 589 /* 590 * Avoid a transaction abort by catching -EEXIST here. In that 591 * case, we proceed by re-initializing the existing structure 592 * on disk. 593 */ 594 595 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 596 sizeof(*qgroup_info)); 597 if (ret && ret != -EEXIST) 598 goto out; 599 600 leaf = path->nodes[0]; 601 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 602 struct btrfs_qgroup_info_item); 603 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 604 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 605 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 606 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 607 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 608 609 btrfs_mark_buffer_dirty(leaf); 610 611 btrfs_release_path(path); 612 613 key.type = BTRFS_QGROUP_LIMIT_KEY; 614 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 615 sizeof(*qgroup_limit)); 616 if (ret && ret != -EEXIST) 617 goto out; 618 619 leaf = path->nodes[0]; 620 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 621 struct btrfs_qgroup_limit_item); 622 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 623 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 624 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 625 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 626 
btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 627 628 btrfs_mark_buffer_dirty(leaf); 629 630 ret = 0; 631 out: 632 btrfs_free_path(path); 633 return ret; 634 } 635 636 static int del_qgroup_item(struct btrfs_trans_handle *trans, 637 struct btrfs_root *quota_root, u64 qgroupid) 638 { 639 int ret; 640 struct btrfs_path *path; 641 struct btrfs_key key; 642 643 path = btrfs_alloc_path(); 644 if (!path) 645 return -ENOMEM; 646 647 key.objectid = 0; 648 key.type = BTRFS_QGROUP_INFO_KEY; 649 key.offset = qgroupid; 650 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 651 if (ret < 0) 652 goto out; 653 654 if (ret > 0) { 655 ret = -ENOENT; 656 goto out; 657 } 658 659 ret = btrfs_del_item(trans, quota_root, path); 660 if (ret) 661 goto out; 662 663 btrfs_release_path(path); 664 665 key.type = BTRFS_QGROUP_LIMIT_KEY; 666 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 667 if (ret < 0) 668 goto out; 669 670 if (ret > 0) { 671 ret = -ENOENT; 672 goto out; 673 } 674 675 ret = btrfs_del_item(trans, quota_root, path); 676 677 out: 678 btrfs_free_path(path); 679 return ret; 680 } 681 682 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 683 struct btrfs_root *root, 684 struct btrfs_qgroup *qgroup) 685 { 686 struct btrfs_path *path; 687 struct btrfs_key key; 688 struct extent_buffer *l; 689 struct btrfs_qgroup_limit_item *qgroup_limit; 690 int ret; 691 int slot; 692 693 key.objectid = 0; 694 key.type = BTRFS_QGROUP_LIMIT_KEY; 695 key.offset = qgroup->qgroupid; 696 697 path = btrfs_alloc_path(); 698 if (!path) 699 return -ENOMEM; 700 701 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 702 if (ret > 0) 703 ret = -ENOENT; 704 705 if (ret) 706 goto out; 707 708 l = path->nodes[0]; 709 slot = path->slots[0]; 710 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 711 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 712 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 
713 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 714 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 715 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 716 717 btrfs_mark_buffer_dirty(l); 718 719 out: 720 btrfs_free_path(path); 721 return ret; 722 } 723 724 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 725 struct btrfs_root *root, 726 struct btrfs_qgroup *qgroup) 727 { 728 struct btrfs_path *path; 729 struct btrfs_key key; 730 struct extent_buffer *l; 731 struct btrfs_qgroup_info_item *qgroup_info; 732 int ret; 733 int slot; 734 735 if (btrfs_is_testing(root->fs_info)) 736 return 0; 737 738 key.objectid = 0; 739 key.type = BTRFS_QGROUP_INFO_KEY; 740 key.offset = qgroup->qgroupid; 741 742 path = btrfs_alloc_path(); 743 if (!path) 744 return -ENOMEM; 745 746 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 747 if (ret > 0) 748 ret = -ENOENT; 749 750 if (ret) 751 goto out; 752 753 l = path->nodes[0]; 754 slot = path->slots[0]; 755 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 756 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 757 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 758 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 759 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 760 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 761 762 btrfs_mark_buffer_dirty(l); 763 764 out: 765 btrfs_free_path(path); 766 return ret; 767 } 768 769 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 770 struct btrfs_fs_info *fs_info, 771 struct btrfs_root *root) 772 { 773 struct btrfs_path *path; 774 struct btrfs_key key; 775 struct extent_buffer *l; 776 struct btrfs_qgroup_status_item *ptr; 777 int ret; 778 int slot; 779 780 key.objectid = 0; 781 key.type = BTRFS_QGROUP_STATUS_KEY; 782 key.offset = 0; 783 784 path = btrfs_alloc_path(); 785 if (!path) 786 return -ENOMEM; 
787 788 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 789 if (ret > 0) 790 ret = -ENOENT; 791 792 if (ret) 793 goto out; 794 795 l = path->nodes[0]; 796 slot = path->slots[0]; 797 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 798 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 799 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 800 btrfs_set_qgroup_status_rescan(l, ptr, 801 fs_info->qgroup_rescan_progress.objectid); 802 803 btrfs_mark_buffer_dirty(l); 804 805 out: 806 btrfs_free_path(path); 807 return ret; 808 } 809 810 /* 811 * called with qgroup_lock held 812 */ 813 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 814 struct btrfs_root *root) 815 { 816 struct btrfs_path *path; 817 struct btrfs_key key; 818 struct extent_buffer *leaf = NULL; 819 int ret; 820 int nr = 0; 821 822 path = btrfs_alloc_path(); 823 if (!path) 824 return -ENOMEM; 825 826 path->leave_spinning = 1; 827 828 key.objectid = 0; 829 key.offset = 0; 830 key.type = 0; 831 832 while (1) { 833 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 834 if (ret < 0) 835 goto out; 836 leaf = path->nodes[0]; 837 nr = btrfs_header_nritems(leaf); 838 if (!nr) 839 break; 840 /* 841 * delete the leaf one by one 842 * since the whole tree is going 843 * to be deleted. 
844 */ 845 path->slots[0] = 0; 846 ret = btrfs_del_items(trans, root, path, 0, nr); 847 if (ret) 848 goto out; 849 850 btrfs_release_path(path); 851 } 852 ret = 0; 853 out: 854 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 855 btrfs_free_path(path); 856 return ret; 857 } 858 859 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 860 struct btrfs_fs_info *fs_info) 861 { 862 struct btrfs_root *quota_root; 863 struct btrfs_root *tree_root = fs_info->tree_root; 864 struct btrfs_path *path = NULL; 865 struct btrfs_qgroup_status_item *ptr; 866 struct extent_buffer *leaf; 867 struct btrfs_key key; 868 struct btrfs_key found_key; 869 struct btrfs_qgroup *qgroup = NULL; 870 int ret = 0; 871 int slot; 872 873 mutex_lock(&fs_info->qgroup_ioctl_lock); 874 if (fs_info->quota_root) { 875 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 876 goto out; 877 } 878 879 fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS); 880 if (!fs_info->qgroup_ulist) { 881 ret = -ENOMEM; 882 goto out; 883 } 884 885 /* 886 * initially create the quota tree 887 */ 888 quota_root = btrfs_create_tree(trans, fs_info, 889 BTRFS_QUOTA_TREE_OBJECTID); 890 if (IS_ERR(quota_root)) { 891 ret = PTR_ERR(quota_root); 892 goto out; 893 } 894 895 path = btrfs_alloc_path(); 896 if (!path) { 897 ret = -ENOMEM; 898 goto out_free_root; 899 } 900 901 key.objectid = 0; 902 key.type = BTRFS_QGROUP_STATUS_KEY; 903 key.offset = 0; 904 905 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 906 sizeof(*ptr)); 907 if (ret) 908 goto out_free_path; 909 910 leaf = path->nodes[0]; 911 ptr = btrfs_item_ptr(leaf, path->slots[0], 912 struct btrfs_qgroup_status_item); 913 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 914 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 915 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 916 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 917 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 918 
btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 919 920 btrfs_mark_buffer_dirty(leaf); 921 922 key.objectid = 0; 923 key.type = BTRFS_ROOT_REF_KEY; 924 key.offset = 0; 925 926 btrfs_release_path(path); 927 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 928 if (ret > 0) 929 goto out_add_root; 930 if (ret < 0) 931 goto out_free_path; 932 933 934 while (1) { 935 slot = path->slots[0]; 936 leaf = path->nodes[0]; 937 btrfs_item_key_to_cpu(leaf, &found_key, slot); 938 939 if (found_key.type == BTRFS_ROOT_REF_KEY) { 940 ret = add_qgroup_item(trans, quota_root, 941 found_key.offset); 942 if (ret) 943 goto out_free_path; 944 945 qgroup = add_qgroup_rb(fs_info, found_key.offset); 946 if (IS_ERR(qgroup)) { 947 ret = PTR_ERR(qgroup); 948 goto out_free_path; 949 } 950 } 951 ret = btrfs_next_item(tree_root, path); 952 if (ret < 0) 953 goto out_free_path; 954 if (ret) 955 break; 956 } 957 958 out_add_root: 959 btrfs_release_path(path); 960 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 961 if (ret) 962 goto out_free_path; 963 964 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 965 if (IS_ERR(qgroup)) { 966 ret = PTR_ERR(qgroup); 967 goto out_free_path; 968 } 969 spin_lock(&fs_info->qgroup_lock); 970 fs_info->quota_root = quota_root; 971 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 972 spin_unlock(&fs_info->qgroup_lock); 973 out_free_path: 974 btrfs_free_path(path); 975 out_free_root: 976 if (ret) { 977 free_extent_buffer(quota_root->node); 978 free_extent_buffer(quota_root->commit_root); 979 kfree(quota_root); 980 } 981 out: 982 if (ret) { 983 ulist_free(fs_info->qgroup_ulist); 984 fs_info->qgroup_ulist = NULL; 985 } 986 mutex_unlock(&fs_info->qgroup_ioctl_lock); 987 return ret; 988 } 989 990 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 991 struct btrfs_fs_info *fs_info) 992 { 993 struct btrfs_root *tree_root = fs_info->tree_root; 994 struct btrfs_root *quota_root; 995 int ret = 0; 996 997 
mutex_lock(&fs_info->qgroup_ioctl_lock); 998 if (!fs_info->quota_root) 999 goto out; 1000 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 1001 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 1002 btrfs_qgroup_wait_for_completion(fs_info, false); 1003 spin_lock(&fs_info->qgroup_lock); 1004 quota_root = fs_info->quota_root; 1005 fs_info->quota_root = NULL; 1006 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 1007 spin_unlock(&fs_info->qgroup_lock); 1008 1009 btrfs_free_qgroup_config(fs_info); 1010 1011 ret = btrfs_clean_quota_tree(trans, quota_root); 1012 if (ret) 1013 goto out; 1014 1015 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 1016 if (ret) 1017 goto out; 1018 1019 list_del("a_root->dirty_list); 1020 1021 btrfs_tree_lock(quota_root->node); 1022 clean_tree_block(trans, fs_info, quota_root->node); 1023 btrfs_tree_unlock(quota_root->node); 1024 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 1025 1026 free_extent_buffer(quota_root->node); 1027 free_extent_buffer(quota_root->commit_root); 1028 kfree(quota_root); 1029 out: 1030 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1031 return ret; 1032 } 1033 1034 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 1035 struct btrfs_qgroup *qgroup) 1036 { 1037 if (list_empty(&qgroup->dirty)) 1038 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 1039 } 1040 1041 /* 1042 * The easy accounting, if we are adding/removing the only ref for an extent 1043 * then this qgroup and all of the parent qgroups get their reference and 1044 * exclusive counts adjusted. 1045 * 1046 * Caller should hold fs_info->qgroup_lock. 
1047 */ 1048 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1049 struct ulist *tmp, u64 ref_root, 1050 u64 num_bytes, int sign) 1051 { 1052 struct btrfs_qgroup *qgroup; 1053 struct btrfs_qgroup_list *glist; 1054 struct ulist_node *unode; 1055 struct ulist_iterator uiter; 1056 int ret = 0; 1057 1058 qgroup = find_qgroup_rb(fs_info, ref_root); 1059 if (!qgroup) 1060 goto out; 1061 1062 qgroup->rfer += sign * num_bytes; 1063 qgroup->rfer_cmpr += sign * num_bytes; 1064 1065 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1066 qgroup->excl += sign * num_bytes; 1067 qgroup->excl_cmpr += sign * num_bytes; 1068 if (sign > 0) 1069 qgroup->reserved -= num_bytes; 1070 1071 qgroup_dirty(fs_info, qgroup); 1072 1073 /* Get all of the parent groups that contain this qgroup */ 1074 list_for_each_entry(glist, &qgroup->groups, next_group) { 1075 ret = ulist_add(tmp, glist->group->qgroupid, 1076 qgroup_to_aux(glist->group), GFP_ATOMIC); 1077 if (ret < 0) 1078 goto out; 1079 } 1080 1081 /* Iterate all of the parents and adjust their reference counts */ 1082 ULIST_ITER_INIT(&uiter); 1083 while ((unode = ulist_next(tmp, &uiter))) { 1084 qgroup = unode_aux_to_qgroup(unode); 1085 qgroup->rfer += sign * num_bytes; 1086 qgroup->rfer_cmpr += sign * num_bytes; 1087 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1088 qgroup->excl += sign * num_bytes; 1089 if (sign > 0) 1090 qgroup->reserved -= num_bytes; 1091 qgroup->excl_cmpr += sign * num_bytes; 1092 qgroup_dirty(fs_info, qgroup); 1093 1094 /* Add any parents of the parents */ 1095 list_for_each_entry(glist, &qgroup->groups, next_group) { 1096 ret = ulist_add(tmp, glist->group->qgroupid, 1097 qgroup_to_aux(glist->group), GFP_ATOMIC); 1098 if (ret < 0) 1099 goto out; 1100 } 1101 } 1102 ret = 0; 1103 out: 1104 return ret; 1105 } 1106 1107 1108 /* 1109 * Quick path for updating qgroup with only excl refs. 1110 * 1111 * In that case, just update all parent will be enough. 1112 * Or we needs to do a full rescan. 
1113 * Caller should also hold fs_info->qgroup_lock. 1114 * 1115 * Return 0 for quick update, return >0 for need to full rescan 1116 * and mark INCONSISTENT flag. 1117 * Return < 0 for other error. 1118 */ 1119 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1120 struct ulist *tmp, u64 src, u64 dst, 1121 int sign) 1122 { 1123 struct btrfs_qgroup *qgroup; 1124 int ret = 1; 1125 int err = 0; 1126 1127 qgroup = find_qgroup_rb(fs_info, src); 1128 if (!qgroup) 1129 goto out; 1130 if (qgroup->excl == qgroup->rfer) { 1131 ret = 0; 1132 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1133 qgroup->excl, sign); 1134 if (err < 0) { 1135 ret = err; 1136 goto out; 1137 } 1138 } 1139 out: 1140 if (ret) 1141 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1142 return ret; 1143 } 1144 1145 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1146 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1147 { 1148 struct btrfs_root *quota_root; 1149 struct btrfs_qgroup *parent; 1150 struct btrfs_qgroup *member; 1151 struct btrfs_qgroup_list *list; 1152 struct ulist *tmp; 1153 int ret = 0; 1154 1155 /* Check the level of src and dst first */ 1156 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1157 return -EINVAL; 1158 1159 tmp = ulist_alloc(GFP_NOFS); 1160 if (!tmp) 1161 return -ENOMEM; 1162 1163 mutex_lock(&fs_info->qgroup_ioctl_lock); 1164 quota_root = fs_info->quota_root; 1165 if (!quota_root) { 1166 ret = -EINVAL; 1167 goto out; 1168 } 1169 member = find_qgroup_rb(fs_info, src); 1170 parent = find_qgroup_rb(fs_info, dst); 1171 if (!member || !parent) { 1172 ret = -EINVAL; 1173 goto out; 1174 } 1175 1176 /* check if such qgroup relation exist firstly */ 1177 list_for_each_entry(list, &member->groups, next_group) { 1178 if (list->group == parent) { 1179 ret = -EEXIST; 1180 goto out; 1181 } 1182 } 1183 1184 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1185 if (ret) 1186 goto out; 1187 1188 ret = 
add_qgroup_relation_item(trans, quota_root, dst, src); 1189 if (ret) { 1190 del_qgroup_relation_item(trans, quota_root, src, dst); 1191 goto out; 1192 } 1193 1194 spin_lock(&fs_info->qgroup_lock); 1195 ret = add_relation_rb(fs_info, src, dst); 1196 if (ret < 0) { 1197 spin_unlock(&fs_info->qgroup_lock); 1198 goto out; 1199 } 1200 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1201 spin_unlock(&fs_info->qgroup_lock); 1202 out: 1203 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1204 ulist_free(tmp); 1205 return ret; 1206 } 1207 1208 int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1209 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1210 { 1211 struct btrfs_root *quota_root; 1212 struct btrfs_qgroup *parent; 1213 struct btrfs_qgroup *member; 1214 struct btrfs_qgroup_list *list; 1215 struct ulist *tmp; 1216 int ret = 0; 1217 int err; 1218 1219 tmp = ulist_alloc(GFP_NOFS); 1220 if (!tmp) 1221 return -ENOMEM; 1222 1223 quota_root = fs_info->quota_root; 1224 if (!quota_root) { 1225 ret = -EINVAL; 1226 goto out; 1227 } 1228 1229 member = find_qgroup_rb(fs_info, src); 1230 parent = find_qgroup_rb(fs_info, dst); 1231 if (!member || !parent) { 1232 ret = -EINVAL; 1233 goto out; 1234 } 1235 1236 /* check if such qgroup relation exist firstly */ 1237 list_for_each_entry(list, &member->groups, next_group) { 1238 if (list->group == parent) 1239 goto exist; 1240 } 1241 ret = -ENOENT; 1242 goto out; 1243 exist: 1244 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1245 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1246 if (err && !ret) 1247 ret = err; 1248 1249 spin_lock(&fs_info->qgroup_lock); 1250 del_relation_rb(fs_info, src, dst); 1251 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1252 spin_unlock(&fs_info->qgroup_lock); 1253 out: 1254 ulist_free(tmp); 1255 return ret; 1256 } 1257 1258 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1259 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1260 { 1261 int 
ret = 0; 1262 1263 mutex_lock(&fs_info->qgroup_ioctl_lock); 1264 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1265 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1266 1267 return ret; 1268 } 1269 1270 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1271 struct btrfs_fs_info *fs_info, u64 qgroupid) 1272 { 1273 struct btrfs_root *quota_root; 1274 struct btrfs_qgroup *qgroup; 1275 int ret = 0; 1276 1277 mutex_lock(&fs_info->qgroup_ioctl_lock); 1278 quota_root = fs_info->quota_root; 1279 if (!quota_root) { 1280 ret = -EINVAL; 1281 goto out; 1282 } 1283 qgroup = find_qgroup_rb(fs_info, qgroupid); 1284 if (qgroup) { 1285 ret = -EEXIST; 1286 goto out; 1287 } 1288 1289 ret = add_qgroup_item(trans, quota_root, qgroupid); 1290 if (ret) 1291 goto out; 1292 1293 spin_lock(&fs_info->qgroup_lock); 1294 qgroup = add_qgroup_rb(fs_info, qgroupid); 1295 spin_unlock(&fs_info->qgroup_lock); 1296 1297 if (IS_ERR(qgroup)) 1298 ret = PTR_ERR(qgroup); 1299 out: 1300 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1301 return ret; 1302 } 1303 1304 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1305 struct btrfs_fs_info *fs_info, u64 qgroupid) 1306 { 1307 struct btrfs_root *quota_root; 1308 struct btrfs_qgroup *qgroup; 1309 struct btrfs_qgroup_list *list; 1310 int ret = 0; 1311 1312 mutex_lock(&fs_info->qgroup_ioctl_lock); 1313 quota_root = fs_info->quota_root; 1314 if (!quota_root) { 1315 ret = -EINVAL; 1316 goto out; 1317 } 1318 1319 qgroup = find_qgroup_rb(fs_info, qgroupid); 1320 if (!qgroup) { 1321 ret = -ENOENT; 1322 goto out; 1323 } else { 1324 /* check if there are no children of this qgroup */ 1325 if (!list_empty(&qgroup->members)) { 1326 ret = -EBUSY; 1327 goto out; 1328 } 1329 } 1330 ret = del_qgroup_item(trans, quota_root, qgroupid); 1331 1332 while (!list_empty(&qgroup->groups)) { 1333 list = list_first_entry(&qgroup->groups, 1334 struct btrfs_qgroup_list, next_group); 1335 ret = __del_qgroup_relation(trans, fs_info, 1336 qgroupid, 1337 
list->group->qgroupid); 1338 if (ret) 1339 goto out; 1340 } 1341 1342 spin_lock(&fs_info->qgroup_lock); 1343 del_qgroup_rb(fs_info, qgroupid); 1344 spin_unlock(&fs_info->qgroup_lock); 1345 out: 1346 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1347 return ret; 1348 } 1349 1350 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1351 struct btrfs_fs_info *fs_info, u64 qgroupid, 1352 struct btrfs_qgroup_limit *limit) 1353 { 1354 struct btrfs_root *quota_root; 1355 struct btrfs_qgroup *qgroup; 1356 int ret = 0; 1357 /* Sometimes we would want to clear the limit on this qgroup. 1358 * To meet this requirement, we treat the -1 as a special value 1359 * which tell kernel to clear the limit on this qgroup. 1360 */ 1361 const u64 CLEAR_VALUE = -1; 1362 1363 mutex_lock(&fs_info->qgroup_ioctl_lock); 1364 quota_root = fs_info->quota_root; 1365 if (!quota_root) { 1366 ret = -EINVAL; 1367 goto out; 1368 } 1369 1370 qgroup = find_qgroup_rb(fs_info, qgroupid); 1371 if (!qgroup) { 1372 ret = -ENOENT; 1373 goto out; 1374 } 1375 1376 spin_lock(&fs_info->qgroup_lock); 1377 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1378 if (limit->max_rfer == CLEAR_VALUE) { 1379 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1380 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1381 qgroup->max_rfer = 0; 1382 } else { 1383 qgroup->max_rfer = limit->max_rfer; 1384 } 1385 } 1386 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1387 if (limit->max_excl == CLEAR_VALUE) { 1388 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1389 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1390 qgroup->max_excl = 0; 1391 } else { 1392 qgroup->max_excl = limit->max_excl; 1393 } 1394 } 1395 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1396 if (limit->rsv_rfer == CLEAR_VALUE) { 1397 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1398 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1399 qgroup->rsv_rfer = 0; 1400 } else { 1401 qgroup->rsv_rfer = limit->rsv_rfer; 1402 } 1403 } 1404 if (limit->flags & 
BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1405 if (limit->rsv_excl == CLEAR_VALUE) { 1406 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1407 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1408 qgroup->rsv_excl = 0; 1409 } else { 1410 qgroup->rsv_excl = limit->rsv_excl; 1411 } 1412 } 1413 qgroup->lim_flags |= limit->flags; 1414 1415 spin_unlock(&fs_info->qgroup_lock); 1416 1417 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1418 if (ret) { 1419 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1420 btrfs_info(fs_info, "unable to update quota limit for %llu", 1421 qgroupid); 1422 } 1423 1424 out: 1425 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1426 return ret; 1427 } 1428 1429 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 1430 struct btrfs_fs_info *fs_info) 1431 { 1432 struct btrfs_qgroup_extent_record *record; 1433 struct btrfs_delayed_ref_root *delayed_refs; 1434 struct rb_node *node; 1435 u64 qgroup_to_skip; 1436 int ret = 0; 1437 1438 delayed_refs = &trans->transaction->delayed_refs; 1439 qgroup_to_skip = delayed_refs->qgroup_to_skip; 1440 1441 /* 1442 * No need to do lock, since this function will only be called in 1443 * btrfs_commit_transaction(). 
1444 */ 1445 node = rb_first(&delayed_refs->dirty_extent_root); 1446 while (node) { 1447 record = rb_entry(node, struct btrfs_qgroup_extent_record, 1448 node); 1449 ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, 1450 &record->old_roots); 1451 if (ret < 0) 1452 break; 1453 if (qgroup_to_skip) 1454 ulist_del(record->old_roots, qgroup_to_skip, 0); 1455 node = rb_next(node); 1456 } 1457 return ret; 1458 } 1459 1460 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1461 struct btrfs_delayed_ref_root *delayed_refs, 1462 struct btrfs_qgroup_extent_record *record) 1463 { 1464 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1465 struct rb_node *parent_node = NULL; 1466 struct btrfs_qgroup_extent_record *entry; 1467 u64 bytenr = record->bytenr; 1468 1469 assert_spin_locked(&delayed_refs->lock); 1470 trace_btrfs_qgroup_trace_extent(fs_info, record); 1471 1472 while (*p) { 1473 parent_node = *p; 1474 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1475 node); 1476 if (bytenr < entry->bytenr) 1477 p = &(*p)->rb_left; 1478 else if (bytenr > entry->bytenr) 1479 p = &(*p)->rb_right; 1480 else 1481 return 1; 1482 } 1483 1484 rb_link_node(&record->node, parent_node, p); 1485 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1486 return 0; 1487 } 1488 1489 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1490 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1491 gfp_t gfp_flag) 1492 { 1493 struct btrfs_qgroup_extent_record *record; 1494 struct btrfs_delayed_ref_root *delayed_refs; 1495 int ret; 1496 1497 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1498 || bytenr == 0 || num_bytes == 0) 1499 return 0; 1500 if (WARN_ON(trans == NULL)) 1501 return -EINVAL; 1502 record = kmalloc(sizeof(*record), gfp_flag); 1503 if (!record) 1504 return -ENOMEM; 1505 1506 delayed_refs = &trans->transaction->delayed_refs; 1507 record->bytenr = bytenr; 1508 record->num_bytes = num_bytes; 1509 
record->old_roots = NULL; 1510 1511 spin_lock(&delayed_refs->lock); 1512 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1513 spin_unlock(&delayed_refs->lock); 1514 if (ret > 0) 1515 kfree(record); 1516 return 0; 1517 } 1518 1519 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1520 struct btrfs_fs_info *fs_info, 1521 struct extent_buffer *eb) 1522 { 1523 int nr = btrfs_header_nritems(eb); 1524 int i, extent_type, ret; 1525 struct btrfs_key key; 1526 struct btrfs_file_extent_item *fi; 1527 u64 bytenr, num_bytes; 1528 1529 /* We can be called directly from walk_up_proc() */ 1530 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1531 return 0; 1532 1533 for (i = 0; i < nr; i++) { 1534 btrfs_item_key_to_cpu(eb, &key, i); 1535 1536 if (key.type != BTRFS_EXTENT_DATA_KEY) 1537 continue; 1538 1539 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1540 /* filter out non qgroup-accountable extents */ 1541 extent_type = btrfs_file_extent_type(eb, fi); 1542 1543 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1544 continue; 1545 1546 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1547 if (!bytenr) 1548 continue; 1549 1550 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1551 1552 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1553 num_bytes, GFP_NOFS); 1554 if (ret) 1555 return ret; 1556 } 1557 return 0; 1558 } 1559 1560 /* 1561 * Walk up the tree from the bottom, freeing leaves and any interior 1562 * nodes which have had all slots visited. If a node (leaf or 1563 * interior) is freed, the node above it will have it's slot 1564 * incremented. The root node will never be freed. 1565 * 1566 * At the end of this function, we should have a path which has all 1567 * slots incremented to the next position for a search. If we need to 1568 * read a new node it will be NULL and the node above it will have the 1569 * correct slot selected for a later read. 
1570 * 1571 * If we increment the root nodes slot counter past the number of 1572 * elements, 1 is returned to signal completion of the search. 1573 */ 1574 static int adjust_slots_upwards(struct btrfs_root *root, 1575 struct btrfs_path *path, int root_level) 1576 { 1577 int level = 0; 1578 int nr, slot; 1579 struct extent_buffer *eb; 1580 1581 if (root_level == 0) 1582 return 1; 1583 1584 while (level <= root_level) { 1585 eb = path->nodes[level]; 1586 nr = btrfs_header_nritems(eb); 1587 path->slots[level]++; 1588 slot = path->slots[level]; 1589 if (slot >= nr || level == 0) { 1590 /* 1591 * Don't free the root - we will detect this 1592 * condition after our loop and return a 1593 * positive value for caller to stop walking the tree. 1594 */ 1595 if (level != root_level) { 1596 btrfs_tree_unlock_rw(eb, path->locks[level]); 1597 path->locks[level] = 0; 1598 1599 free_extent_buffer(eb); 1600 path->nodes[level] = NULL; 1601 path->slots[level] = 0; 1602 } 1603 } else { 1604 /* 1605 * We have a valid slot to walk back down 1606 * from. Stop here so caller can process these 1607 * new nodes. 
1608 */ 1609 break; 1610 } 1611 1612 level++; 1613 } 1614 1615 eb = path->nodes[root_level]; 1616 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1617 return 1; 1618 1619 return 0; 1620 } 1621 1622 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1623 struct btrfs_root *root, 1624 struct extent_buffer *root_eb, 1625 u64 root_gen, int root_level) 1626 { 1627 struct btrfs_fs_info *fs_info = root->fs_info; 1628 int ret = 0; 1629 int level; 1630 struct extent_buffer *eb = root_eb; 1631 struct btrfs_path *path = NULL; 1632 1633 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 1634 BUG_ON(root_eb == NULL); 1635 1636 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1637 return 0; 1638 1639 if (!extent_buffer_uptodate(root_eb)) { 1640 ret = btrfs_read_buffer(root_eb, root_gen); 1641 if (ret) 1642 goto out; 1643 } 1644 1645 if (root_level == 0) { 1646 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1647 goto out; 1648 } 1649 1650 path = btrfs_alloc_path(); 1651 if (!path) 1652 return -ENOMEM; 1653 1654 /* 1655 * Walk down the tree. Missing extent blocks are filled in as 1656 * we go. Metadata is accounted every time we read a new 1657 * extent block. 1658 * 1659 * When we reach a leaf, we account for file extent items in it, 1660 * walk back up the tree (adjusting slot pointers as we go) 1661 * and restart the search process. 1662 */ 1663 extent_buffer_get(root_eb); /* For path */ 1664 path->nodes[root_level] = root_eb; 1665 path->slots[root_level] = 0; 1666 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1667 walk_down: 1668 level = root_level; 1669 while (level >= 0) { 1670 if (path->nodes[level] == NULL) { 1671 int parent_slot; 1672 u64 child_gen; 1673 u64 child_bytenr; 1674 1675 /* 1676 * We need to get child blockptr/gen from parent before 1677 * we can read it. 
1678 */ 1679 eb = path->nodes[level + 1]; 1680 parent_slot = path->slots[level + 1]; 1681 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1682 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1683 1684 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1685 if (IS_ERR(eb)) { 1686 ret = PTR_ERR(eb); 1687 goto out; 1688 } else if (!extent_buffer_uptodate(eb)) { 1689 free_extent_buffer(eb); 1690 ret = -EIO; 1691 goto out; 1692 } 1693 1694 path->nodes[level] = eb; 1695 path->slots[level] = 0; 1696 1697 btrfs_tree_read_lock(eb); 1698 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1699 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1700 1701 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1702 child_bytenr, 1703 fs_info->nodesize, 1704 GFP_NOFS); 1705 if (ret) 1706 goto out; 1707 } 1708 1709 if (level == 0) { 1710 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1711 path->nodes[level]); 1712 if (ret) 1713 goto out; 1714 1715 /* Nonzero return here means we completed our search */ 1716 ret = adjust_slots_upwards(root, path, root_level); 1717 if (ret) 1718 break; 1719 1720 /* Restart search with new slots */ 1721 goto walk_down; 1722 } 1723 1724 level--; 1725 } 1726 1727 ret = 0; 1728 out: 1729 btrfs_free_path(path); 1730 1731 return ret; 1732 } 1733 1734 #define UPDATE_NEW 0 1735 #define UPDATE_OLD 1 1736 /* 1737 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
1738 */ 1739 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1740 struct ulist *roots, struct ulist *tmp, 1741 struct ulist *qgroups, u64 seq, int update_old) 1742 { 1743 struct ulist_node *unode; 1744 struct ulist_iterator uiter; 1745 struct ulist_node *tmp_unode; 1746 struct ulist_iterator tmp_uiter; 1747 struct btrfs_qgroup *qg; 1748 int ret = 0; 1749 1750 if (!roots) 1751 return 0; 1752 ULIST_ITER_INIT(&uiter); 1753 while ((unode = ulist_next(roots, &uiter))) { 1754 qg = find_qgroup_rb(fs_info, unode->val); 1755 if (!qg) 1756 continue; 1757 1758 ulist_reinit(tmp); 1759 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1760 GFP_ATOMIC); 1761 if (ret < 0) 1762 return ret; 1763 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1764 if (ret < 0) 1765 return ret; 1766 ULIST_ITER_INIT(&tmp_uiter); 1767 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1768 struct btrfs_qgroup_list *glist; 1769 1770 qg = unode_aux_to_qgroup(tmp_unode); 1771 if (update_old) 1772 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1773 else 1774 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1775 list_for_each_entry(glist, &qg->groups, next_group) { 1776 ret = ulist_add(qgroups, glist->group->qgroupid, 1777 qgroup_to_aux(glist->group), 1778 GFP_ATOMIC); 1779 if (ret < 0) 1780 return ret; 1781 ret = ulist_add(tmp, glist->group->qgroupid, 1782 qgroup_to_aux(glist->group), 1783 GFP_ATOMIC); 1784 if (ret < 0) 1785 return ret; 1786 } 1787 } 1788 } 1789 return 0; 1790 } 1791 1792 /* 1793 * Update qgroup rfer/excl counters. 1794 * Rfer update is easy, codes can explain themselves. 1795 * 1796 * Excl update is tricky, the update is split into 2 part. 
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 *  -------------------------------------
 *  B	|	*	|	-	|
 *  -------------------------------------
 *  !B	|	+	|	**	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use condition A and B to split
 * combination into 4 results.
 *
 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
 * only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
1827 */ 1828 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1829 struct ulist *qgroups, 1830 u64 nr_old_roots, 1831 u64 nr_new_roots, 1832 u64 num_bytes, u64 seq) 1833 { 1834 struct ulist_node *unode; 1835 struct ulist_iterator uiter; 1836 struct btrfs_qgroup *qg; 1837 u64 cur_new_count, cur_old_count; 1838 1839 ULIST_ITER_INIT(&uiter); 1840 while ((unode = ulist_next(qgroups, &uiter))) { 1841 bool dirty = false; 1842 1843 qg = unode_aux_to_qgroup(unode); 1844 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1845 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1846 1847 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1848 cur_old_count, cur_new_count); 1849 1850 /* Rfer update part */ 1851 if (cur_old_count == 0 && cur_new_count > 0) { 1852 qg->rfer += num_bytes; 1853 qg->rfer_cmpr += num_bytes; 1854 dirty = true; 1855 } 1856 if (cur_old_count > 0 && cur_new_count == 0) { 1857 qg->rfer -= num_bytes; 1858 qg->rfer_cmpr -= num_bytes; 1859 dirty = true; 1860 } 1861 1862 /* Excl update part */ 1863 /* Exclusive/none -> shared case */ 1864 if (cur_old_count == nr_old_roots && 1865 cur_new_count < nr_new_roots) { 1866 /* Exclusive -> shared */ 1867 if (cur_old_count != 0) { 1868 qg->excl -= num_bytes; 1869 qg->excl_cmpr -= num_bytes; 1870 dirty = true; 1871 } 1872 } 1873 1874 /* Shared -> exclusive/none case */ 1875 if (cur_old_count < nr_old_roots && 1876 cur_new_count == nr_new_roots) { 1877 /* Shared->exclusive */ 1878 if (cur_new_count != 0) { 1879 qg->excl += num_bytes; 1880 qg->excl_cmpr += num_bytes; 1881 dirty = true; 1882 } 1883 } 1884 1885 /* Exclusive/none -> exclusive/none case */ 1886 if (cur_old_count == nr_old_roots && 1887 cur_new_count == nr_new_roots) { 1888 if (cur_old_count == 0) { 1889 /* None -> exclusive/none */ 1890 1891 if (cur_new_count != 0) { 1892 /* None -> exclusive */ 1893 qg->excl += num_bytes; 1894 qg->excl_cmpr += num_bytes; 1895 dirty = true; 1896 } 1897 /* None -> none, nothing changed */ 1898 } else { 
1899 /* Exclusive -> exclusive/none */ 1900 1901 if (cur_new_count == 0) { 1902 /* Exclusive -> none */ 1903 qg->excl -= num_bytes; 1904 qg->excl_cmpr -= num_bytes; 1905 dirty = true; 1906 } 1907 /* Exclusive -> exclusive, nothing changed */ 1908 } 1909 } 1910 1911 if (dirty) 1912 qgroup_dirty(fs_info, qg); 1913 } 1914 return 0; 1915 } 1916 1917 int 1918 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1919 struct btrfs_fs_info *fs_info, 1920 u64 bytenr, u64 num_bytes, 1921 struct ulist *old_roots, struct ulist *new_roots) 1922 { 1923 struct ulist *qgroups = NULL; 1924 struct ulist *tmp = NULL; 1925 u64 seq; 1926 u64 nr_new_roots = 0; 1927 u64 nr_old_roots = 0; 1928 int ret = 0; 1929 1930 if (new_roots) 1931 nr_new_roots = new_roots->nnodes; 1932 if (old_roots) 1933 nr_old_roots = old_roots->nnodes; 1934 1935 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1936 goto out_free; 1937 BUG_ON(!fs_info->quota_root); 1938 1939 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1940 nr_old_roots, nr_new_roots); 1941 1942 qgroups = ulist_alloc(GFP_NOFS); 1943 if (!qgroups) { 1944 ret = -ENOMEM; 1945 goto out_free; 1946 } 1947 tmp = ulist_alloc(GFP_NOFS); 1948 if (!tmp) { 1949 ret = -ENOMEM; 1950 goto out_free; 1951 } 1952 1953 mutex_lock(&fs_info->qgroup_rescan_lock); 1954 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1955 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1956 mutex_unlock(&fs_info->qgroup_rescan_lock); 1957 ret = 0; 1958 goto out_free; 1959 } 1960 } 1961 mutex_unlock(&fs_info->qgroup_rescan_lock); 1962 1963 spin_lock(&fs_info->qgroup_lock); 1964 seq = fs_info->qgroup_seq; 1965 1966 /* Update old refcnts using old_roots */ 1967 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1968 UPDATE_OLD); 1969 if (ret < 0) 1970 goto out; 1971 1972 /* Update new refcnts using new_roots */ 1973 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1974 UPDATE_NEW); 1975 if (ret < 0) 
1976 goto out; 1977 1978 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1979 num_bytes, seq); 1980 1981 /* 1982 * Bump qgroup_seq to avoid seq overlap 1983 */ 1984 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 1985 out: 1986 spin_unlock(&fs_info->qgroup_lock); 1987 out_free: 1988 ulist_free(tmp); 1989 ulist_free(qgroups); 1990 ulist_free(old_roots); 1991 ulist_free(new_roots); 1992 return ret; 1993 } 1994 1995 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 1996 struct btrfs_fs_info *fs_info) 1997 { 1998 struct btrfs_qgroup_extent_record *record; 1999 struct btrfs_delayed_ref_root *delayed_refs; 2000 struct ulist *new_roots = NULL; 2001 struct rb_node *node; 2002 u64 qgroup_to_skip; 2003 int ret = 0; 2004 2005 delayed_refs = &trans->transaction->delayed_refs; 2006 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2007 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2008 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2009 node); 2010 2011 trace_btrfs_qgroup_account_extents(fs_info, record); 2012 2013 if (!ret) { 2014 /* 2015 * Use (u64)-1 as time_seq to do special search, which 2016 * doesn't lock tree or delayed_refs and search current 2017 * root. It's safe inside commit_transaction(). 2018 */ 2019 ret = btrfs_find_all_roots(trans, fs_info, 2020 record->bytenr, (u64)-1, &new_roots); 2021 if (ret < 0) 2022 goto cleanup; 2023 if (qgroup_to_skip) 2024 ulist_del(new_roots, qgroup_to_skip, 0); 2025 ret = btrfs_qgroup_account_extent(trans, fs_info, 2026 record->bytenr, record->num_bytes, 2027 record->old_roots, new_roots); 2028 record->old_roots = NULL; 2029 new_roots = NULL; 2030 } 2031 cleanup: 2032 ulist_free(record->old_roots); 2033 ulist_free(new_roots); 2034 new_roots = NULL; 2035 rb_erase(node, &delayed_refs->dirty_extent_root); 2036 kfree(record); 2037 2038 } 2039 return ret; 2040 } 2041 2042 /* 2043 * called from commit_transaction. Writes all changed qgroups to disk. 
2044 */ 2045 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2046 struct btrfs_fs_info *fs_info) 2047 { 2048 struct btrfs_root *quota_root = fs_info->quota_root; 2049 int ret = 0; 2050 int start_rescan_worker = 0; 2051 2052 if (!quota_root) 2053 goto out; 2054 2055 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2056 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2057 start_rescan_worker = 1; 2058 2059 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2060 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2061 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2062 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2063 2064 spin_lock(&fs_info->qgroup_lock); 2065 while (!list_empty(&fs_info->dirty_qgroups)) { 2066 struct btrfs_qgroup *qgroup; 2067 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2068 struct btrfs_qgroup, dirty); 2069 list_del_init(&qgroup->dirty); 2070 spin_unlock(&fs_info->qgroup_lock); 2071 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2072 if (ret) 2073 fs_info->qgroup_flags |= 2074 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2075 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2076 if (ret) 2077 fs_info->qgroup_flags |= 2078 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2079 spin_lock(&fs_info->qgroup_lock); 2080 } 2081 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2082 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2083 else 2084 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2085 spin_unlock(&fs_info->qgroup_lock); 2086 2087 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2088 if (ret) 2089 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2090 2091 if (!ret && start_rescan_worker) { 2092 ret = qgroup_rescan_init(fs_info, 0, 1); 2093 if (!ret) { 2094 qgroup_rescan_zero_tracking(fs_info); 2095 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2096 &fs_info->qgroup_rescan_work); 2097 } 2098 ret = 0; 2099 } 2100 2101 out: 2102 
2103 return ret; 2104 } 2105 2106 /* 2107 * Copy the accounting information between qgroups. This is necessary 2108 * when a snapshot or a subvolume is created. Throwing an error will 2109 * cause a transaction abort so we take extra care here to only error 2110 * when a readonly fs is a reasonable outcome. 2111 */ 2112 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2113 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2114 struct btrfs_qgroup_inherit *inherit) 2115 { 2116 int ret = 0; 2117 int i; 2118 u64 *i_qgroups; 2119 struct btrfs_root *quota_root = fs_info->quota_root; 2120 struct btrfs_qgroup *srcgroup; 2121 struct btrfs_qgroup *dstgroup; 2122 u32 level_size = 0; 2123 u64 nums; 2124 2125 mutex_lock(&fs_info->qgroup_ioctl_lock); 2126 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2127 goto out; 2128 2129 if (!quota_root) { 2130 ret = -EINVAL; 2131 goto out; 2132 } 2133 2134 if (inherit) { 2135 i_qgroups = (u64 *)(inherit + 1); 2136 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2137 2 * inherit->num_excl_copies; 2138 for (i = 0; i < nums; ++i) { 2139 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2140 2141 /* 2142 * Zero out invalid groups so we can ignore 2143 * them later. 
2144 */ 2145 if (!srcgroup || 2146 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2147 *i_qgroups = 0ULL; 2148 2149 ++i_qgroups; 2150 } 2151 } 2152 2153 /* 2154 * create a tracking group for the subvol itself 2155 */ 2156 ret = add_qgroup_item(trans, quota_root, objectid); 2157 if (ret) 2158 goto out; 2159 2160 if (srcid) { 2161 struct btrfs_root *srcroot; 2162 struct btrfs_key srckey; 2163 2164 srckey.objectid = srcid; 2165 srckey.type = BTRFS_ROOT_ITEM_KEY; 2166 srckey.offset = (u64)-1; 2167 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2168 if (IS_ERR(srcroot)) { 2169 ret = PTR_ERR(srcroot); 2170 goto out; 2171 } 2172 2173 rcu_read_lock(); 2174 level_size = fs_info->nodesize; 2175 rcu_read_unlock(); 2176 } 2177 2178 /* 2179 * add qgroup to all inherited groups 2180 */ 2181 if (inherit) { 2182 i_qgroups = (u64 *)(inherit + 1); 2183 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2184 if (*i_qgroups == 0) 2185 continue; 2186 ret = add_qgroup_relation_item(trans, quota_root, 2187 objectid, *i_qgroups); 2188 if (ret && ret != -EEXIST) 2189 goto out; 2190 ret = add_qgroup_relation_item(trans, quota_root, 2191 *i_qgroups, objectid); 2192 if (ret && ret != -EEXIST) 2193 goto out; 2194 } 2195 ret = 0; 2196 } 2197 2198 2199 spin_lock(&fs_info->qgroup_lock); 2200 2201 dstgroup = add_qgroup_rb(fs_info, objectid); 2202 if (IS_ERR(dstgroup)) { 2203 ret = PTR_ERR(dstgroup); 2204 goto unlock; 2205 } 2206 2207 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2208 dstgroup->lim_flags = inherit->lim.flags; 2209 dstgroup->max_rfer = inherit->lim.max_rfer; 2210 dstgroup->max_excl = inherit->lim.max_excl; 2211 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2212 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2213 2214 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2215 if (ret) { 2216 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2217 btrfs_info(fs_info, 2218 "unable to update quota limit for %llu", 2219 
dstgroup->qgroupid); 2220 goto unlock; 2221 } 2222 } 2223 2224 if (srcid) { 2225 srcgroup = find_qgroup_rb(fs_info, srcid); 2226 if (!srcgroup) 2227 goto unlock; 2228 2229 /* 2230 * We call inherit after we clone the root in order to make sure 2231 * our counts don't go crazy, so at this point the only 2232 * difference between the two roots should be the root node. 2233 */ 2234 dstgroup->rfer = srcgroup->rfer; 2235 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2236 dstgroup->excl = level_size; 2237 dstgroup->excl_cmpr = level_size; 2238 srcgroup->excl = level_size; 2239 srcgroup->excl_cmpr = level_size; 2240 2241 /* inherit the limit info */ 2242 dstgroup->lim_flags = srcgroup->lim_flags; 2243 dstgroup->max_rfer = srcgroup->max_rfer; 2244 dstgroup->max_excl = srcgroup->max_excl; 2245 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2246 dstgroup->rsv_excl = srcgroup->rsv_excl; 2247 2248 qgroup_dirty(fs_info, dstgroup); 2249 qgroup_dirty(fs_info, srcgroup); 2250 } 2251 2252 if (!inherit) 2253 goto unlock; 2254 2255 i_qgroups = (u64 *)(inherit + 1); 2256 for (i = 0; i < inherit->num_qgroups; ++i) { 2257 if (*i_qgroups) { 2258 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2259 if (ret) 2260 goto unlock; 2261 } 2262 ++i_qgroups; 2263 } 2264 2265 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2266 struct btrfs_qgroup *src; 2267 struct btrfs_qgroup *dst; 2268 2269 if (!i_qgroups[0] || !i_qgroups[1]) 2270 continue; 2271 2272 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2273 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2274 2275 if (!src || !dst) { 2276 ret = -EINVAL; 2277 goto unlock; 2278 } 2279 2280 dst->rfer = src->rfer - level_size; 2281 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2282 } 2283 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2284 struct btrfs_qgroup *src; 2285 struct btrfs_qgroup *dst; 2286 2287 if (!i_qgroups[0] || !i_qgroups[1]) 2288 continue; 2289 2290 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2291 dst = 
find_qgroup_rb(fs_info, i_qgroups[1]); 2292 2293 if (!src || !dst) { 2294 ret = -EINVAL; 2295 goto unlock; 2296 } 2297 2298 dst->excl = src->excl + level_size; 2299 dst->excl_cmpr = src->excl_cmpr + level_size; 2300 } 2301 2302 unlock: 2303 spin_unlock(&fs_info->qgroup_lock); 2304 out: 2305 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2306 return ret; 2307 } 2308 2309 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes) 2310 { 2311 struct btrfs_root *quota_root; 2312 struct btrfs_qgroup *qgroup; 2313 struct btrfs_fs_info *fs_info = root->fs_info; 2314 u64 ref_root = root->root_key.objectid; 2315 int ret = 0; 2316 struct ulist_node *unode; 2317 struct ulist_iterator uiter; 2318 2319 if (!is_fstree(ref_root)) 2320 return 0; 2321 2322 if (num_bytes == 0) 2323 return 0; 2324 2325 spin_lock(&fs_info->qgroup_lock); 2326 quota_root = fs_info->quota_root; 2327 if (!quota_root) 2328 goto out; 2329 2330 qgroup = find_qgroup_rb(fs_info, ref_root); 2331 if (!qgroup) 2332 goto out; 2333 2334 /* 2335 * in a first step, we check all affected qgroups if any limits would 2336 * be exceeded 2337 */ 2338 ulist_reinit(fs_info->qgroup_ulist); 2339 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2340 (uintptr_t)qgroup, GFP_ATOMIC); 2341 if (ret < 0) 2342 goto out; 2343 ULIST_ITER_INIT(&uiter); 2344 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2345 struct btrfs_qgroup *qg; 2346 struct btrfs_qgroup_list *glist; 2347 2348 qg = unode_aux_to_qgroup(unode); 2349 2350 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2351 qg->reserved + (s64)qg->rfer + num_bytes > 2352 qg->max_rfer) { 2353 ret = -EDQUOT; 2354 goto out; 2355 } 2356 2357 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2358 qg->reserved + (s64)qg->excl + num_bytes > 2359 qg->max_excl) { 2360 ret = -EDQUOT; 2361 goto out; 2362 } 2363 2364 list_for_each_entry(glist, &qg->groups, next_group) { 2365 ret = ulist_add(fs_info->qgroup_ulist, 2366 glist->group->qgroupid, 2367 
(uintptr_t)glist->group, GFP_ATOMIC); 2368 if (ret < 0) 2369 goto out; 2370 } 2371 } 2372 ret = 0; 2373 /* 2374 * no limits exceeded, now record the reservation into all qgroups 2375 */ 2376 ULIST_ITER_INIT(&uiter); 2377 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2378 struct btrfs_qgroup *qg; 2379 2380 qg = unode_aux_to_qgroup(unode); 2381 2382 qg->reserved += num_bytes; 2383 } 2384 2385 out: 2386 spin_unlock(&fs_info->qgroup_lock); 2387 return ret; 2388 } 2389 2390 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 2391 u64 ref_root, u64 num_bytes) 2392 { 2393 struct btrfs_root *quota_root; 2394 struct btrfs_qgroup *qgroup; 2395 struct ulist_node *unode; 2396 struct ulist_iterator uiter; 2397 int ret = 0; 2398 2399 if (!is_fstree(ref_root)) 2400 return; 2401 2402 if (num_bytes == 0) 2403 return; 2404 2405 spin_lock(&fs_info->qgroup_lock); 2406 2407 quota_root = fs_info->quota_root; 2408 if (!quota_root) 2409 goto out; 2410 2411 qgroup = find_qgroup_rb(fs_info, ref_root); 2412 if (!qgroup) 2413 goto out; 2414 2415 ulist_reinit(fs_info->qgroup_ulist); 2416 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2417 (uintptr_t)qgroup, GFP_ATOMIC); 2418 if (ret < 0) 2419 goto out; 2420 ULIST_ITER_INIT(&uiter); 2421 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2422 struct btrfs_qgroup *qg; 2423 struct btrfs_qgroup_list *glist; 2424 2425 qg = unode_aux_to_qgroup(unode); 2426 2427 qg->reserved -= num_bytes; 2428 2429 list_for_each_entry(glist, &qg->groups, next_group) { 2430 ret = ulist_add(fs_info->qgroup_ulist, 2431 glist->group->qgroupid, 2432 (uintptr_t)glist->group, GFP_ATOMIC); 2433 if (ret < 0) 2434 goto out; 2435 } 2436 } 2437 2438 out: 2439 spin_unlock(&fs_info->qgroup_lock); 2440 } 2441 2442 static inline void qgroup_free(struct btrfs_root *root, u64 num_bytes) 2443 { 2444 return btrfs_qgroup_free_refroot(root->fs_info, root->objectid, 2445 num_bytes); 2446 } 2447 void assert_qgroups_uptodate(struct 
btrfs_trans_handle *trans) 2448 { 2449 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq) 2450 return; 2451 btrfs_err(trans->fs_info, 2452 "qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x", 2453 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not", 2454 (u32)(trans->delayed_ref_elem.seq >> 32), 2455 (u32)trans->delayed_ref_elem.seq); 2456 BUG(); 2457 } 2458 2459 /* 2460 * returns < 0 on error, 0 when more leafs are to be scanned. 2461 * returns 1 when done. 2462 */ 2463 static int 2464 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2465 struct btrfs_trans_handle *trans) 2466 { 2467 struct btrfs_key found; 2468 struct extent_buffer *scratch_leaf = NULL; 2469 struct ulist *roots = NULL; 2470 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); 2471 u64 num_bytes; 2472 int slot; 2473 int ret; 2474 2475 mutex_lock(&fs_info->qgroup_rescan_lock); 2476 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2477 &fs_info->qgroup_rescan_progress, 2478 path, 1, 0); 2479 2480 btrfs_debug(fs_info, 2481 "current progress key (%llu %u %llu), search_slot ret %d", 2482 fs_info->qgroup_rescan_progress.objectid, 2483 fs_info->qgroup_rescan_progress.type, 2484 fs_info->qgroup_rescan_progress.offset, ret); 2485 2486 if (ret) { 2487 /* 2488 * The rescan is about to end, we will not be scanning any 2489 * further blocks. We cannot unset the RESCAN flag here, because 2490 * we want to commit the transaction if everything went well. 2491 * To make the live accounting work in this phase, we set our 2492 * scan progress pointer such that every real extent objectid 2493 * will be smaller. 
2494 */ 2495 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2496 btrfs_release_path(path); 2497 mutex_unlock(&fs_info->qgroup_rescan_lock); 2498 return ret; 2499 } 2500 2501 btrfs_item_key_to_cpu(path->nodes[0], &found, 2502 btrfs_header_nritems(path->nodes[0]) - 1); 2503 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2504 2505 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2506 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 2507 if (!scratch_leaf) { 2508 ret = -ENOMEM; 2509 mutex_unlock(&fs_info->qgroup_rescan_lock); 2510 goto out; 2511 } 2512 extent_buffer_get(scratch_leaf); 2513 btrfs_tree_read_lock(scratch_leaf); 2514 btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); 2515 slot = path->slots[0]; 2516 btrfs_release_path(path); 2517 mutex_unlock(&fs_info->qgroup_rescan_lock); 2518 2519 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2520 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2521 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2522 found.type != BTRFS_METADATA_ITEM_KEY) 2523 continue; 2524 if (found.type == BTRFS_METADATA_ITEM_KEY) 2525 num_bytes = fs_info->nodesize; 2526 else 2527 num_bytes = found.offset; 2528 2529 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2530 &roots); 2531 if (ret < 0) 2532 goto out; 2533 /* For rescan, just pass old_roots as NULL */ 2534 ret = btrfs_qgroup_account_extent(trans, fs_info, 2535 found.objectid, num_bytes, NULL, roots); 2536 if (ret < 0) 2537 goto out; 2538 } 2539 out: 2540 if (scratch_leaf) { 2541 btrfs_tree_read_unlock_blocking(scratch_leaf); 2542 free_extent_buffer(scratch_leaf); 2543 } 2544 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2545 2546 return ret; 2547 } 2548 2549 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2550 { 2551 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2552 qgroup_rescan_work); 2553 struct btrfs_path *path; 2554 struct btrfs_trans_handle *trans = NULL; 2555 int err = 
-ENOMEM; 2556 int ret = 0; 2557 2558 path = btrfs_alloc_path(); 2559 if (!path) 2560 goto out; 2561 2562 err = 0; 2563 while (!err && !btrfs_fs_closing(fs_info)) { 2564 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2565 if (IS_ERR(trans)) { 2566 err = PTR_ERR(trans); 2567 break; 2568 } 2569 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2570 err = -EINTR; 2571 } else { 2572 err = qgroup_rescan_leaf(fs_info, path, trans); 2573 } 2574 if (err > 0) 2575 btrfs_commit_transaction(trans); 2576 else 2577 btrfs_end_transaction(trans); 2578 } 2579 2580 out: 2581 btrfs_free_path(path); 2582 2583 mutex_lock(&fs_info->qgroup_rescan_lock); 2584 if (!btrfs_fs_closing(fs_info)) 2585 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2586 2587 if (err > 0 && 2588 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2589 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2590 } else if (err < 0) { 2591 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2592 } 2593 mutex_unlock(&fs_info->qgroup_rescan_lock); 2594 2595 /* 2596 * only update status, since the previous part has already updated the 2597 * qgroup info. 2598 */ 2599 trans = btrfs_start_transaction(fs_info->quota_root, 1); 2600 if (IS_ERR(trans)) { 2601 err = PTR_ERR(trans); 2602 btrfs_err(fs_info, 2603 "fail to start transaction for status update: %d\n", 2604 err); 2605 goto done; 2606 } 2607 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2608 if (ret < 0) { 2609 err = ret; 2610 btrfs_err(fs_info, "fail to update qgroup status: %d", err); 2611 } 2612 btrfs_end_transaction(trans); 2613 2614 if (btrfs_fs_closing(fs_info)) { 2615 btrfs_info(fs_info, "qgroup scan paused"); 2616 } else if (err >= 0) { 2617 btrfs_info(fs_info, "qgroup scan completed%s", 2618 err > 0 ? 
" (inconsistency flag cleared)" : ""); 2619 } else { 2620 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2621 } 2622 2623 done: 2624 mutex_lock(&fs_info->qgroup_rescan_lock); 2625 fs_info->qgroup_rescan_running = false; 2626 mutex_unlock(&fs_info->qgroup_rescan_lock); 2627 complete_all(&fs_info->qgroup_rescan_completion); 2628 } 2629 2630 /* 2631 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2632 * memory required for the rescan context. 2633 */ 2634 static int 2635 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2636 int init_flags) 2637 { 2638 int ret = 0; 2639 2640 if (!init_flags && 2641 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2642 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2643 ret = -EINVAL; 2644 goto err; 2645 } 2646 2647 mutex_lock(&fs_info->qgroup_rescan_lock); 2648 spin_lock(&fs_info->qgroup_lock); 2649 2650 if (init_flags) { 2651 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2652 ret = -EINPROGRESS; 2653 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2654 ret = -EINVAL; 2655 2656 if (ret) { 2657 spin_unlock(&fs_info->qgroup_lock); 2658 mutex_unlock(&fs_info->qgroup_rescan_lock); 2659 goto err; 2660 } 2661 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2662 } 2663 2664 memset(&fs_info->qgroup_rescan_progress, 0, 2665 sizeof(fs_info->qgroup_rescan_progress)); 2666 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2667 init_completion(&fs_info->qgroup_rescan_completion); 2668 fs_info->qgroup_rescan_running = true; 2669 2670 spin_unlock(&fs_info->qgroup_lock); 2671 mutex_unlock(&fs_info->qgroup_rescan_lock); 2672 2673 memset(&fs_info->qgroup_rescan_work, 0, 2674 sizeof(fs_info->qgroup_rescan_work)); 2675 btrfs_init_work(&fs_info->qgroup_rescan_work, 2676 btrfs_qgroup_rescan_helper, 2677 btrfs_qgroup_rescan_worker, NULL, NULL); 2678 2679 if (ret) { 2680 err: 2681 btrfs_info(fs_info, 
"qgroup_rescan_init failed with %d", ret); 2682 return ret; 2683 } 2684 2685 return 0; 2686 } 2687 2688 static void 2689 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2690 { 2691 struct rb_node *n; 2692 struct btrfs_qgroup *qgroup; 2693 2694 spin_lock(&fs_info->qgroup_lock); 2695 /* clear all current qgroup tracking information */ 2696 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2697 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2698 qgroup->rfer = 0; 2699 qgroup->rfer_cmpr = 0; 2700 qgroup->excl = 0; 2701 qgroup->excl_cmpr = 0; 2702 } 2703 spin_unlock(&fs_info->qgroup_lock); 2704 } 2705 2706 int 2707 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2708 { 2709 int ret = 0; 2710 struct btrfs_trans_handle *trans; 2711 2712 ret = qgroup_rescan_init(fs_info, 0, 1); 2713 if (ret) 2714 return ret; 2715 2716 /* 2717 * We have set the rescan_progress to 0, which means no more 2718 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2719 * However, btrfs_qgroup_account_ref may be right after its call 2720 * to btrfs_find_all_roots, in which case it would still do the 2721 * accounting. 2722 * To solve this, we're committing the transaction, which will 2723 * ensure we run all delayed refs and only after that, we are 2724 * going to clear all tracking information for a clean start. 
2725 */ 2726 2727 trans = btrfs_join_transaction(fs_info->fs_root); 2728 if (IS_ERR(trans)) { 2729 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2730 return PTR_ERR(trans); 2731 } 2732 ret = btrfs_commit_transaction(trans); 2733 if (ret) { 2734 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2735 return ret; 2736 } 2737 2738 qgroup_rescan_zero_tracking(fs_info); 2739 2740 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2741 &fs_info->qgroup_rescan_work); 2742 2743 return 0; 2744 } 2745 2746 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 2747 bool interruptible) 2748 { 2749 int running; 2750 int ret = 0; 2751 2752 mutex_lock(&fs_info->qgroup_rescan_lock); 2753 spin_lock(&fs_info->qgroup_lock); 2754 running = fs_info->qgroup_rescan_running; 2755 spin_unlock(&fs_info->qgroup_lock); 2756 mutex_unlock(&fs_info->qgroup_rescan_lock); 2757 2758 if (!running) 2759 return 0; 2760 2761 if (interruptible) 2762 ret = wait_for_completion_interruptible( 2763 &fs_info->qgroup_rescan_completion); 2764 else 2765 wait_for_completion(&fs_info->qgroup_rescan_completion); 2766 2767 return ret; 2768 } 2769 2770 /* 2771 * this is only called from open_ctree where we're still single threaded, thus 2772 * locking is omitted here. 2773 */ 2774 void 2775 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2776 { 2777 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2778 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2779 &fs_info->qgroup_rescan_work); 2780 } 2781 2782 /* 2783 * Reserve qgroup space for range [start, start + len). 2784 * 2785 * This function will either reserve space from related qgroups or doing 2786 * nothing if the range is already reserved. 2787 * 2788 * Return 0 for successful reserve 2789 * Return <0 for error (including -EQUOT) 2790 * 2791 * NOTE: this function may sleep for memory allocation. 
2792 */ 2793 int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len) 2794 { 2795 struct btrfs_root *root = BTRFS_I(inode)->root; 2796 struct extent_changeset changeset; 2797 struct ulist_node *unode; 2798 struct ulist_iterator uiter; 2799 int ret; 2800 2801 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 2802 !is_fstree(root->objectid) || len == 0) 2803 return 0; 2804 2805 changeset.bytes_changed = 0; 2806 changeset.range_changed = ulist_alloc(GFP_NOFS); 2807 ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2808 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2809 trace_btrfs_qgroup_reserve_data(inode, start, len, 2810 changeset.bytes_changed, 2811 QGROUP_RESERVE); 2812 if (ret < 0) 2813 goto cleanup; 2814 ret = qgroup_reserve(root, changeset.bytes_changed); 2815 if (ret < 0) 2816 goto cleanup; 2817 2818 ulist_free(changeset.range_changed); 2819 return ret; 2820 2821 cleanup: 2822 /* cleanup already reserved ranges */ 2823 ULIST_ITER_INIT(&uiter); 2824 while ((unode = ulist_next(changeset.range_changed, &uiter))) 2825 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, 2826 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, 2827 GFP_NOFS); 2828 ulist_free(changeset.range_changed); 2829 return ret; 2830 } 2831 2832 static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len, 2833 int free) 2834 { 2835 struct extent_changeset changeset; 2836 int trace_op = QGROUP_RELEASE; 2837 int ret; 2838 2839 changeset.bytes_changed = 0; 2840 changeset.range_changed = ulist_alloc(GFP_NOFS); 2841 if (!changeset.range_changed) 2842 return -ENOMEM; 2843 2844 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2845 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2846 if (ret < 0) 2847 goto out; 2848 2849 if (free) { 2850 qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); 2851 trace_op = QGROUP_FREE; 2852 } 2853 trace_btrfs_qgroup_release_data(inode, start, len, 2854 changeset.bytes_changed, 
trace_op); 2855 out: 2856 ulist_free(changeset.range_changed); 2857 return ret; 2858 } 2859 2860 /* 2861 * Free a reserved space range from io_tree and related qgroups 2862 * 2863 * Should be called when a range of pages get invalidated before reaching disk. 2864 * Or for error cleanup case. 2865 * 2866 * For data written to disk, use btrfs_qgroup_release_data(). 2867 * 2868 * NOTE: This function may sleep for memory allocation. 2869 */ 2870 int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len) 2871 { 2872 return __btrfs_qgroup_release_data(inode, start, len, 1); 2873 } 2874 2875 /* 2876 * Release a reserved space range from io_tree only. 2877 * 2878 * Should be called when a range of pages get written to disk and corresponding 2879 * FILE_EXTENT is inserted into corresponding root. 2880 * 2881 * Since new qgroup accounting framework will only update qgroup numbers at 2882 * commit_transaction() time, its reserved space shouldn't be freed from 2883 * related qgroups. 2884 * 2885 * But we should release the range from io_tree, to allow further write to be 2886 * COWed. 2887 * 2888 * NOTE: This function may sleep for memory allocation. 
2889 */ 2890 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) 2891 { 2892 return __btrfs_qgroup_release_data(inode, start, len, 0); 2893 } 2894 2895 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes) 2896 { 2897 struct btrfs_fs_info *fs_info = root->fs_info; 2898 int ret; 2899 2900 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2901 !is_fstree(root->objectid) || num_bytes == 0) 2902 return 0; 2903 2904 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 2905 ret = qgroup_reserve(root, num_bytes); 2906 if (ret < 0) 2907 return ret; 2908 atomic_add(num_bytes, &root->qgroup_meta_rsv); 2909 return ret; 2910 } 2911 2912 void btrfs_qgroup_free_meta_all(struct btrfs_root *root) 2913 { 2914 struct btrfs_fs_info *fs_info = root->fs_info; 2915 int reserved; 2916 2917 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2918 !is_fstree(root->objectid)) 2919 return; 2920 2921 reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); 2922 if (reserved == 0) 2923 return; 2924 qgroup_free(root, reserved); 2925 } 2926 2927 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) 2928 { 2929 struct btrfs_fs_info *fs_info = root->fs_info; 2930 2931 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 2932 !is_fstree(root->objectid)) 2933 return; 2934 2935 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 2936 WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); 2937 atomic_sub(num_bytes, &root->qgroup_meta_rsv); 2938 qgroup_free(root, num_bytes); 2939 } 2940 2941 /* 2942 * Check qgroup reserved space leaking, normally at destroy inode 2943 * time 2944 */ 2945 void btrfs_qgroup_check_reserved_leak(struct inode *inode) 2946 { 2947 struct extent_changeset changeset; 2948 struct ulist_node *unode; 2949 struct ulist_iterator iter; 2950 int ret; 2951 2952 changeset.bytes_changed = 0; 2953 changeset.range_changed = ulist_alloc(GFP_NOFS); 2954 if (WARN_ON(!changeset.range_changed)) 2955 return; 
2956 2957 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 2958 EXTENT_QGROUP_RESERVED, &changeset); 2959 2960 WARN_ON(ret < 0); 2961 if (WARN_ON(changeset.bytes_changed)) { 2962 ULIST_ITER_INIT(&iter); 2963 while ((unode = ulist_next(changeset.range_changed, &iter))) { 2964 btrfs_warn(BTRFS_I(inode)->root->fs_info, 2965 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", 2966 inode->i_ino, unode->val, unode->aux); 2967 } 2968 qgroup_free(BTRFS_I(inode)->root, changeset.bytes_changed); 2969 } 2970 ulist_free(changeset.range_changed); 2971 } 2972