/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"


/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->old_refcnt < seq)
		qg->old_refcnt = seq;
	qg->old_refcnt += mod;
}

static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->new_refcnt < seq)
		qg->new_refcnt = seq;
	qg->new_refcnt += mod;
}

static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->old_refcnt < seq)
		return 0;
	return qg->old_refcnt - seq;
}

static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->new_refcnt < seq)
		return 0;
	return qg->new_refcnt - seq;
}

/*
 * glue structure to represent the relations between qgroups.
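 *
 * One btrfs_qgroup_list entry exists per member/parent relation: its
 * next_group node links it into the member's ->groups list (all parents
 * of that member) and its next_member node links it into the parent's
 * ->members list (all children of that parent), as wired up in
 * add_relation_rb().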
82 */ 83 struct btrfs_qgroup_list { 84 struct list_head next_group; 85 struct list_head next_member; 86 struct btrfs_qgroup *group; 87 struct btrfs_qgroup *member; 88 }; 89 90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 91 { 92 return (u64)(uintptr_t)qg; 93 } 94 95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 96 { 97 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 98 } 99 100 static int 101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 102 int init_flags); 103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 104 105 /* must be called with qgroup_ioctl_lock held */ 106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 107 u64 qgroupid) 108 { 109 struct rb_node *n = fs_info->qgroup_tree.rb_node; 110 struct btrfs_qgroup *qgroup; 111 112 while (n) { 113 qgroup = rb_entry(n, struct btrfs_qgroup, node); 114 if (qgroup->qgroupid < qgroupid) 115 n = n->rb_left; 116 else if (qgroup->qgroupid > qgroupid) 117 n = n->rb_right; 118 else 119 return qgroup; 120 } 121 return NULL; 122 } 123 124 /* must be called with qgroup_lock held */ 125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 126 u64 qgroupid) 127 { 128 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 129 struct rb_node *parent = NULL; 130 struct btrfs_qgroup *qgroup; 131 132 while (*p) { 133 parent = *p; 134 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 135 136 if (qgroup->qgroupid < qgroupid) 137 p = &(*p)->rb_left; 138 else if (qgroup->qgroupid > qgroupid) 139 p = &(*p)->rb_right; 140 else 141 return qgroup; 142 } 143 144 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 145 if (!qgroup) 146 return ERR_PTR(-ENOMEM); 147 148 qgroup->qgroupid = qgroupid; 149 INIT_LIST_HEAD(&qgroup->groups); 150 INIT_LIST_HEAD(&qgroup->members); 151 INIT_LIST_HEAD(&qgroup->dirty); 152 153 rb_link_node(&qgroup->node, parent, p); 154 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 155 156 return qgroup; 157 } 158 159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 160 { 161 struct btrfs_qgroup_list *list; 162 163 list_del(&qgroup->dirty); 164 while (!list_empty(&qgroup->groups)) { 165 list = list_first_entry(&qgroup->groups, 166 struct btrfs_qgroup_list, next_group); 167 list_del(&list->next_group); 168 list_del(&list->next_member); 169 kfree(list); 170 } 171 172 while (!list_empty(&qgroup->members)) { 173 list = list_first_entry(&qgroup->members, 174 struct btrfs_qgroup_list, next_member); 175 list_del(&list->next_group); 176 list_del(&list->next_member); 177 kfree(list); 178 } 179 kfree(qgroup); 180 } 181 182 /* must be called with qgroup_lock held */ 183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 184 { 185 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 186 187 if (!qgroup) 188 return -ENOENT; 189 190 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 191 __del_qgroup_rb(qgroup); 192 return 0; 193 } 194 195 /* must be called with qgroup_lock held */ 196 static int add_relation_rb(struct btrfs_fs_info *fs_info, 197 u64 memberid, u64 parentid) 198 { 199 struct btrfs_qgroup *member; 200 struct btrfs_qgroup *parent; 201 struct btrfs_qgroup_list *list; 202 203 member = find_qgroup_rb(fs_info, memberid); 204 parent = find_qgroup_rb(fs_info, parentid); 205 if (!member || !parent) 206 return -ENOENT; 207 208 list = kzalloc(sizeof(*list), GFP_ATOMIC); 209 if (!list) 210 return -ENOMEM; 211 212 list->group = parent; 213 list->member = member; 214 
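	/*
	 * Hook the new relation into both qgroups: the member gains a
	 * parent in its ->groups list and the parent gains a child in
	 * its ->members list.
	 */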
list_add_tail(&list->next_group, &member->groups); 215 list_add_tail(&list->next_member, &parent->members); 216 217 return 0; 218 } 219 220 /* must be called with qgroup_lock held */ 221 static int del_relation_rb(struct btrfs_fs_info *fs_info, 222 u64 memberid, u64 parentid) 223 { 224 struct btrfs_qgroup *member; 225 struct btrfs_qgroup *parent; 226 struct btrfs_qgroup_list *list; 227 228 member = find_qgroup_rb(fs_info, memberid); 229 parent = find_qgroup_rb(fs_info, parentid); 230 if (!member || !parent) 231 return -ENOENT; 232 233 list_for_each_entry(list, &member->groups, next_group) { 234 if (list->group == parent) { 235 list_del(&list->next_group); 236 list_del(&list->next_member); 237 kfree(list); 238 return 0; 239 } 240 } 241 return -ENOENT; 242 } 243 244 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 245 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 246 u64 rfer, u64 excl) 247 { 248 struct btrfs_qgroup *qgroup; 249 250 qgroup = find_qgroup_rb(fs_info, qgroupid); 251 if (!qgroup) 252 return -EINVAL; 253 if (qgroup->rfer != rfer || qgroup->excl != excl) 254 return -EINVAL; 255 return 0; 256 } 257 #endif 258 259 /* 260 * The full config is read in one go, only called from open_ctree() 261 * It doesn't use any locking, as at this point we're still single-threaded 262 */ 263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 264 { 265 struct btrfs_key key; 266 struct btrfs_key found_key; 267 struct btrfs_root *quota_root = fs_info->quota_root; 268 struct btrfs_path *path = NULL; 269 struct extent_buffer *l; 270 int slot; 271 int ret = 0; 272 u64 flags = 0; 273 u64 rescan_progress = 0; 274 275 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 276 return 0; 277 278 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 279 if (!fs_info->qgroup_ulist) { 280 ret = -ENOMEM; 281 goto out; 282 } 283 284 path = btrfs_alloc_path(); 285 if (!path) { 286 ret = -ENOMEM; 287 goto out; 288 } 289 290 /* default this to quota off, in case no status key is found */ 291 fs_info->qgroup_flags = 0; 292 293 /* 294 * pass 1: read status, all qgroup infos and limits 295 */ 296 key.objectid = 0; 297 key.type = 0; 298 key.offset = 0; 299 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 300 if (ret) 301 goto out; 302 303 while (1) { 304 struct btrfs_qgroup *qgroup; 305 306 slot = path->slots[0]; 307 l = path->nodes[0]; 308 btrfs_item_key_to_cpu(l, &found_key, slot); 309 310 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 311 struct btrfs_qgroup_status_item *ptr; 312 313 ptr = btrfs_item_ptr(l, slot, 314 struct btrfs_qgroup_status_item); 315 316 if (btrfs_qgroup_status_version(l, ptr) != 317 BTRFS_QGROUP_STATUS_VERSION) { 318 btrfs_err(fs_info, 319 "old qgroup version, quota disabled"); 320 goto out; 321 } 322 if (btrfs_qgroup_status_generation(l, ptr) != 323 fs_info->generation) { 324 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 325 btrfs_err(fs_info, 326 "qgroup generation mismatch, marked as inconsistent"); 327 } 328 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 329 ptr); 330 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 331 goto next1; 332 } 333 334 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 335 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 336 goto next1; 337 338 qgroup = find_qgroup_rb(fs_info, found_key.offset); 339 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 340 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 341 btrfs_err(fs_info, "inconsistent qgroup config"); 342 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 
343 } 344 if (!qgroup) { 345 qgroup = add_qgroup_rb(fs_info, found_key.offset); 346 if (IS_ERR(qgroup)) { 347 ret = PTR_ERR(qgroup); 348 goto out; 349 } 350 } 351 switch (found_key.type) { 352 case BTRFS_QGROUP_INFO_KEY: { 353 struct btrfs_qgroup_info_item *ptr; 354 355 ptr = btrfs_item_ptr(l, slot, 356 struct btrfs_qgroup_info_item); 357 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 358 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 359 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 360 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 361 /* generation currently unused */ 362 break; 363 } 364 case BTRFS_QGROUP_LIMIT_KEY: { 365 struct btrfs_qgroup_limit_item *ptr; 366 367 ptr = btrfs_item_ptr(l, slot, 368 struct btrfs_qgroup_limit_item); 369 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 370 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 371 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 372 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 373 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 374 break; 375 } 376 } 377 next1: 378 ret = btrfs_next_item(quota_root, path); 379 if (ret < 0) 380 goto out; 381 if (ret) 382 break; 383 } 384 btrfs_release_path(path); 385 386 /* 387 * pass 2: read all qgroup relations 388 */ 389 key.objectid = 0; 390 key.type = BTRFS_QGROUP_RELATION_KEY; 391 key.offset = 0; 392 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 393 if (ret) 394 goto out; 395 while (1) { 396 slot = path->slots[0]; 397 l = path->nodes[0]; 398 btrfs_item_key_to_cpu(l, &found_key, slot); 399 400 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 401 goto next2; 402 403 if (found_key.objectid > found_key.offset) { 404 /* parent <- member, not needed to build config */ 405 /* FIXME should we omit the key completely? */ 406 goto next2; 407 } 408 409 ret = add_relation_rb(fs_info, found_key.objectid, 410 found_key.offset); 411 if (ret == -ENOENT) { 412 btrfs_warn(fs_info, 413 "orphan qgroup relation 0x%llx->0x%llx", 414 found_key.objectid, found_key.offset); 415 ret = 0; /* ignore the error */ 416 } 417 if (ret) 418 goto out; 419 next2: 420 ret = btrfs_next_item(quota_root, path); 421 if (ret < 0) 422 goto out; 423 if (ret) 424 break; 425 } 426 out: 427 fs_info->qgroup_flags |= flags; 428 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 429 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 430 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 431 ret >= 0) 432 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 433 btrfs_free_path(path); 434 435 if (ret < 0) { 436 ulist_free(fs_info->qgroup_ulist); 437 fs_info->qgroup_ulist = NULL; 438 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 439 } 440 441 return ret < 0 ? ret : 0; 442 } 443 444 /* 445 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 446 * first two are in single-threaded paths.And for the third one, we have set 447 * quota_root to be null with qgroup_lock held before, so it is safe to clean 448 * up the in-memory structures without qgroup_lock held. 
449 */ 450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 451 { 452 struct rb_node *n; 453 struct btrfs_qgroup *qgroup; 454 455 while ((n = rb_first(&fs_info->qgroup_tree))) { 456 qgroup = rb_entry(n, struct btrfs_qgroup, node); 457 rb_erase(n, &fs_info->qgroup_tree); 458 __del_qgroup_rb(qgroup); 459 } 460 /* 461 * we call btrfs_free_qgroup_config() when umounting 462 * filesystem and disabling quota, so we set qgroup_ulist 463 * to be null here to avoid double free. 464 */ 465 ulist_free(fs_info->qgroup_ulist); 466 fs_info->qgroup_ulist = NULL; 467 } 468 469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 470 struct btrfs_root *quota_root, 471 u64 src, u64 dst) 472 { 473 int ret; 474 struct btrfs_path *path; 475 struct btrfs_key key; 476 477 path = btrfs_alloc_path(); 478 if (!path) 479 return -ENOMEM; 480 481 key.objectid = src; 482 key.type = BTRFS_QGROUP_RELATION_KEY; 483 key.offset = dst; 484 485 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 486 487 btrfs_mark_buffer_dirty(path->nodes[0]); 488 489 btrfs_free_path(path); 490 return ret; 491 } 492 493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 494 struct btrfs_root *quota_root, 495 u64 src, u64 dst) 496 { 497 int ret; 498 struct btrfs_path *path; 499 struct btrfs_key key; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return -ENOMEM; 504 505 key.objectid = src; 506 key.type = BTRFS_QGROUP_RELATION_KEY; 507 key.offset = dst; 508 509 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 510 if (ret < 0) 511 goto out; 512 513 if (ret > 0) { 514 ret = -ENOENT; 515 goto out; 516 } 517 518 ret = btrfs_del_item(trans, quota_root, path); 519 out: 520 btrfs_free_path(path); 521 return ret; 522 } 523 524 static int add_qgroup_item(struct btrfs_trans_handle *trans, 525 struct btrfs_root *quota_root, u64 qgroupid) 526 { 527 int ret; 528 struct btrfs_path *path; 529 struct btrfs_qgroup_info_item *qgroup_info; 530 struct btrfs_qgroup_limit_item *qgroup_limit; 531 struct extent_buffer *leaf; 532 struct btrfs_key key; 533 534 if (btrfs_is_testing(quota_root->fs_info)) 535 return 0; 536 537 path = btrfs_alloc_path(); 538 if (!path) 539 return -ENOMEM; 540 541 key.objectid = 0; 542 key.type = BTRFS_QGROUP_INFO_KEY; 543 key.offset = qgroupid; 544 545 /* 546 * Avoid a transaction abort by catching -EEXIST here. In that 547 * case, we proceed by re-initializing the existing structure 548 * on disk. 
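 * Both the INFO and the LIMIT items below are then rewritten with all-zero
 * counters and limits, the same state a freshly created qgroup gets.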
549 */ 550 551 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 552 sizeof(*qgroup_info)); 553 if (ret && ret != -EEXIST) 554 goto out; 555 556 leaf = path->nodes[0]; 557 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 558 struct btrfs_qgroup_info_item); 559 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 560 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 561 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 562 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 563 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 564 565 btrfs_mark_buffer_dirty(leaf); 566 567 btrfs_release_path(path); 568 569 key.type = BTRFS_QGROUP_LIMIT_KEY; 570 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 571 sizeof(*qgroup_limit)); 572 if (ret && ret != -EEXIST) 573 goto out; 574 575 leaf = path->nodes[0]; 576 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 577 struct btrfs_qgroup_limit_item); 578 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 579 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 580 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 581 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 582 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 583 584 btrfs_mark_buffer_dirty(leaf); 585 586 ret = 0; 587 out: 588 btrfs_free_path(path); 589 return ret; 590 } 591 592 static int del_qgroup_item(struct btrfs_trans_handle *trans, 593 struct btrfs_root *quota_root, u64 qgroupid) 594 { 595 int ret; 596 struct btrfs_path *path; 597 struct btrfs_key key; 598 599 path = btrfs_alloc_path(); 600 if (!path) 601 return -ENOMEM; 602 603 key.objectid = 0; 604 key.type = BTRFS_QGROUP_INFO_KEY; 605 key.offset = qgroupid; 606 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 607 if (ret < 0) 608 goto out; 609 610 if (ret > 0) { 611 ret = -ENOENT; 612 goto out; 613 } 614 615 ret = btrfs_del_item(trans, quota_root, path); 616 if (ret) 617 goto out; 618 619 btrfs_release_path(path); 620 621 key.type = BTRFS_QGROUP_LIMIT_KEY; 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *root, 640 struct btrfs_qgroup *qgroup) 641 { 642 struct btrfs_path *path; 643 struct btrfs_key key; 644 struct extent_buffer *l; 645 struct btrfs_qgroup_limit_item *qgroup_limit; 646 int ret; 647 int slot; 648 649 key.objectid = 0; 650 key.type = BTRFS_QGROUP_LIMIT_KEY; 651 key.offset = qgroup->qgroupid; 652 653 path = btrfs_alloc_path(); 654 if (!path) 655 return -ENOMEM; 656 657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 658 if (ret > 0) 659 ret = -ENOENT; 660 661 if (ret) 662 goto out; 663 664 l = path->nodes[0]; 665 slot = path->slots[0]; 666 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 667 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 668 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 669 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 670 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 671 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 672 673 btrfs_mark_buffer_dirty(l); 674 675 out: 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 static int update_qgroup_info_item(struct 
btrfs_trans_handle *trans, 681 struct btrfs_root *root, 682 struct btrfs_qgroup *qgroup) 683 { 684 struct btrfs_path *path; 685 struct btrfs_key key; 686 struct extent_buffer *l; 687 struct btrfs_qgroup_info_item *qgroup_info; 688 int ret; 689 int slot; 690 691 if (btrfs_is_testing(root->fs_info)) 692 return 0; 693 694 key.objectid = 0; 695 key.type = BTRFS_QGROUP_INFO_KEY; 696 key.offset = qgroup->qgroupid; 697 698 path = btrfs_alloc_path(); 699 if (!path) 700 return -ENOMEM; 701 702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 703 if (ret > 0) 704 ret = -ENOENT; 705 706 if (ret) 707 goto out; 708 709 l = path->nodes[0]; 710 slot = path->slots[0]; 711 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 712 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 713 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 714 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 715 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 716 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 717 718 btrfs_mark_buffer_dirty(l); 719 720 out: 721 btrfs_free_path(path); 722 return ret; 723 } 724 725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 726 struct btrfs_fs_info *fs_info, 727 struct btrfs_root *root) 728 { 729 struct btrfs_path *path; 730 struct btrfs_key key; 731 struct extent_buffer *l; 732 struct btrfs_qgroup_status_item *ptr; 733 int ret; 734 int slot; 735 736 key.objectid = 0; 737 key.type = BTRFS_QGROUP_STATUS_KEY; 738 key.offset = 0; 739 740 path = btrfs_alloc_path(); 741 if (!path) 742 return -ENOMEM; 743 744 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 745 if (ret > 0) 746 ret = -ENOENT; 747 748 if (ret) 749 goto out; 750 751 l = path->nodes[0]; 752 slot = path->slots[0]; 753 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 754 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 755 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 756 btrfs_set_qgroup_status_rescan(l, ptr, 757 fs_info->qgroup_rescan_progress.objectid); 758 759 btrfs_mark_buffer_dirty(l); 760 761 out: 762 btrfs_free_path(path); 763 return ret; 764 } 765 766 /* 767 * called with qgroup_lock held 768 */ 769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 770 struct btrfs_root *root) 771 { 772 struct btrfs_path *path; 773 struct btrfs_key key; 774 struct extent_buffer *leaf = NULL; 775 int ret; 776 int nr = 0; 777 778 path = btrfs_alloc_path(); 779 if (!path) 780 return -ENOMEM; 781 782 path->leave_spinning = 1; 783 784 key.objectid = 0; 785 key.offset = 0; 786 key.type = 0; 787 788 while (1) { 789 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 790 if (ret < 0) 791 goto out; 792 leaf = path->nodes[0]; 793 nr = btrfs_header_nritems(leaf); 794 if (!nr) 795 break; 796 /* 797 * delete the leaf one by one 798 * since the whole tree is going 799 * to be deleted. 
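 * Deleting all nritems from slot 0 in one btrfs_del_items() call empties
 * the current first leaf; the next search with the all-zero key then lands
 * on whatever leaf comes next, until no items remain.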
800 */ 801 path->slots[0] = 0; 802 ret = btrfs_del_items(trans, root, path, 0, nr); 803 if (ret) 804 goto out; 805 806 btrfs_release_path(path); 807 } 808 ret = 0; 809 out: 810 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 811 btrfs_free_path(path); 812 return ret; 813 } 814 815 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 816 struct btrfs_fs_info *fs_info) 817 { 818 struct btrfs_root *quota_root; 819 struct btrfs_root *tree_root = fs_info->tree_root; 820 struct btrfs_path *path = NULL; 821 struct btrfs_qgroup_status_item *ptr; 822 struct extent_buffer *leaf; 823 struct btrfs_key key; 824 struct btrfs_key found_key; 825 struct btrfs_qgroup *qgroup = NULL; 826 int ret = 0; 827 int slot; 828 829 mutex_lock(&fs_info->qgroup_ioctl_lock); 830 if (fs_info->quota_root) { 831 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 832 goto out; 833 } 834 835 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 836 if (!fs_info->qgroup_ulist) { 837 ret = -ENOMEM; 838 goto out; 839 } 840 841 /* 842 * initially create the quota tree 843 */ 844 quota_root = btrfs_create_tree(trans, fs_info, 845 BTRFS_QUOTA_TREE_OBJECTID); 846 if (IS_ERR(quota_root)) { 847 ret = PTR_ERR(quota_root); 848 goto out; 849 } 850 851 path = btrfs_alloc_path(); 852 if (!path) { 853 ret = -ENOMEM; 854 goto out_free_root; 855 } 856 857 key.objectid = 0; 858 key.type = BTRFS_QGROUP_STATUS_KEY; 859 key.offset = 0; 860 861 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 862 sizeof(*ptr)); 863 if (ret) 864 goto out_free_path; 865 866 leaf = path->nodes[0]; 867 ptr = btrfs_item_ptr(leaf, path->slots[0], 868 struct btrfs_qgroup_status_item); 869 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 870 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 871 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 872 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 873 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 874 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 875 876 btrfs_mark_buffer_dirty(leaf); 877 878 key.objectid = 0; 879 key.type = BTRFS_ROOT_REF_KEY; 880 key.offset = 0; 881 882 btrfs_release_path(path); 883 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 884 if (ret > 0) 885 goto out_add_root; 886 if (ret < 0) 887 goto out_free_path; 888 889 890 while (1) { 891 slot = path->slots[0]; 892 leaf = path->nodes[0]; 893 btrfs_item_key_to_cpu(leaf, &found_key, slot); 894 895 if (found_key.type == BTRFS_ROOT_REF_KEY) { 896 ret = add_qgroup_item(trans, quota_root, 897 found_key.offset); 898 if (ret) 899 goto out_free_path; 900 901 qgroup = add_qgroup_rb(fs_info, found_key.offset); 902 if (IS_ERR(qgroup)) { 903 ret = PTR_ERR(qgroup); 904 goto out_free_path; 905 } 906 } 907 ret = btrfs_next_item(tree_root, path); 908 if (ret < 0) 909 goto out_free_path; 910 if (ret) 911 break; 912 } 913 914 out_add_root: 915 btrfs_release_path(path); 916 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 917 if (ret) 918 goto out_free_path; 919 920 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 921 if (IS_ERR(qgroup)) { 922 ret = PTR_ERR(qgroup); 923 goto out_free_path; 924 } 925 spin_lock(&fs_info->qgroup_lock); 926 fs_info->quota_root = quota_root; 927 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 928 spin_unlock(&fs_info->qgroup_lock); 929 out_free_path: 930 btrfs_free_path(path); 931 out_free_root: 932 if (ret) { 933 free_extent_buffer(quota_root->node); 934 free_extent_buffer(quota_root->commit_root); 935 kfree(quota_root); 936 } 937 out: 
938 if (ret) { 939 ulist_free(fs_info->qgroup_ulist); 940 fs_info->qgroup_ulist = NULL; 941 } 942 mutex_unlock(&fs_info->qgroup_ioctl_lock); 943 return ret; 944 } 945 946 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 947 struct btrfs_fs_info *fs_info) 948 { 949 struct btrfs_root *tree_root = fs_info->tree_root; 950 struct btrfs_root *quota_root; 951 int ret = 0; 952 953 mutex_lock(&fs_info->qgroup_ioctl_lock); 954 if (!fs_info->quota_root) 955 goto out; 956 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 957 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 958 btrfs_qgroup_wait_for_completion(fs_info, false); 959 spin_lock(&fs_info->qgroup_lock); 960 quota_root = fs_info->quota_root; 961 fs_info->quota_root = NULL; 962 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 963 spin_unlock(&fs_info->qgroup_lock); 964 965 btrfs_free_qgroup_config(fs_info); 966 967 ret = btrfs_clean_quota_tree(trans, quota_root); 968 if (ret) 969 goto out; 970 971 ret = btrfs_del_root(trans, tree_root, "a_root->root_key); 972 if (ret) 973 goto out; 974 975 list_del("a_root->dirty_list); 976 977 btrfs_tree_lock(quota_root->node); 978 clean_tree_block(fs_info, quota_root->node); 979 btrfs_tree_unlock(quota_root->node); 980 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 981 982 free_extent_buffer(quota_root->node); 983 free_extent_buffer(quota_root->commit_root); 984 kfree(quota_root); 985 out: 986 mutex_unlock(&fs_info->qgroup_ioctl_lock); 987 return ret; 988 } 989 990 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 991 struct btrfs_qgroup *qgroup) 992 { 993 if (list_empty(&qgroup->dirty)) 994 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 995 } 996 997 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 998 struct btrfs_qgroup *qgroup, 999 u64 num_bytes) 1000 { 1001 #ifdef CONFIG_BTRFS_DEBUG 1002 WARN_ON(qgroup->reserved < num_bytes); 1003 btrfs_debug(fs_info, 1004 "qgroup %llu reserved space underflow, have: %llu, to free: %llu", 1005 qgroup->qgroupid, qgroup->reserved, num_bytes); 1006 #endif 1007 qgroup->reserved = 0; 1008 } 1009 /* 1010 * The easy accounting, if we are adding/removing the only ref for an extent 1011 * then this qgroup and all of the parent qgroups get their reference and 1012 * exclusive counts adjusted. 1013 * 1014 * Caller should hold fs_info->qgroup_lock. 
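 *
 * sign is +1 when the ref is added and -1 when it is dropped, so the same
 * helper both charges and releases rfer/excl across the whole parent chain.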
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    u64 num_bytes, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;
	if (sign > 0) {
		trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
		if (qgroup->reserved < num_bytes)
			report_reserved_underflow(fs_info, qgroup, num_bytes);
		else
			qgroup->reserved -= num_bytes;
	}

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0) {
			trace_qgroup_update_reserve(fs_info, qgroup,
						    -(s64)num_bytes);
			if (qgroup->reserved < num_bytes)
				report_reserved_underflow(fs_info, qgroup,
							  num_bytes);
			else
				qgroup->reserved -= num_bytes;
		}
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}


/*
 * Quick path for updating qgroup with only excl refs.
 *
 * In that case, just updating all the parent qgroups is enough.
 * Otherwise a full rescan is needed.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for a quick update, > 0 if a full rescan is needed (the
 * INCONSISTENT flag is set in that case).
 * Return < 0 for other errors.
1098 */ 1099 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1100 struct ulist *tmp, u64 src, u64 dst, 1101 int sign) 1102 { 1103 struct btrfs_qgroup *qgroup; 1104 int ret = 1; 1105 int err = 0; 1106 1107 qgroup = find_qgroup_rb(fs_info, src); 1108 if (!qgroup) 1109 goto out; 1110 if (qgroup->excl == qgroup->rfer) { 1111 ret = 0; 1112 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1113 qgroup->excl, sign); 1114 if (err < 0) { 1115 ret = err; 1116 goto out; 1117 } 1118 } 1119 out: 1120 if (ret) 1121 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1122 return ret; 1123 } 1124 1125 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1126 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1127 { 1128 struct btrfs_root *quota_root; 1129 struct btrfs_qgroup *parent; 1130 struct btrfs_qgroup *member; 1131 struct btrfs_qgroup_list *list; 1132 struct ulist *tmp; 1133 int ret = 0; 1134 1135 /* Check the level of src and dst first */ 1136 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1137 return -EINVAL; 1138 1139 tmp = ulist_alloc(GFP_KERNEL); 1140 if (!tmp) 1141 return -ENOMEM; 1142 1143 mutex_lock(&fs_info->qgroup_ioctl_lock); 1144 quota_root = fs_info->quota_root; 1145 if (!quota_root) { 1146 ret = -EINVAL; 1147 goto out; 1148 } 1149 member = find_qgroup_rb(fs_info, src); 1150 parent = find_qgroup_rb(fs_info, dst); 1151 if (!member || !parent) { 1152 ret = -EINVAL; 1153 goto out; 1154 } 1155 1156 /* check if such qgroup relation exist firstly */ 1157 list_for_each_entry(list, &member->groups, next_group) { 1158 if (list->group == parent) { 1159 ret = -EEXIST; 1160 goto out; 1161 } 1162 } 1163 1164 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1165 if (ret) 1166 goto out; 1167 1168 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1169 if (ret) { 1170 del_qgroup_relation_item(trans, quota_root, src, dst); 1171 goto out; 1172 } 1173 1174 spin_lock(&fs_info->qgroup_lock); 1175 ret = add_relation_rb(fs_info, src, dst); 1176 if (ret < 0) { 1177 spin_unlock(&fs_info->qgroup_lock); 1178 goto out; 1179 } 1180 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1181 spin_unlock(&fs_info->qgroup_lock); 1182 out: 1183 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1184 ulist_free(tmp); 1185 return ret; 1186 } 1187 1188 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1189 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1190 { 1191 struct btrfs_root *quota_root; 1192 struct btrfs_qgroup *parent; 1193 struct btrfs_qgroup *member; 1194 struct btrfs_qgroup_list *list; 1195 struct ulist *tmp; 1196 int ret = 0; 1197 int err; 1198 1199 tmp = ulist_alloc(GFP_KERNEL); 1200 if (!tmp) 1201 return -ENOMEM; 1202 1203 quota_root = fs_info->quota_root; 1204 if (!quota_root) { 1205 ret = -EINVAL; 1206 goto out; 1207 } 1208 1209 member = find_qgroup_rb(fs_info, src); 1210 parent = find_qgroup_rb(fs_info, dst); 1211 if (!member || !parent) { 1212 ret = -EINVAL; 1213 goto out; 1214 } 1215 1216 /* check if such qgroup relation exist firstly */ 1217 list_for_each_entry(list, &member->groups, next_group) { 1218 if (list->group == parent) 1219 goto exist; 1220 } 1221 ret = -ENOENT; 1222 goto out; 1223 exist: 1224 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1225 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1226 if (err && !ret) 1227 ret = err; 1228 1229 spin_lock(&fs_info->qgroup_lock); 1230 del_relation_rb(fs_info, src, dst); 1231 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1232 
spin_unlock(&fs_info->qgroup_lock); 1233 out: 1234 ulist_free(tmp); 1235 return ret; 1236 } 1237 1238 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1239 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1240 { 1241 int ret = 0; 1242 1243 mutex_lock(&fs_info->qgroup_ioctl_lock); 1244 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1245 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1246 1247 return ret; 1248 } 1249 1250 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1251 struct btrfs_fs_info *fs_info, u64 qgroupid) 1252 { 1253 struct btrfs_root *quota_root; 1254 struct btrfs_qgroup *qgroup; 1255 int ret = 0; 1256 1257 mutex_lock(&fs_info->qgroup_ioctl_lock); 1258 quota_root = fs_info->quota_root; 1259 if (!quota_root) { 1260 ret = -EINVAL; 1261 goto out; 1262 } 1263 qgroup = find_qgroup_rb(fs_info, qgroupid); 1264 if (qgroup) { 1265 ret = -EEXIST; 1266 goto out; 1267 } 1268 1269 ret = add_qgroup_item(trans, quota_root, qgroupid); 1270 if (ret) 1271 goto out; 1272 1273 spin_lock(&fs_info->qgroup_lock); 1274 qgroup = add_qgroup_rb(fs_info, qgroupid); 1275 spin_unlock(&fs_info->qgroup_lock); 1276 1277 if (IS_ERR(qgroup)) 1278 ret = PTR_ERR(qgroup); 1279 out: 1280 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1281 return ret; 1282 } 1283 1284 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1285 struct btrfs_fs_info *fs_info, u64 qgroupid) 1286 { 1287 struct btrfs_root *quota_root; 1288 struct btrfs_qgroup *qgroup; 1289 struct btrfs_qgroup_list *list; 1290 int ret = 0; 1291 1292 mutex_lock(&fs_info->qgroup_ioctl_lock); 1293 quota_root = fs_info->quota_root; 1294 if (!quota_root) { 1295 ret = -EINVAL; 1296 goto out; 1297 } 1298 1299 qgroup = find_qgroup_rb(fs_info, qgroupid); 1300 if (!qgroup) { 1301 ret = -ENOENT; 1302 goto out; 1303 } else { 1304 /* check if there are no children of this qgroup */ 1305 if (!list_empty(&qgroup->members)) { 1306 ret = -EBUSY; 1307 goto out; 1308 } 1309 } 1310 ret = del_qgroup_item(trans, quota_root, qgroupid); 1311 1312 while (!list_empty(&qgroup->groups)) { 1313 list = list_first_entry(&qgroup->groups, 1314 struct btrfs_qgroup_list, next_group); 1315 ret = __del_qgroup_relation(trans, fs_info, 1316 qgroupid, 1317 list->group->qgroupid); 1318 if (ret) 1319 goto out; 1320 } 1321 1322 spin_lock(&fs_info->qgroup_lock); 1323 del_qgroup_rb(fs_info, qgroupid); 1324 spin_unlock(&fs_info->qgroup_lock); 1325 out: 1326 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1327 return ret; 1328 } 1329 1330 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1331 struct btrfs_fs_info *fs_info, u64 qgroupid, 1332 struct btrfs_qgroup_limit *limit) 1333 { 1334 struct btrfs_root *quota_root; 1335 struct btrfs_qgroup *qgroup; 1336 int ret = 0; 1337 /* Sometimes we would want to clear the limit on this qgroup. 1338 * To meet this requirement, we treat the -1 as a special value 1339 * which tell kernel to clear the limit on this qgroup. 
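 * For example, a limit call with BTRFS_QGROUP_LIMIT_MAX_RFER set in flags
 * and max_rfer == (u64)-1 drops the max_rfer limit again instead of
 * setting a new one, as handled below.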
1340 */ 1341 const u64 CLEAR_VALUE = -1; 1342 1343 mutex_lock(&fs_info->qgroup_ioctl_lock); 1344 quota_root = fs_info->quota_root; 1345 if (!quota_root) { 1346 ret = -EINVAL; 1347 goto out; 1348 } 1349 1350 qgroup = find_qgroup_rb(fs_info, qgroupid); 1351 if (!qgroup) { 1352 ret = -ENOENT; 1353 goto out; 1354 } 1355 1356 spin_lock(&fs_info->qgroup_lock); 1357 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1358 if (limit->max_rfer == CLEAR_VALUE) { 1359 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1360 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1361 qgroup->max_rfer = 0; 1362 } else { 1363 qgroup->max_rfer = limit->max_rfer; 1364 } 1365 } 1366 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1367 if (limit->max_excl == CLEAR_VALUE) { 1368 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1369 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1370 qgroup->max_excl = 0; 1371 } else { 1372 qgroup->max_excl = limit->max_excl; 1373 } 1374 } 1375 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1376 if (limit->rsv_rfer == CLEAR_VALUE) { 1377 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1378 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1379 qgroup->rsv_rfer = 0; 1380 } else { 1381 qgroup->rsv_rfer = limit->rsv_rfer; 1382 } 1383 } 1384 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1385 if (limit->rsv_excl == CLEAR_VALUE) { 1386 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1387 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1388 qgroup->rsv_excl = 0; 1389 } else { 1390 qgroup->rsv_excl = limit->rsv_excl; 1391 } 1392 } 1393 qgroup->lim_flags |= limit->flags; 1394 1395 spin_unlock(&fs_info->qgroup_lock); 1396 1397 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1398 if (ret) { 1399 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1400 btrfs_info(fs_info, "unable to update quota limit for %llu", 1401 qgroupid); 1402 } 1403 1404 out: 1405 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1406 return ret; 1407 } 1408 1409 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, 1410 struct btrfs_fs_info *fs_info) 1411 { 1412 struct btrfs_qgroup_extent_record *record; 1413 struct btrfs_delayed_ref_root *delayed_refs; 1414 struct rb_node *node; 1415 u64 qgroup_to_skip; 1416 int ret = 0; 1417 1418 delayed_refs = &trans->transaction->delayed_refs; 1419 qgroup_to_skip = delayed_refs->qgroup_to_skip; 1420 1421 /* 1422 * No need to do lock, since this function will only be called in 1423 * btrfs_commit_transaction(). 
1424 */ 1425 node = rb_first(&delayed_refs->dirty_extent_root); 1426 while (node) { 1427 record = rb_entry(node, struct btrfs_qgroup_extent_record, 1428 node); 1429 if (WARN_ON(!record->old_roots)) 1430 ret = btrfs_find_all_roots(NULL, fs_info, 1431 record->bytenr, 0, &record->old_roots); 1432 if (ret < 0) 1433 break; 1434 if (qgroup_to_skip) 1435 ulist_del(record->old_roots, qgroup_to_skip, 0); 1436 node = rb_next(node); 1437 } 1438 return ret; 1439 } 1440 1441 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1442 struct btrfs_delayed_ref_root *delayed_refs, 1443 struct btrfs_qgroup_extent_record *record) 1444 { 1445 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1446 struct rb_node *parent_node = NULL; 1447 struct btrfs_qgroup_extent_record *entry; 1448 u64 bytenr = record->bytenr; 1449 1450 assert_spin_locked(&delayed_refs->lock); 1451 trace_btrfs_qgroup_trace_extent(fs_info, record); 1452 1453 while (*p) { 1454 parent_node = *p; 1455 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1456 node); 1457 if (bytenr < entry->bytenr) 1458 p = &(*p)->rb_left; 1459 else if (bytenr > entry->bytenr) 1460 p = &(*p)->rb_right; 1461 else 1462 return 1; 1463 } 1464 1465 rb_link_node(&record->node, parent_node, p); 1466 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1467 return 0; 1468 } 1469 1470 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1471 struct btrfs_qgroup_extent_record *qrecord) 1472 { 1473 struct ulist *old_root; 1474 u64 bytenr = qrecord->bytenr; 1475 int ret; 1476 1477 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); 1478 if (ret < 0) 1479 return ret; 1480 1481 /* 1482 * Here we don't need to get the lock of 1483 * trans->transaction->delayed_refs, since inserted qrecord won't 1484 * be deleted, only qrecord->node may be modified (new qrecord insert) 1485 * 1486 * So modifying qrecord->old_roots is safe here 1487 */ 1488 qrecord->old_roots = old_root; 1489 return 0; 1490 } 1491 1492 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1493 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1494 gfp_t gfp_flag) 1495 { 1496 struct btrfs_qgroup_extent_record *record; 1497 struct btrfs_delayed_ref_root *delayed_refs; 1498 int ret; 1499 1500 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1501 || bytenr == 0 || num_bytes == 0) 1502 return 0; 1503 if (WARN_ON(trans == NULL)) 1504 return -EINVAL; 1505 record = kmalloc(sizeof(*record), gfp_flag); 1506 if (!record) 1507 return -ENOMEM; 1508 1509 delayed_refs = &trans->transaction->delayed_refs; 1510 record->bytenr = bytenr; 1511 record->num_bytes = num_bytes; 1512 record->old_roots = NULL; 1513 1514 spin_lock(&delayed_refs->lock); 1515 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1516 spin_unlock(&delayed_refs->lock); 1517 if (ret > 0) { 1518 kfree(record); 1519 return 0; 1520 } 1521 return btrfs_qgroup_trace_extent_post(fs_info, record); 1522 } 1523 1524 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1525 struct btrfs_fs_info *fs_info, 1526 struct extent_buffer *eb) 1527 { 1528 int nr = btrfs_header_nritems(eb); 1529 int i, extent_type, ret; 1530 struct btrfs_key key; 1531 struct btrfs_file_extent_item *fi; 1532 u64 bytenr, num_bytes; 1533 1534 /* We can be called directly from walk_up_proc() */ 1535 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1536 return 0; 1537 1538 for (i = 0; i < nr; i++) { 1539 btrfs_item_key_to_cpu(eb, &key, i); 1540 1541 if (key.type 
!= BTRFS_EXTENT_DATA_KEY) 1542 continue; 1543 1544 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1545 /* filter out non qgroup-accountable extents */ 1546 extent_type = btrfs_file_extent_type(eb, fi); 1547 1548 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1549 continue; 1550 1551 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1552 if (!bytenr) 1553 continue; 1554 1555 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1556 1557 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1558 num_bytes, GFP_NOFS); 1559 if (ret) 1560 return ret; 1561 } 1562 return 0; 1563 } 1564 1565 /* 1566 * Walk up the tree from the bottom, freeing leaves and any interior 1567 * nodes which have had all slots visited. If a node (leaf or 1568 * interior) is freed, the node above it will have it's slot 1569 * incremented. The root node will never be freed. 1570 * 1571 * At the end of this function, we should have a path which has all 1572 * slots incremented to the next position for a search. If we need to 1573 * read a new node it will be NULL and the node above it will have the 1574 * correct slot selected for a later read. 1575 * 1576 * If we increment the root nodes slot counter past the number of 1577 * elements, 1 is returned to signal completion of the search. 1578 */ 1579 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1580 { 1581 int level = 0; 1582 int nr, slot; 1583 struct extent_buffer *eb; 1584 1585 if (root_level == 0) 1586 return 1; 1587 1588 while (level <= root_level) { 1589 eb = path->nodes[level]; 1590 nr = btrfs_header_nritems(eb); 1591 path->slots[level]++; 1592 slot = path->slots[level]; 1593 if (slot >= nr || level == 0) { 1594 /* 1595 * Don't free the root - we will detect this 1596 * condition after our loop and return a 1597 * positive value for caller to stop walking the tree. 1598 */ 1599 if (level != root_level) { 1600 btrfs_tree_unlock_rw(eb, path->locks[level]); 1601 path->locks[level] = 0; 1602 1603 free_extent_buffer(eb); 1604 path->nodes[level] = NULL; 1605 path->slots[level] = 0; 1606 } 1607 } else { 1608 /* 1609 * We have a valid slot to walk back down 1610 * from. Stop here so caller can process these 1611 * new nodes. 1612 */ 1613 break; 1614 } 1615 1616 level++; 1617 } 1618 1619 eb = path->nodes[root_level]; 1620 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1621 return 1; 1622 1623 return 0; 1624 } 1625 1626 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1627 struct btrfs_root *root, 1628 struct extent_buffer *root_eb, 1629 u64 root_gen, int root_level) 1630 { 1631 struct btrfs_fs_info *fs_info = root->fs_info; 1632 int ret = 0; 1633 int level; 1634 struct extent_buffer *eb = root_eb; 1635 struct btrfs_path *path = NULL; 1636 1637 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL); 1638 BUG_ON(root_eb == NULL); 1639 1640 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1641 return 0; 1642 1643 if (!extent_buffer_uptodate(root_eb)) { 1644 ret = btrfs_read_buffer(root_eb, root_gen); 1645 if (ret) 1646 goto out; 1647 } 1648 1649 if (root_level == 0) { 1650 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1651 goto out; 1652 } 1653 1654 path = btrfs_alloc_path(); 1655 if (!path) 1656 return -ENOMEM; 1657 1658 /* 1659 * Walk down the tree. Missing extent blocks are filled in as 1660 * we go. Metadata is accounted every time we read a new 1661 * extent block. 
 *
 * When we reach a leaf, we account for file extent items in it,
 * walk back up the tree (adjusting slot pointers as we go)
 * and restart the search process.
 */
	extent_buffer_get(root_eb); /* For path */
	path->nodes[root_level] = root_eb;
	path->slots[root_level] = 0;
	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
walk_down:
	level = root_level;
	while (level >= 0) {
		if (path->nodes[level] == NULL) {
			int parent_slot;
			u64 child_gen;
			u64 child_bytenr;

			/*
			 * We need to get child blockptr/gen from parent before
			 * we can read it.
			 */
			eb = path->nodes[level + 1];
			parent_slot = path->slots[level + 1];
			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
			child_gen = btrfs_node_ptr_generation(eb, parent_slot);

			eb = read_tree_block(fs_info, child_bytenr, child_gen);
			if (IS_ERR(eb)) {
				ret = PTR_ERR(eb);
				goto out;
			} else if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				ret = -EIO;
				goto out;
			}

			path->nodes[level] = eb;
			path->slots[level] = 0;

			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

			ret = btrfs_qgroup_trace_extent(trans, fs_info,
							child_bytenr,
							fs_info->nodesize,
							GFP_NOFS);
			if (ret)
				goto out;
		}

		if (level == 0) {
			ret = btrfs_qgroup_trace_leaf_items(trans, fs_info,
							    path->nodes[level]);
			if (ret)
				goto out;

			/* Nonzero return here means we completed our search */
			ret = adjust_slots_upwards(path, root_level);
			if (ret)
				break;

			/* Restart search with new slots */
			goto walk_down;
		}

		level--;
	}

	ret = 0;
out:
	btrfs_free_path(path);

	return ret;
}

#define UPDATE_NEW	0
#define UPDATE_OLD	1
/*
 * Walk all of the roots that point to the bytenr and adjust their refcnts.
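 *
 * For every root found we also walk up through all of its parent qgroups,
 * so each level of the hierarchy gets its old or new refcnt (depending on
 * update_old) bumped once for this sequence number.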
 */
static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
				struct ulist *roots, struct ulist *tmp,
				struct ulist *qgroups, u64 seq, int update_old)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret = 0;

	if (!roots)
		return 0;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
				GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = unode_aux_to_qgroup(tmp_unode);
			if (update_old)
				btrfs_qgroup_update_old_refcnt(qg, seq, 1);
			else
				btrfs_qgroup_update_new_refcnt(qg, seq, 1);
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(qgroups, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
				ret = ulist_add(tmp, glist->group->qgroupid,
						qgroup_to_aux(glist->group),
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}
	return 0;
}

/*
 * Update qgroup rfer/excl counters.
 * Rfer update is easy, the code can explain itself.
 *
 * Excl update is tricky, the update is split into 2 parts.
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 * -------------------------------------
 * B	|	*	|	-	|
 * -------------------------------------
 * !B	|	+	|	**	|
 * -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For the !A and !B conditions, the exception is the cur_old/new_roots == 0
 * case.
 *
 * To make the logic clear, we first use conditions A and B to split the
 * combinations into 4 results.
 *
 * Then, for results "+" and "-", check the old/new_roots == 0 case, as for
 * them only one variant may be 0.
 *
 * Lastly, check result **; since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, so the code and
 * logic are easy to understand now.
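 *
 * Example: an extent is referenced only by subvolume A before a clone and
 * by both A and B afterwards (nr_old_roots == 1, nr_new_roots == 2).
 * For A, cur_old_count == nr_old_roots while cur_new_count < nr_new_roots,
 * so A keeps its rfer but gives up num_bytes of excl (exclusive -> shared).
 * For B, cur_old_count == 0 and cur_new_count > 0, so B gains num_bytes of
 * rfer while its excl stays untouched, since the extent is shared.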
1831 */ 1832 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1833 struct ulist *qgroups, 1834 u64 nr_old_roots, 1835 u64 nr_new_roots, 1836 u64 num_bytes, u64 seq) 1837 { 1838 struct ulist_node *unode; 1839 struct ulist_iterator uiter; 1840 struct btrfs_qgroup *qg; 1841 u64 cur_new_count, cur_old_count; 1842 1843 ULIST_ITER_INIT(&uiter); 1844 while ((unode = ulist_next(qgroups, &uiter))) { 1845 bool dirty = false; 1846 1847 qg = unode_aux_to_qgroup(unode); 1848 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1849 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1850 1851 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1852 cur_old_count, cur_new_count); 1853 1854 /* Rfer update part */ 1855 if (cur_old_count == 0 && cur_new_count > 0) { 1856 qg->rfer += num_bytes; 1857 qg->rfer_cmpr += num_bytes; 1858 dirty = true; 1859 } 1860 if (cur_old_count > 0 && cur_new_count == 0) { 1861 qg->rfer -= num_bytes; 1862 qg->rfer_cmpr -= num_bytes; 1863 dirty = true; 1864 } 1865 1866 /* Excl update part */ 1867 /* Exclusive/none -> shared case */ 1868 if (cur_old_count == nr_old_roots && 1869 cur_new_count < nr_new_roots) { 1870 /* Exclusive -> shared */ 1871 if (cur_old_count != 0) { 1872 qg->excl -= num_bytes; 1873 qg->excl_cmpr -= num_bytes; 1874 dirty = true; 1875 } 1876 } 1877 1878 /* Shared -> exclusive/none case */ 1879 if (cur_old_count < nr_old_roots && 1880 cur_new_count == nr_new_roots) { 1881 /* Shared->exclusive */ 1882 if (cur_new_count != 0) { 1883 qg->excl += num_bytes; 1884 qg->excl_cmpr += num_bytes; 1885 dirty = true; 1886 } 1887 } 1888 1889 /* Exclusive/none -> exclusive/none case */ 1890 if (cur_old_count == nr_old_roots && 1891 cur_new_count == nr_new_roots) { 1892 if (cur_old_count == 0) { 1893 /* None -> exclusive/none */ 1894 1895 if (cur_new_count != 0) { 1896 /* None -> exclusive */ 1897 qg->excl += num_bytes; 1898 qg->excl_cmpr += num_bytes; 1899 dirty = true; 1900 } 1901 /* None -> none, nothing changed */ 1902 } else { 1903 /* Exclusive -> exclusive/none */ 1904 1905 if (cur_new_count == 0) { 1906 /* Exclusive -> none */ 1907 qg->excl -= num_bytes; 1908 qg->excl_cmpr -= num_bytes; 1909 dirty = true; 1910 } 1911 /* Exclusive -> exclusive, nothing changed */ 1912 } 1913 } 1914 1915 if (dirty) 1916 qgroup_dirty(fs_info, qg); 1917 } 1918 return 0; 1919 } 1920 1921 int 1922 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1923 struct btrfs_fs_info *fs_info, 1924 u64 bytenr, u64 num_bytes, 1925 struct ulist *old_roots, struct ulist *new_roots) 1926 { 1927 struct ulist *qgroups = NULL; 1928 struct ulist *tmp = NULL; 1929 u64 seq; 1930 u64 nr_new_roots = 0; 1931 u64 nr_old_roots = 0; 1932 int ret = 0; 1933 1934 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1935 return 0; 1936 1937 if (new_roots) 1938 nr_new_roots = new_roots->nnodes; 1939 if (old_roots) 1940 nr_old_roots = old_roots->nnodes; 1941 1942 BUG_ON(!fs_info->quota_root); 1943 1944 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1945 nr_old_roots, nr_new_roots); 1946 1947 qgroups = ulist_alloc(GFP_NOFS); 1948 if (!qgroups) { 1949 ret = -ENOMEM; 1950 goto out_free; 1951 } 1952 tmp = ulist_alloc(GFP_NOFS); 1953 if (!tmp) { 1954 ret = -ENOMEM; 1955 goto out_free; 1956 } 1957 1958 mutex_lock(&fs_info->qgroup_rescan_lock); 1959 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1960 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1961 mutex_unlock(&fs_info->qgroup_rescan_lock); 1962 ret = 0; 1963 goto out_free; 1964 } 1965 } 1966 
mutex_unlock(&fs_info->qgroup_rescan_lock); 1967 1968 spin_lock(&fs_info->qgroup_lock); 1969 seq = fs_info->qgroup_seq; 1970 1971 /* Update old refcnts using old_roots */ 1972 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1973 UPDATE_OLD); 1974 if (ret < 0) 1975 goto out; 1976 1977 /* Update new refcnts using new_roots */ 1978 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1979 UPDATE_NEW); 1980 if (ret < 0) 1981 goto out; 1982 1983 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1984 num_bytes, seq); 1985 1986 /* 1987 * Bump qgroup_seq to avoid seq overlap 1988 */ 1989 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 1990 out: 1991 spin_unlock(&fs_info->qgroup_lock); 1992 out_free: 1993 ulist_free(tmp); 1994 ulist_free(qgroups); 1995 ulist_free(old_roots); 1996 ulist_free(new_roots); 1997 return ret; 1998 } 1999 2000 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2001 struct btrfs_fs_info *fs_info) 2002 { 2003 struct btrfs_qgroup_extent_record *record; 2004 struct btrfs_delayed_ref_root *delayed_refs; 2005 struct ulist *new_roots = NULL; 2006 struct rb_node *node; 2007 u64 qgroup_to_skip; 2008 int ret = 0; 2009 2010 delayed_refs = &trans->transaction->delayed_refs; 2011 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2012 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2013 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2014 node); 2015 2016 trace_btrfs_qgroup_account_extents(fs_info, record); 2017 2018 if (!ret) { 2019 /* 2020 * Use SEQ_LAST as time_seq to do special search, which 2021 * doesn't lock tree or delayed_refs and search current 2022 * root. It's safe inside commit_transaction(). 2023 */ 2024 ret = btrfs_find_all_roots(trans, fs_info, 2025 record->bytenr, SEQ_LAST, &new_roots); 2026 if (ret < 0) 2027 goto cleanup; 2028 if (qgroup_to_skip) 2029 ulist_del(new_roots, qgroup_to_skip, 0); 2030 ret = btrfs_qgroup_account_extent(trans, fs_info, 2031 record->bytenr, record->num_bytes, 2032 record->old_roots, new_roots); 2033 record->old_roots = NULL; 2034 new_roots = NULL; 2035 } 2036 cleanup: 2037 ulist_free(record->old_roots); 2038 ulist_free(new_roots); 2039 new_roots = NULL; 2040 rb_erase(node, &delayed_refs->dirty_extent_root); 2041 kfree(record); 2042 2043 } 2044 return ret; 2045 } 2046 2047 /* 2048 * called from commit_transaction. Writes all changed qgroups to disk. 
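 * It also commits any pending quota enable/disable requested earlier in
 * the transaction and, when quota was just enabled, schedules the initial
 * rescan worker.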
2049 */ 2050 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2051 struct btrfs_fs_info *fs_info) 2052 { 2053 struct btrfs_root *quota_root = fs_info->quota_root; 2054 int ret = 0; 2055 int start_rescan_worker = 0; 2056 2057 if (!quota_root) 2058 goto out; 2059 2060 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2061 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2062 start_rescan_worker = 1; 2063 2064 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2065 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2066 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2067 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2068 2069 spin_lock(&fs_info->qgroup_lock); 2070 while (!list_empty(&fs_info->dirty_qgroups)) { 2071 struct btrfs_qgroup *qgroup; 2072 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2073 struct btrfs_qgroup, dirty); 2074 list_del_init(&qgroup->dirty); 2075 spin_unlock(&fs_info->qgroup_lock); 2076 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2077 if (ret) 2078 fs_info->qgroup_flags |= 2079 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2080 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2081 if (ret) 2082 fs_info->qgroup_flags |= 2083 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2084 spin_lock(&fs_info->qgroup_lock); 2085 } 2086 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2087 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2088 else 2089 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2090 spin_unlock(&fs_info->qgroup_lock); 2091 2092 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2093 if (ret) 2094 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2095 2096 if (!ret && start_rescan_worker) { 2097 ret = qgroup_rescan_init(fs_info, 0, 1); 2098 if (!ret) { 2099 qgroup_rescan_zero_tracking(fs_info); 2100 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2101 &fs_info->qgroup_rescan_work); 2102 } 2103 ret = 0; 2104 } 2105 2106 out: 2107 2108 return ret; 2109 } 2110 2111 /* 2112 * Copy the accounting information between qgroups. This is necessary 2113 * when a snapshot or a subvolume is created. Throwing an error will 2114 * cause a transaction abort so we take extra care here to only error 2115 * when a readonly fs is a reasonable outcome. 2116 */ 2117 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2118 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2119 struct btrfs_qgroup_inherit *inherit) 2120 { 2121 int ret = 0; 2122 int i; 2123 u64 *i_qgroups; 2124 struct btrfs_root *quota_root = fs_info->quota_root; 2125 struct btrfs_qgroup *srcgroup; 2126 struct btrfs_qgroup *dstgroup; 2127 u32 level_size = 0; 2128 u64 nums; 2129 2130 mutex_lock(&fs_info->qgroup_ioctl_lock); 2131 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2132 goto out; 2133 2134 if (!quota_root) { 2135 ret = -EINVAL; 2136 goto out; 2137 } 2138 2139 if (inherit) { 2140 i_qgroups = (u64 *)(inherit + 1); 2141 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2142 2 * inherit->num_excl_copies; 2143 for (i = 0; i < nums; ++i) { 2144 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2145 2146 /* 2147 * Zero out invalid groups so we can ignore 2148 * them later. 
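 * A source qgroup is only usable here if it exists and its level (stored
 * in the top 16 bits of the qgroupid) is higher than the level of the new
 * subvolume's qgroup.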
			 */
			if (!srcgroup ||
			    ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
				*i_qgroups = 0ULL;

			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		level_size = fs_info->nodesize;
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
			if (*i_qgroups == 0)
				continue;
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret && ret != -EEXIST)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret && ret != -EEXIST)
				goto out;
		}
		ret = 0;
	}


	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		dstgroup->lim_flags = inherit->lim.flags;
		dstgroup->max_rfer = inherit->lim.max_rfer;
		dstgroup->max_excl = inherit->lim.max_excl;
		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
		dstgroup->rsv_excl = inherit->lim.rsv_excl;

		ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
		if (ret) {
			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
			btrfs_info(fs_info,
				   "unable to update quota limit for %llu",
				   dstgroup->qgroupid);
			goto unlock;
		}
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;

		/*
		 * We call inherit after we clone the root in order to make sure
		 * our counts don't go crazy, so at this point the only
		 * difference between the two roots should be the root node.
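		 *
		 * Consequently the referenced counters are copied over
		 * unchanged below, while each side exclusively owns only
		 * its own root node, i.e. level_size bytes.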
		 */
		dstgroup->rfer = srcgroup->rfer;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
		dstgroup->excl = level_size;
		dstgroup->excl_cmpr = level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;

		/* inherit the limit info */
		dstgroup->lim_flags = srcgroup->lim_flags;
		dstgroup->max_rfer = srcgroup->max_rfer;
		dstgroup->max_excl = srcgroup->max_excl;
		dstgroup->rsv_rfer = srcgroup->rsv_rfer;
		dstgroup->rsv_excl = srcgroup->rsv_excl;

		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		if (*i_qgroups) {
			ret = add_relation_rb(fs_info, objectid, *i_qgroups);
			if (ret)
				goto unlock;
		}
		++i_qgroups;
	}

	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
	}
	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
	    qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
		return false;

	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
	    qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
		return false;

	return true;
}

static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	int retried = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;
retry:
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * In a first step, check all affected qgroups whether any of their
	 * limits would be exceeded.
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
			/*
			 * Commit the tree and retry, since we may have
			 * deletions which would free up space.
			 */
			if (!retried && qg->reserved > 0) {
				struct btrfs_trans_handle *trans;

				spin_unlock(&fs_info->qgroup_lock);
				ret = btrfs_start_delalloc_inodes(root, 0);
				if (ret)
					return ret;
				btrfs_wait_ordered_extents(root, -1, 0, (u64)-1);
				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					return PTR_ERR(trans);
				ret = btrfs_commit_transaction(trans);
				if (ret)
					return ret;
				retried++;
				goto retry;
			}
			ret = -EDQUOT;
			goto out;
		}

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * No limits exceeded, now record the reservation into all qgroups.
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}

void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
		if (qg->reserved < num_bytes)
			report_reserved_underflow(fs_info, qg, num_bytes);
		else
			qg->reserved -= num_bytes;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}

/*
 * Returns < 0 on error, 0 when more leaves are to be scanned, and 1 when
 * done.
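 *
 * The caller, btrfs_qgroup_rescan_worker(), keeps calling this as long as 0
 * is returned, committing the transaction on the final (> 0) result and
 * just ending it otherwise.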
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans)
{
	struct btrfs_key found;
	struct extent_buffer *scratch_leaf = NULL;
	struct ulist *roots = NULL;
	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
	u64 num_bytes;
	int slot;
	int ret;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	btrfs_debug(fs_info,
		    "current progress key (%llu %u %llu), search_slot ret %d",
		    fs_info->qgroup_rescan_progress.objectid,
		    fs_info->qgroup_rescan_progress.type,
		    fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end; we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!scratch_leaf) {
		ret = -ENOMEM;
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		goto out;
	}
	extent_buffer_get(scratch_leaf);
	btrfs_tree_read_lock(scratch_leaf);
	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->nodesize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
					   &roots);
		if (ret < 0)
			goto out;
		/* For rescan, just pass old_roots as NULL */
		ret = btrfs_qgroup_account_extent(trans, fs_info,
				found.objectid, num_bytes, NULL, roots);
		if (ret < 0)
			goto out;
	}
out:
	if (scratch_leaf) {
		btrfs_tree_read_unlock_blocking(scratch_leaf);
		free_extent_buffer(scratch_leaf);
	}
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}

static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	int err = -ENOMEM;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	err = 0;
	while (!err && !btrfs_fs_closing(fs_info)) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans);
		}
		if (err > 0)
			btrfs_commit_transaction(trans);
		else
			btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * Only update the status item, since the previous part has already
	 * updated the qgroup info.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		btrfs_err(fs_info,
			  "failed to start transaction for status update: %d",
			  err);
		goto done;
	}
	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	if (ret < 0) {
		err = ret;
		btrfs_err(fs_info, "failed to update qgroup status: %d", err);
	}
	btrfs_end_transaction(trans);

	if (btrfs_fs_closing(fs_info)) {
		btrfs_info(fs_info, "qgroup scan paused");
	} else if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			   err > 0 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_rescan_running = false;
	mutex_unlock(&fs_info->qgroup_rescan_lock);
	complete_all(&fs_info->qgroup_rescan_completion);
}

/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
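 *
 * A non-zero init_flags starts a fresh rescan and sets the RESCAN status
 * flag here; init_flags == 0 merely resumes an already pending rescan, so
 * both the RESCAN and ON flags must already be set.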
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
	       sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);

	if (ret) {
err:
		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
		return ret;
	}

	return 0;
}

static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}

int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be running right after its
	 * call to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we commit the transaction, which ensures we run
	 * all delayed refs, and only after that do we clear all tracking
	 * information for a clean start.
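	 *
	 * Hence the order below: commit the running transaction, zero all
	 * counters via qgroup_rescan_zero_tracking() and only then queue
	 * the rescan worker.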
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}

int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}

/*
 * This is only called from open_ctree() where we're still single threaded,
 * so locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}

/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or do nothing
 * if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EDQUOT)
 *
 * NOTE: this function may sleep for memory allocation.
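 *
 * A minimal usage sketch (illustrative only; inode, offset and length are
 * placeholders and error handling beyond the reserve call is omitted):
 *
 *	ret = btrfs_qgroup_reserve_data(inode, 0, SZ_1M);
 *	if (ret < 0)
 *		return ret;	(e.g. -EDQUOT when a limit would be hit)
 *
 * On success the reservation is later dropped via btrfs_qgroup_release_data()
 * once the range reaches disk, or via btrfs_qgroup_free_data() on error or
 * invalidation.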
 */
int btrfs_qgroup_reserve_data(struct inode *inode, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
	    !is_fstree(root->objectid) || len == 0)
		return 0;

	changeset.bytes_changed = 0;
	ulist_init(&changeset.range_changed);
	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					changeset.bytes_changed,
					QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;
	ret = qgroup_reserve(root, changeset.bytes_changed, true);
	if (ret < 0)
		goto cleanup;

	ulist_release(&changeset.range_changed);
	return ret;

cleanup:
	/* cleanup already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&changeset.range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
				 GFP_NOFS);
	ulist_release(&changeset.range_changed);
	return ret;
}

static int __btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len,
				       int free)
{
	struct extent_changeset changeset;
	int trace_op = QGROUP_RELEASE;
	int ret;

	changeset.bytes_changed = 0;
	ulist_init(&changeset.range_changed);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
	if (ret < 0)
		goto out;

	if (free)
		trace_op = QGROUP_FREE;
	trace_btrfs_qgroup_release_data(inode, start, len,
					changeset.bytes_changed, trace_op);
	if (free)
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed);
out:
	ulist_release(&changeset.range_changed);
	return ret;
}

/*
 * Free a reserved space range from io_tree and related qgroups.
 *
 * Should be called when a range of pages gets invalidated before reaching
 * disk, or for error cleanup.
 *
 * For data written to disk, use btrfs_qgroup_release_data().
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_free_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, start, len, 1);
}

/*
 * Release a reserved space range from io_tree only.
 *
 * Should be called when a range of pages gets written to disk and the
 * corresponding FILE_EXTENT item is inserted into the corresponding root.
 *
 * Since the new qgroup accounting framework only updates qgroup numbers at
 * commit_transaction() time, its reserved space shouldn't be freed from the
 * related qgroups.
 *
 * But we should release the range from io_tree, to allow further writes to
 * be COWed.
 *
 * NOTE: This function may sleep for memory allocation.
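 *
 * Both this function and btrfs_qgroup_free_data() are thin wrappers around
 * __btrfs_qgroup_release_data(); the only difference is whether the qgroup
 * reservation itself is returned as well (free != 0) or kept until the
 * numbers are accounted at commit time (free == 0).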
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, start, len, 0);
}

int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
			      bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid) || num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, (s64)num_bytes);
	ret = qgroup_reserve(root, num_bytes, enforce);
	if (ret < 0)
		return ret;
	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
	return ret;
}

void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 reserved;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
	if (reserved == 0)
		return;
	trace_qgroup_meta_reserve(root, -(s64)reserved);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}

void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}

/*
 * Check for leaked qgroup reserved space, normally called at inode
 * destruction time.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
{
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator iter;
	int ret;

	changeset.bytes_changed = 0;
	ulist_init(&changeset.range_changed);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
				       EXTENT_QGROUP_RESERVED, &changeset);

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(BTRFS_I(inode)->root->fs_info,
				"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				inode->i_ino, unode->val, unode->aux);
		}
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
					  BTRFS_I(inode)->root->objectid,
					  changeset.bytes_changed);

	}
	ulist_release(&changeset.range_changed);
}