1 /* 2 * Copyright (C) 2011 STRATO. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include <linux/workqueue.h> 26 #include <linux/btrfs.h> 27 28 #include "ctree.h" 29 #include "transaction.h" 30 #include "disk-io.h" 31 #include "locking.h" 32 #include "ulist.h" 33 #include "backref.h" 34 #include "extent_io.h" 35 #include "qgroup.h" 36 37 38 /* TODO XXX FIXME 39 * - subvol delete -> delete when ref goes to 0? delete limits also? 
40 * - reorganize keys 41 * - compressed 42 * - sync 43 * - copy also limits on subvol creation 44 * - limit 45 * - caches fuer ulists 46 * - performance benchmarks 47 * - check all ioctl parameters 48 */ 49 50 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 51 int mod) 52 { 53 if (qg->old_refcnt < seq) 54 qg->old_refcnt = seq; 55 qg->old_refcnt += mod; 56 } 57 58 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 59 int mod) 60 { 61 if (qg->new_refcnt < seq) 62 qg->new_refcnt = seq; 63 qg->new_refcnt += mod; 64 } 65 66 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 67 { 68 if (qg->old_refcnt < seq) 69 return 0; 70 return qg->old_refcnt - seq; 71 } 72 73 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 74 { 75 if (qg->new_refcnt < seq) 76 return 0; 77 return qg->new_refcnt - seq; 78 } 79 80 /* 81 * glue structure to represent the relations between qgroups. 82 */ 83 struct btrfs_qgroup_list { 84 struct list_head next_group; 85 struct list_head next_member; 86 struct btrfs_qgroup *group; 87 struct btrfs_qgroup *member; 88 }; 89 90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 91 { 92 return (u64)(uintptr_t)qg; 93 } 94 95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 96 { 97 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 98 } 99 100 static int 101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 102 int init_flags); 103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 104 105 /* must be called with qgroup_ioctl_lock held */ 106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 107 u64 qgroupid) 108 { 109 struct rb_node *n = fs_info->qgroup_tree.rb_node; 110 struct btrfs_qgroup *qgroup; 111 112 while (n) { 113 qgroup = rb_entry(n, struct btrfs_qgroup, node); 114 if (qgroup->qgroupid < qgroupid) 115 n = n->rb_left; 116 else if 
(qgroup->qgroupid > qgroupid) 117 n = n->rb_right; 118 else 119 return qgroup; 120 } 121 return NULL; 122 } 123 124 /* must be called with qgroup_lock held */ 125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 126 u64 qgroupid) 127 { 128 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 129 struct rb_node *parent = NULL; 130 struct btrfs_qgroup *qgroup; 131 132 while (*p) { 133 parent = *p; 134 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 135 136 if (qgroup->qgroupid < qgroupid) 137 p = &(*p)->rb_left; 138 else if (qgroup->qgroupid > qgroupid) 139 p = &(*p)->rb_right; 140 else 141 return qgroup; 142 } 143 144 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 145 if (!qgroup) 146 return ERR_PTR(-ENOMEM); 147 148 qgroup->qgroupid = qgroupid; 149 INIT_LIST_HEAD(&qgroup->groups); 150 INIT_LIST_HEAD(&qgroup->members); 151 INIT_LIST_HEAD(&qgroup->dirty); 152 153 rb_link_node(&qgroup->node, parent, p); 154 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 155 156 return qgroup; 157 } 158 159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 160 { 161 struct btrfs_qgroup_list *list; 162 163 list_del(&qgroup->dirty); 164 while (!list_empty(&qgroup->groups)) { 165 list = list_first_entry(&qgroup->groups, 166 struct btrfs_qgroup_list, next_group); 167 list_del(&list->next_group); 168 list_del(&list->next_member); 169 kfree(list); 170 } 171 172 while (!list_empty(&qgroup->members)) { 173 list = list_first_entry(&qgroup->members, 174 struct btrfs_qgroup_list, next_member); 175 list_del(&list->next_group); 176 list_del(&list->next_member); 177 kfree(list); 178 } 179 kfree(qgroup); 180 } 181 182 /* must be called with qgroup_lock held */ 183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 184 { 185 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 186 187 if (!qgroup) 188 return -ENOENT; 189 190 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 191 __del_qgroup_rb(qgroup); 192 return 0; 193 } 194 195 
/* must be called with qgroup_lock held */ 196 static int add_relation_rb(struct btrfs_fs_info *fs_info, 197 u64 memberid, u64 parentid) 198 { 199 struct btrfs_qgroup *member; 200 struct btrfs_qgroup *parent; 201 struct btrfs_qgroup_list *list; 202 203 member = find_qgroup_rb(fs_info, memberid); 204 parent = find_qgroup_rb(fs_info, parentid); 205 if (!member || !parent) 206 return -ENOENT; 207 208 list = kzalloc(sizeof(*list), GFP_ATOMIC); 209 if (!list) 210 return -ENOMEM; 211 212 list->group = parent; 213 list->member = member; 214 list_add_tail(&list->next_group, &member->groups); 215 list_add_tail(&list->next_member, &parent->members); 216 217 return 0; 218 } 219 220 /* must be called with qgroup_lock held */ 221 static int del_relation_rb(struct btrfs_fs_info *fs_info, 222 u64 memberid, u64 parentid) 223 { 224 struct btrfs_qgroup *member; 225 struct btrfs_qgroup *parent; 226 struct btrfs_qgroup_list *list; 227 228 member = find_qgroup_rb(fs_info, memberid); 229 parent = find_qgroup_rb(fs_info, parentid); 230 if (!member || !parent) 231 return -ENOENT; 232 233 list_for_each_entry(list, &member->groups, next_group) { 234 if (list->group == parent) { 235 list_del(&list->next_group); 236 list_del(&list->next_member); 237 kfree(list); 238 return 0; 239 } 240 } 241 return -ENOENT; 242 } 243 244 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 245 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 246 u64 rfer, u64 excl) 247 { 248 struct btrfs_qgroup *qgroup; 249 250 qgroup = find_qgroup_rb(fs_info, qgroupid); 251 if (!qgroup) 252 return -EINVAL; 253 if (qgroup->rfer != rfer || qgroup->excl != excl) 254 return -EINVAL; 255 return 0; 256 } 257 #endif 258 259 /* 260 * The full config is read in one go, only called from open_ctree() 261 * It doesn't use any locking, as at this point we're still single-threaded 262 */ 263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 264 { 265 struct btrfs_key key; 266 struct btrfs_key found_key; 267 struct 
btrfs_root *quota_root = fs_info->quota_root; 268 struct btrfs_path *path = NULL; 269 struct extent_buffer *l; 270 int slot; 271 int ret = 0; 272 u64 flags = 0; 273 u64 rescan_progress = 0; 274 275 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 276 return 0; 277 278 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 279 if (!fs_info->qgroup_ulist) { 280 ret = -ENOMEM; 281 goto out; 282 } 283 284 path = btrfs_alloc_path(); 285 if (!path) { 286 ret = -ENOMEM; 287 goto out; 288 } 289 290 /* default this to quota off, in case no status key is found */ 291 fs_info->qgroup_flags = 0; 292 293 /* 294 * pass 1: read status, all qgroup infos and limits 295 */ 296 key.objectid = 0; 297 key.type = 0; 298 key.offset = 0; 299 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 300 if (ret) 301 goto out; 302 303 while (1) { 304 struct btrfs_qgroup *qgroup; 305 306 slot = path->slots[0]; 307 l = path->nodes[0]; 308 btrfs_item_key_to_cpu(l, &found_key, slot); 309 310 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 311 struct btrfs_qgroup_status_item *ptr; 312 313 ptr = btrfs_item_ptr(l, slot, 314 struct btrfs_qgroup_status_item); 315 316 if (btrfs_qgroup_status_version(l, ptr) != 317 BTRFS_QGROUP_STATUS_VERSION) { 318 btrfs_err(fs_info, 319 "old qgroup version, quota disabled"); 320 goto out; 321 } 322 if (btrfs_qgroup_status_generation(l, ptr) != 323 fs_info->generation) { 324 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 325 btrfs_err(fs_info, 326 "qgroup generation mismatch, marked as inconsistent"); 327 } 328 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 329 ptr); 330 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 331 goto next1; 332 } 333 334 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 335 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 336 goto next1; 337 338 qgroup = find_qgroup_rb(fs_info, found_key.offset); 339 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 340 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 341 
btrfs_err(fs_info, "inconsistent qgroup config"); 342 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 343 } 344 if (!qgroup) { 345 qgroup = add_qgroup_rb(fs_info, found_key.offset); 346 if (IS_ERR(qgroup)) { 347 ret = PTR_ERR(qgroup); 348 goto out; 349 } 350 } 351 switch (found_key.type) { 352 case BTRFS_QGROUP_INFO_KEY: { 353 struct btrfs_qgroup_info_item *ptr; 354 355 ptr = btrfs_item_ptr(l, slot, 356 struct btrfs_qgroup_info_item); 357 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 358 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 359 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 360 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 361 /* generation currently unused */ 362 break; 363 } 364 case BTRFS_QGROUP_LIMIT_KEY: { 365 struct btrfs_qgroup_limit_item *ptr; 366 367 ptr = btrfs_item_ptr(l, slot, 368 struct btrfs_qgroup_limit_item); 369 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 370 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 371 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 372 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 373 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 374 break; 375 } 376 } 377 next1: 378 ret = btrfs_next_item(quota_root, path); 379 if (ret < 0) 380 goto out; 381 if (ret) 382 break; 383 } 384 btrfs_release_path(path); 385 386 /* 387 * pass 2: read all qgroup relations 388 */ 389 key.objectid = 0; 390 key.type = BTRFS_QGROUP_RELATION_KEY; 391 key.offset = 0; 392 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 393 if (ret) 394 goto out; 395 while (1) { 396 slot = path->slots[0]; 397 l = path->nodes[0]; 398 btrfs_item_key_to_cpu(l, &found_key, slot); 399 400 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 401 goto next2; 402 403 if (found_key.objectid > found_key.offset) { 404 /* parent <- member, not needed to build config */ 405 /* FIXME should we omit the key completely? 
*/ 406 goto next2; 407 } 408 409 ret = add_relation_rb(fs_info, found_key.objectid, 410 found_key.offset); 411 if (ret == -ENOENT) { 412 btrfs_warn(fs_info, 413 "orphan qgroup relation 0x%llx->0x%llx", 414 found_key.objectid, found_key.offset); 415 ret = 0; /* ignore the error */ 416 } 417 if (ret) 418 goto out; 419 next2: 420 ret = btrfs_next_item(quota_root, path); 421 if (ret < 0) 422 goto out; 423 if (ret) 424 break; 425 } 426 out: 427 fs_info->qgroup_flags |= flags; 428 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 429 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 430 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 431 ret >= 0) 432 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 433 btrfs_free_path(path); 434 435 if (ret < 0) { 436 ulist_free(fs_info->qgroup_ulist); 437 fs_info->qgroup_ulist = NULL; 438 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 439 } 440 441 return ret < 0 ? ret : 0; 442 } 443 444 /* 445 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 446 * first two are in single-threaded paths.And for the third one, we have set 447 * quota_root to be null with qgroup_lock held before, so it is safe to clean 448 * up the in-memory structures without qgroup_lock held. 449 */ 450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 451 { 452 struct rb_node *n; 453 struct btrfs_qgroup *qgroup; 454 455 while ((n = rb_first(&fs_info->qgroup_tree))) { 456 qgroup = rb_entry(n, struct btrfs_qgroup, node); 457 rb_erase(n, &fs_info->qgroup_tree); 458 __del_qgroup_rb(qgroup); 459 } 460 /* 461 * we call btrfs_free_qgroup_config() when umounting 462 * filesystem and disabling quota, so we set qgroup_ulist 463 * to be null here to avoid double free. 
464 */ 465 ulist_free(fs_info->qgroup_ulist); 466 fs_info->qgroup_ulist = NULL; 467 } 468 469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 470 struct btrfs_root *quota_root, 471 u64 src, u64 dst) 472 { 473 int ret; 474 struct btrfs_path *path; 475 struct btrfs_key key; 476 477 path = btrfs_alloc_path(); 478 if (!path) 479 return -ENOMEM; 480 481 key.objectid = src; 482 key.type = BTRFS_QGROUP_RELATION_KEY; 483 key.offset = dst; 484 485 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 486 487 btrfs_mark_buffer_dirty(path->nodes[0]); 488 489 btrfs_free_path(path); 490 return ret; 491 } 492 493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 494 struct btrfs_root *quota_root, 495 u64 src, u64 dst) 496 { 497 int ret; 498 struct btrfs_path *path; 499 struct btrfs_key key; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return -ENOMEM; 504 505 key.objectid = src; 506 key.type = BTRFS_QGROUP_RELATION_KEY; 507 key.offset = dst; 508 509 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 510 if (ret < 0) 511 goto out; 512 513 if (ret > 0) { 514 ret = -ENOENT; 515 goto out; 516 } 517 518 ret = btrfs_del_item(trans, quota_root, path); 519 out: 520 btrfs_free_path(path); 521 return ret; 522 } 523 524 static int add_qgroup_item(struct btrfs_trans_handle *trans, 525 struct btrfs_root *quota_root, u64 qgroupid) 526 { 527 int ret; 528 struct btrfs_path *path; 529 struct btrfs_qgroup_info_item *qgroup_info; 530 struct btrfs_qgroup_limit_item *qgroup_limit; 531 struct extent_buffer *leaf; 532 struct btrfs_key key; 533 534 if (btrfs_is_testing(quota_root->fs_info)) 535 return 0; 536 537 path = btrfs_alloc_path(); 538 if (!path) 539 return -ENOMEM; 540 541 key.objectid = 0; 542 key.type = BTRFS_QGROUP_INFO_KEY; 543 key.offset = qgroupid; 544 545 /* 546 * Avoid a transaction abort by catching -EEXIST here. In that 547 * case, we proceed by re-initializing the existing structure 548 * on disk. 
549 */ 550 551 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 552 sizeof(*qgroup_info)); 553 if (ret && ret != -EEXIST) 554 goto out; 555 556 leaf = path->nodes[0]; 557 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 558 struct btrfs_qgroup_info_item); 559 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 560 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 561 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 562 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 563 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 564 565 btrfs_mark_buffer_dirty(leaf); 566 567 btrfs_release_path(path); 568 569 key.type = BTRFS_QGROUP_LIMIT_KEY; 570 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 571 sizeof(*qgroup_limit)); 572 if (ret && ret != -EEXIST) 573 goto out; 574 575 leaf = path->nodes[0]; 576 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 577 struct btrfs_qgroup_limit_item); 578 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 579 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 580 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 581 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 582 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 583 584 btrfs_mark_buffer_dirty(leaf); 585 586 ret = 0; 587 out: 588 btrfs_free_path(path); 589 return ret; 590 } 591 592 static int del_qgroup_item(struct btrfs_trans_handle *trans, 593 struct btrfs_root *quota_root, u64 qgroupid) 594 { 595 int ret; 596 struct btrfs_path *path; 597 struct btrfs_key key; 598 599 path = btrfs_alloc_path(); 600 if (!path) 601 return -ENOMEM; 602 603 key.objectid = 0; 604 key.type = BTRFS_QGROUP_INFO_KEY; 605 key.offset = qgroupid; 606 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 607 if (ret < 0) 608 goto out; 609 610 if (ret > 0) { 611 ret = -ENOENT; 612 goto out; 613 } 614 615 ret = btrfs_del_item(trans, quota_root, path); 616 if (ret) 617 goto out; 618 619 btrfs_release_path(path); 620 621 
key.type = BTRFS_QGROUP_LIMIT_KEY; 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *root, 640 struct btrfs_qgroup *qgroup) 641 { 642 struct btrfs_path *path; 643 struct btrfs_key key; 644 struct extent_buffer *l; 645 struct btrfs_qgroup_limit_item *qgroup_limit; 646 int ret; 647 int slot; 648 649 key.objectid = 0; 650 key.type = BTRFS_QGROUP_LIMIT_KEY; 651 key.offset = qgroup->qgroupid; 652 653 path = btrfs_alloc_path(); 654 if (!path) 655 return -ENOMEM; 656 657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 658 if (ret > 0) 659 ret = -ENOENT; 660 661 if (ret) 662 goto out; 663 664 l = path->nodes[0]; 665 slot = path->slots[0]; 666 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 667 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 668 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 669 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 670 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 671 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 672 673 btrfs_mark_buffer_dirty(l); 674 675 out: 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 681 struct btrfs_root *root, 682 struct btrfs_qgroup *qgroup) 683 { 684 struct btrfs_path *path; 685 struct btrfs_key key; 686 struct extent_buffer *l; 687 struct btrfs_qgroup_info_item *qgroup_info; 688 int ret; 689 int slot; 690 691 if (btrfs_is_testing(root->fs_info)) 692 return 0; 693 694 key.objectid = 0; 695 key.type = BTRFS_QGROUP_INFO_KEY; 696 key.offset = qgroup->qgroupid; 697 698 path = 
btrfs_alloc_path(); 699 if (!path) 700 return -ENOMEM; 701 702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 703 if (ret > 0) 704 ret = -ENOENT; 705 706 if (ret) 707 goto out; 708 709 l = path->nodes[0]; 710 slot = path->slots[0]; 711 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 712 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 713 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 714 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 715 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 716 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 717 718 btrfs_mark_buffer_dirty(l); 719 720 out: 721 btrfs_free_path(path); 722 return ret; 723 } 724 725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 726 struct btrfs_fs_info *fs_info, 727 struct btrfs_root *root) 728 { 729 struct btrfs_path *path; 730 struct btrfs_key key; 731 struct extent_buffer *l; 732 struct btrfs_qgroup_status_item *ptr; 733 int ret; 734 int slot; 735 736 key.objectid = 0; 737 key.type = BTRFS_QGROUP_STATUS_KEY; 738 key.offset = 0; 739 740 path = btrfs_alloc_path(); 741 if (!path) 742 return -ENOMEM; 743 744 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 745 if (ret > 0) 746 ret = -ENOENT; 747 748 if (ret) 749 goto out; 750 751 l = path->nodes[0]; 752 slot = path->slots[0]; 753 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 754 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 755 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 756 btrfs_set_qgroup_status_rescan(l, ptr, 757 fs_info->qgroup_rescan_progress.objectid); 758 759 btrfs_mark_buffer_dirty(l); 760 761 out: 762 btrfs_free_path(path); 763 return ret; 764 } 765 766 /* 767 * called with qgroup_lock held 768 */ 769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 770 struct btrfs_root *root) 771 { 772 struct btrfs_path *path; 773 struct btrfs_key key; 774 
struct extent_buffer *leaf = NULL; 775 int ret; 776 int nr = 0; 777 778 path = btrfs_alloc_path(); 779 if (!path) 780 return -ENOMEM; 781 782 path->leave_spinning = 1; 783 784 key.objectid = 0; 785 key.offset = 0; 786 key.type = 0; 787 788 while (1) { 789 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 790 if (ret < 0) 791 goto out; 792 leaf = path->nodes[0]; 793 nr = btrfs_header_nritems(leaf); 794 if (!nr) 795 break; 796 /* 797 * delete the leaf one by one 798 * since the whole tree is going 799 * to be deleted. 800 */ 801 path->slots[0] = 0; 802 ret = btrfs_del_items(trans, root, path, 0, nr); 803 if (ret) 804 goto out; 805 806 btrfs_release_path(path); 807 } 808 ret = 0; 809 out: 810 btrfs_free_path(path); 811 return ret; 812 } 813 814 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 815 struct btrfs_fs_info *fs_info) 816 { 817 struct btrfs_root *quota_root; 818 struct btrfs_root *tree_root = fs_info->tree_root; 819 struct btrfs_path *path = NULL; 820 struct btrfs_qgroup_status_item *ptr; 821 struct extent_buffer *leaf; 822 struct btrfs_key key; 823 struct btrfs_key found_key; 824 struct btrfs_qgroup *qgroup = NULL; 825 int ret = 0; 826 int slot; 827 828 mutex_lock(&fs_info->qgroup_ioctl_lock); 829 if (fs_info->quota_root) { 830 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 831 goto out; 832 } 833 834 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 835 if (!fs_info->qgroup_ulist) { 836 ret = -ENOMEM; 837 goto out; 838 } 839 840 /* 841 * initially create the quota tree 842 */ 843 quota_root = btrfs_create_tree(trans, fs_info, 844 BTRFS_QUOTA_TREE_OBJECTID); 845 if (IS_ERR(quota_root)) { 846 ret = PTR_ERR(quota_root); 847 goto out; 848 } 849 850 path = btrfs_alloc_path(); 851 if (!path) { 852 ret = -ENOMEM; 853 goto out_free_root; 854 } 855 856 key.objectid = 0; 857 key.type = BTRFS_QGROUP_STATUS_KEY; 858 key.offset = 0; 859 860 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 861 sizeof(*ptr)); 862 if (ret) 863 goto 
out_free_path; 864 865 leaf = path->nodes[0]; 866 ptr = btrfs_item_ptr(leaf, path->slots[0], 867 struct btrfs_qgroup_status_item); 868 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 869 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 870 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 871 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 872 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 873 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 874 875 btrfs_mark_buffer_dirty(leaf); 876 877 key.objectid = 0; 878 key.type = BTRFS_ROOT_REF_KEY; 879 key.offset = 0; 880 881 btrfs_release_path(path); 882 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 883 if (ret > 0) 884 goto out_add_root; 885 if (ret < 0) 886 goto out_free_path; 887 888 889 while (1) { 890 slot = path->slots[0]; 891 leaf = path->nodes[0]; 892 btrfs_item_key_to_cpu(leaf, &found_key, slot); 893 894 if (found_key.type == BTRFS_ROOT_REF_KEY) { 895 ret = add_qgroup_item(trans, quota_root, 896 found_key.offset); 897 if (ret) 898 goto out_free_path; 899 900 qgroup = add_qgroup_rb(fs_info, found_key.offset); 901 if (IS_ERR(qgroup)) { 902 ret = PTR_ERR(qgroup); 903 goto out_free_path; 904 } 905 } 906 ret = btrfs_next_item(tree_root, path); 907 if (ret < 0) 908 goto out_free_path; 909 if (ret) 910 break; 911 } 912 913 out_add_root: 914 btrfs_release_path(path); 915 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 916 if (ret) 917 goto out_free_path; 918 919 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 920 if (IS_ERR(qgroup)) { 921 ret = PTR_ERR(qgroup); 922 goto out_free_path; 923 } 924 spin_lock(&fs_info->qgroup_lock); 925 fs_info->quota_root = quota_root; 926 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 927 spin_unlock(&fs_info->qgroup_lock); 928 out_free_path: 929 btrfs_free_path(path); 930 out_free_root: 931 if (ret) { 932 free_extent_buffer(quota_root->node); 933 free_extent_buffer(quota_root->commit_root); 934 
kfree(quota_root); 935 } 936 out: 937 if (ret) { 938 ulist_free(fs_info->qgroup_ulist); 939 fs_info->qgroup_ulist = NULL; 940 } 941 mutex_unlock(&fs_info->qgroup_ioctl_lock); 942 return ret; 943 } 944 945 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 946 struct btrfs_fs_info *fs_info) 947 { 948 struct btrfs_root *quota_root; 949 int ret = 0; 950 951 mutex_lock(&fs_info->qgroup_ioctl_lock); 952 if (!fs_info->quota_root) 953 goto out; 954 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 955 btrfs_qgroup_wait_for_completion(fs_info, false); 956 spin_lock(&fs_info->qgroup_lock); 957 quota_root = fs_info->quota_root; 958 fs_info->quota_root = NULL; 959 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 960 spin_unlock(&fs_info->qgroup_lock); 961 962 btrfs_free_qgroup_config(fs_info); 963 964 ret = btrfs_clean_quota_tree(trans, quota_root); 965 if (ret) 966 goto out; 967 968 ret = btrfs_del_root(trans, fs_info, "a_root->root_key); 969 if (ret) 970 goto out; 971 972 list_del("a_root->dirty_list); 973 974 btrfs_tree_lock(quota_root->node); 975 clean_tree_block(fs_info, quota_root->node); 976 btrfs_tree_unlock(quota_root->node); 977 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 978 979 free_extent_buffer(quota_root->node); 980 free_extent_buffer(quota_root->commit_root); 981 kfree(quota_root); 982 out: 983 mutex_unlock(&fs_info->qgroup_ioctl_lock); 984 return ret; 985 } 986 987 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 988 struct btrfs_qgroup *qgroup) 989 { 990 if (list_empty(&qgroup->dirty)) 991 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 992 } 993 994 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 995 struct btrfs_qgroup *qgroup, 996 u64 num_bytes) 997 { 998 #ifdef CONFIG_BTRFS_DEBUG 999 WARN_ON(qgroup->reserved < num_bytes); 1000 btrfs_debug(fs_info, 1001 "qgroup %llu reserved space underflow, have: %llu, to free: %llu", 1002 qgroup->qgroupid, qgroup->reserved, num_bytes); 1003 #endif 1004 
qgroup->reserved = 0; 1005 } 1006 /* 1007 * The easy accounting, if we are adding/removing the only ref for an extent 1008 * then this qgroup and all of the parent qgroups get their reference and 1009 * exclusive counts adjusted. 1010 * 1011 * Caller should hold fs_info->qgroup_lock. 1012 */ 1013 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1014 struct ulist *tmp, u64 ref_root, 1015 u64 num_bytes, int sign) 1016 { 1017 struct btrfs_qgroup *qgroup; 1018 struct btrfs_qgroup_list *glist; 1019 struct ulist_node *unode; 1020 struct ulist_iterator uiter; 1021 int ret = 0; 1022 1023 qgroup = find_qgroup_rb(fs_info, ref_root); 1024 if (!qgroup) 1025 goto out; 1026 1027 qgroup->rfer += sign * num_bytes; 1028 qgroup->rfer_cmpr += sign * num_bytes; 1029 1030 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1031 qgroup->excl += sign * num_bytes; 1032 qgroup->excl_cmpr += sign * num_bytes; 1033 if (sign > 0) { 1034 trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes); 1035 if (qgroup->reserved < num_bytes) 1036 report_reserved_underflow(fs_info, qgroup, num_bytes); 1037 else 1038 qgroup->reserved -= num_bytes; 1039 } 1040 1041 qgroup_dirty(fs_info, qgroup); 1042 1043 /* Get all of the parent groups that contain this qgroup */ 1044 list_for_each_entry(glist, &qgroup->groups, next_group) { 1045 ret = ulist_add(tmp, glist->group->qgroupid, 1046 qgroup_to_aux(glist->group), GFP_ATOMIC); 1047 if (ret < 0) 1048 goto out; 1049 } 1050 1051 /* Iterate all of the parents and adjust their reference counts */ 1052 ULIST_ITER_INIT(&uiter); 1053 while ((unode = ulist_next(tmp, &uiter))) { 1054 qgroup = unode_aux_to_qgroup(unode); 1055 qgroup->rfer += sign * num_bytes; 1056 qgroup->rfer_cmpr += sign * num_bytes; 1057 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1058 qgroup->excl += sign * num_bytes; 1059 if (sign > 0) { 1060 trace_qgroup_update_reserve(fs_info, qgroup, 1061 -(s64)num_bytes); 1062 if (qgroup->reserved < num_bytes) 1063 
report_reserved_underflow(fs_info, qgroup, 1064 num_bytes); 1065 else 1066 qgroup->reserved -= num_bytes; 1067 } 1068 qgroup->excl_cmpr += sign * num_bytes; 1069 qgroup_dirty(fs_info, qgroup); 1070 1071 /* Add any parents of the parents */ 1072 list_for_each_entry(glist, &qgroup->groups, next_group) { 1073 ret = ulist_add(tmp, glist->group->qgroupid, 1074 qgroup_to_aux(glist->group), GFP_ATOMIC); 1075 if (ret < 0) 1076 goto out; 1077 } 1078 } 1079 ret = 0; 1080 out: 1081 return ret; 1082 } 1083 1084 1085 /* 1086 * Quick path for updating qgroup with only excl refs. 1087 * 1088 * In that case, just update all parent will be enough. 1089 * Or we needs to do a full rescan. 1090 * Caller should also hold fs_info->qgroup_lock. 1091 * 1092 * Return 0 for quick update, return >0 for need to full rescan 1093 * and mark INCONSISTENT flag. 1094 * Return < 0 for other error. 1095 */ 1096 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1097 struct ulist *tmp, u64 src, u64 dst, 1098 int sign) 1099 { 1100 struct btrfs_qgroup *qgroup; 1101 int ret = 1; 1102 int err = 0; 1103 1104 qgroup = find_qgroup_rb(fs_info, src); 1105 if (!qgroup) 1106 goto out; 1107 if (qgroup->excl == qgroup->rfer) { 1108 ret = 0; 1109 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1110 qgroup->excl, sign); 1111 if (err < 0) { 1112 ret = err; 1113 goto out; 1114 } 1115 } 1116 out: 1117 if (ret) 1118 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1119 return ret; 1120 } 1121 1122 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1123 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1124 { 1125 struct btrfs_root *quota_root; 1126 struct btrfs_qgroup *parent; 1127 struct btrfs_qgroup *member; 1128 struct btrfs_qgroup_list *list; 1129 struct ulist *tmp; 1130 int ret = 0; 1131 1132 /* Check the level of src and dst first */ 1133 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1134 return -EINVAL; 1135 1136 tmp = ulist_alloc(GFP_KERNEL); 1137 if (!tmp) 
1138 return -ENOMEM; 1139 1140 mutex_lock(&fs_info->qgroup_ioctl_lock); 1141 quota_root = fs_info->quota_root; 1142 if (!quota_root) { 1143 ret = -EINVAL; 1144 goto out; 1145 } 1146 member = find_qgroup_rb(fs_info, src); 1147 parent = find_qgroup_rb(fs_info, dst); 1148 if (!member || !parent) { 1149 ret = -EINVAL; 1150 goto out; 1151 } 1152 1153 /* check if such qgroup relation exist firstly */ 1154 list_for_each_entry(list, &member->groups, next_group) { 1155 if (list->group == parent) { 1156 ret = -EEXIST; 1157 goto out; 1158 } 1159 } 1160 1161 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1162 if (ret) 1163 goto out; 1164 1165 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1166 if (ret) { 1167 del_qgroup_relation_item(trans, quota_root, src, dst); 1168 goto out; 1169 } 1170 1171 spin_lock(&fs_info->qgroup_lock); 1172 ret = add_relation_rb(fs_info, src, dst); 1173 if (ret < 0) { 1174 spin_unlock(&fs_info->qgroup_lock); 1175 goto out; 1176 } 1177 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1178 spin_unlock(&fs_info->qgroup_lock); 1179 out: 1180 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1181 ulist_free(tmp); 1182 return ret; 1183 } 1184 1185 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1186 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1187 { 1188 struct btrfs_root *quota_root; 1189 struct btrfs_qgroup *parent; 1190 struct btrfs_qgroup *member; 1191 struct btrfs_qgroup_list *list; 1192 struct ulist *tmp; 1193 int ret = 0; 1194 int err; 1195 1196 tmp = ulist_alloc(GFP_KERNEL); 1197 if (!tmp) 1198 return -ENOMEM; 1199 1200 quota_root = fs_info->quota_root; 1201 if (!quota_root) { 1202 ret = -EINVAL; 1203 goto out; 1204 } 1205 1206 member = find_qgroup_rb(fs_info, src); 1207 parent = find_qgroup_rb(fs_info, dst); 1208 if (!member || !parent) { 1209 ret = -EINVAL; 1210 goto out; 1211 } 1212 1213 /* check if such qgroup relation exist firstly */ 1214 list_for_each_entry(list, &member->groups, 
next_group) { 1215 if (list->group == parent) 1216 goto exist; 1217 } 1218 ret = -ENOENT; 1219 goto out; 1220 exist: 1221 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1222 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1223 if (err && !ret) 1224 ret = err; 1225 1226 spin_lock(&fs_info->qgroup_lock); 1227 del_relation_rb(fs_info, src, dst); 1228 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1229 spin_unlock(&fs_info->qgroup_lock); 1230 out: 1231 ulist_free(tmp); 1232 return ret; 1233 } 1234 1235 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1236 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1237 { 1238 int ret = 0; 1239 1240 mutex_lock(&fs_info->qgroup_ioctl_lock); 1241 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1242 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1243 1244 return ret; 1245 } 1246 1247 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1248 struct btrfs_fs_info *fs_info, u64 qgroupid) 1249 { 1250 struct btrfs_root *quota_root; 1251 struct btrfs_qgroup *qgroup; 1252 int ret = 0; 1253 1254 mutex_lock(&fs_info->qgroup_ioctl_lock); 1255 quota_root = fs_info->quota_root; 1256 if (!quota_root) { 1257 ret = -EINVAL; 1258 goto out; 1259 } 1260 qgroup = find_qgroup_rb(fs_info, qgroupid); 1261 if (qgroup) { 1262 ret = -EEXIST; 1263 goto out; 1264 } 1265 1266 ret = add_qgroup_item(trans, quota_root, qgroupid); 1267 if (ret) 1268 goto out; 1269 1270 spin_lock(&fs_info->qgroup_lock); 1271 qgroup = add_qgroup_rb(fs_info, qgroupid); 1272 spin_unlock(&fs_info->qgroup_lock); 1273 1274 if (IS_ERR(qgroup)) 1275 ret = PTR_ERR(qgroup); 1276 out: 1277 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1278 return ret; 1279 } 1280 1281 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1282 struct btrfs_fs_info *fs_info, u64 qgroupid) 1283 { 1284 struct btrfs_root *quota_root; 1285 struct btrfs_qgroup *qgroup; 1286 struct btrfs_qgroup_list *list; 1287 int ret = 0; 1288 1289 
mutex_lock(&fs_info->qgroup_ioctl_lock); 1290 quota_root = fs_info->quota_root; 1291 if (!quota_root) { 1292 ret = -EINVAL; 1293 goto out; 1294 } 1295 1296 qgroup = find_qgroup_rb(fs_info, qgroupid); 1297 if (!qgroup) { 1298 ret = -ENOENT; 1299 goto out; 1300 } else { 1301 /* check if there are no children of this qgroup */ 1302 if (!list_empty(&qgroup->members)) { 1303 ret = -EBUSY; 1304 goto out; 1305 } 1306 } 1307 ret = del_qgroup_item(trans, quota_root, qgroupid); 1308 if (ret && ret != -ENOENT) 1309 goto out; 1310 1311 while (!list_empty(&qgroup->groups)) { 1312 list = list_first_entry(&qgroup->groups, 1313 struct btrfs_qgroup_list, next_group); 1314 ret = __del_qgroup_relation(trans, fs_info, 1315 qgroupid, 1316 list->group->qgroupid); 1317 if (ret) 1318 goto out; 1319 } 1320 1321 spin_lock(&fs_info->qgroup_lock); 1322 del_qgroup_rb(fs_info, qgroupid); 1323 spin_unlock(&fs_info->qgroup_lock); 1324 out: 1325 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1326 return ret; 1327 } 1328 1329 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1330 struct btrfs_fs_info *fs_info, u64 qgroupid, 1331 struct btrfs_qgroup_limit *limit) 1332 { 1333 struct btrfs_root *quota_root; 1334 struct btrfs_qgroup *qgroup; 1335 int ret = 0; 1336 /* Sometimes we would want to clear the limit on this qgroup. 1337 * To meet this requirement, we treat the -1 as a special value 1338 * which tell kernel to clear the limit on this qgroup. 
1339 */ 1340 const u64 CLEAR_VALUE = -1; 1341 1342 mutex_lock(&fs_info->qgroup_ioctl_lock); 1343 quota_root = fs_info->quota_root; 1344 if (!quota_root) { 1345 ret = -EINVAL; 1346 goto out; 1347 } 1348 1349 qgroup = find_qgroup_rb(fs_info, qgroupid); 1350 if (!qgroup) { 1351 ret = -ENOENT; 1352 goto out; 1353 } 1354 1355 spin_lock(&fs_info->qgroup_lock); 1356 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1357 if (limit->max_rfer == CLEAR_VALUE) { 1358 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1359 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1360 qgroup->max_rfer = 0; 1361 } else { 1362 qgroup->max_rfer = limit->max_rfer; 1363 } 1364 } 1365 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1366 if (limit->max_excl == CLEAR_VALUE) { 1367 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1368 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1369 qgroup->max_excl = 0; 1370 } else { 1371 qgroup->max_excl = limit->max_excl; 1372 } 1373 } 1374 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1375 if (limit->rsv_rfer == CLEAR_VALUE) { 1376 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1377 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1378 qgroup->rsv_rfer = 0; 1379 } else { 1380 qgroup->rsv_rfer = limit->rsv_rfer; 1381 } 1382 } 1383 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1384 if (limit->rsv_excl == CLEAR_VALUE) { 1385 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1386 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1387 qgroup->rsv_excl = 0; 1388 } else { 1389 qgroup->rsv_excl = limit->rsv_excl; 1390 } 1391 } 1392 qgroup->lim_flags |= limit->flags; 1393 1394 spin_unlock(&fs_info->qgroup_lock); 1395 1396 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1397 if (ret) { 1398 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1399 btrfs_info(fs_info, "unable to update quota limit for %llu", 1400 qgroupid); 1401 } 1402 1403 out: 1404 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1405 return ret; 1406 } 1407 1408 int 
btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1409 struct btrfs_delayed_ref_root *delayed_refs, 1410 struct btrfs_qgroup_extent_record *record) 1411 { 1412 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1413 struct rb_node *parent_node = NULL; 1414 struct btrfs_qgroup_extent_record *entry; 1415 u64 bytenr = record->bytenr; 1416 1417 assert_spin_locked(&delayed_refs->lock); 1418 trace_btrfs_qgroup_trace_extent(fs_info, record); 1419 1420 while (*p) { 1421 parent_node = *p; 1422 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1423 node); 1424 if (bytenr < entry->bytenr) 1425 p = &(*p)->rb_left; 1426 else if (bytenr > entry->bytenr) 1427 p = &(*p)->rb_right; 1428 else 1429 return 1; 1430 } 1431 1432 rb_link_node(&record->node, parent_node, p); 1433 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1434 return 0; 1435 } 1436 1437 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1438 struct btrfs_qgroup_extent_record *qrecord) 1439 { 1440 struct ulist *old_root; 1441 u64 bytenr = qrecord->bytenr; 1442 int ret; 1443 1444 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); 1445 if (ret < 0) 1446 return ret; 1447 1448 /* 1449 * Here we don't need to get the lock of 1450 * trans->transaction->delayed_refs, since inserted qrecord won't 1451 * be deleted, only qrecord->node may be modified (new qrecord insert) 1452 * 1453 * So modifying qrecord->old_roots is safe here 1454 */ 1455 qrecord->old_roots = old_root; 1456 return 0; 1457 } 1458 1459 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1460 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1461 gfp_t gfp_flag) 1462 { 1463 struct btrfs_qgroup_extent_record *record; 1464 struct btrfs_delayed_ref_root *delayed_refs; 1465 int ret; 1466 1467 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1468 || bytenr == 0 || num_bytes == 0) 1469 return 0; 1470 if (WARN_ON(trans == NULL)) 1471 return -EINVAL; 1472 
record = kmalloc(sizeof(*record), gfp_flag); 1473 if (!record) 1474 return -ENOMEM; 1475 1476 delayed_refs = &trans->transaction->delayed_refs; 1477 record->bytenr = bytenr; 1478 record->num_bytes = num_bytes; 1479 record->old_roots = NULL; 1480 1481 spin_lock(&delayed_refs->lock); 1482 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1483 spin_unlock(&delayed_refs->lock); 1484 if (ret > 0) { 1485 kfree(record); 1486 return 0; 1487 } 1488 return btrfs_qgroup_trace_extent_post(fs_info, record); 1489 } 1490 1491 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1492 struct btrfs_fs_info *fs_info, 1493 struct extent_buffer *eb) 1494 { 1495 int nr = btrfs_header_nritems(eb); 1496 int i, extent_type, ret; 1497 struct btrfs_key key; 1498 struct btrfs_file_extent_item *fi; 1499 u64 bytenr, num_bytes; 1500 1501 /* We can be called directly from walk_up_proc() */ 1502 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1503 return 0; 1504 1505 for (i = 0; i < nr; i++) { 1506 btrfs_item_key_to_cpu(eb, &key, i); 1507 1508 if (key.type != BTRFS_EXTENT_DATA_KEY) 1509 continue; 1510 1511 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1512 /* filter out non qgroup-accountable extents */ 1513 extent_type = btrfs_file_extent_type(eb, fi); 1514 1515 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1516 continue; 1517 1518 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1519 if (!bytenr) 1520 continue; 1521 1522 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1523 1524 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1525 num_bytes, GFP_NOFS); 1526 if (ret) 1527 return ret; 1528 } 1529 cond_resched(); 1530 return 0; 1531 } 1532 1533 /* 1534 * Walk up the tree from the bottom, freeing leaves and any interior 1535 * nodes which have had all slots visited. If a node (leaf or 1536 * interior) is freed, the node above it will have it's slot 1537 * incremented. The root node will never be freed. 
1538 * 1539 * At the end of this function, we should have a path which has all 1540 * slots incremented to the next position for a search. If we need to 1541 * read a new node it will be NULL and the node above it will have the 1542 * correct slot selected for a later read. 1543 * 1544 * If we increment the root nodes slot counter past the number of 1545 * elements, 1 is returned to signal completion of the search. 1546 */ 1547 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1548 { 1549 int level = 0; 1550 int nr, slot; 1551 struct extent_buffer *eb; 1552 1553 if (root_level == 0) 1554 return 1; 1555 1556 while (level <= root_level) { 1557 eb = path->nodes[level]; 1558 nr = btrfs_header_nritems(eb); 1559 path->slots[level]++; 1560 slot = path->slots[level]; 1561 if (slot >= nr || level == 0) { 1562 /* 1563 * Don't free the root - we will detect this 1564 * condition after our loop and return a 1565 * positive value for caller to stop walking the tree. 1566 */ 1567 if (level != root_level) { 1568 btrfs_tree_unlock_rw(eb, path->locks[level]); 1569 path->locks[level] = 0; 1570 1571 free_extent_buffer(eb); 1572 path->nodes[level] = NULL; 1573 path->slots[level] = 0; 1574 } 1575 } else { 1576 /* 1577 * We have a valid slot to walk back down 1578 * from. Stop here so caller can process these 1579 * new nodes. 
1580 */ 1581 break; 1582 } 1583 1584 level++; 1585 } 1586 1587 eb = path->nodes[root_level]; 1588 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1589 return 1; 1590 1591 return 0; 1592 } 1593 1594 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1595 struct btrfs_root *root, 1596 struct extent_buffer *root_eb, 1597 u64 root_gen, int root_level) 1598 { 1599 struct btrfs_fs_info *fs_info = root->fs_info; 1600 int ret = 0; 1601 int level; 1602 struct extent_buffer *eb = root_eb; 1603 struct btrfs_path *path = NULL; 1604 1605 BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL); 1606 BUG_ON(root_eb == NULL); 1607 1608 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1609 return 0; 1610 1611 if (!extent_buffer_uptodate(root_eb)) { 1612 ret = btrfs_read_buffer(root_eb, root_gen); 1613 if (ret) 1614 goto out; 1615 } 1616 1617 if (root_level == 0) { 1618 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1619 goto out; 1620 } 1621 1622 path = btrfs_alloc_path(); 1623 if (!path) 1624 return -ENOMEM; 1625 1626 /* 1627 * Walk down the tree. Missing extent blocks are filled in as 1628 * we go. Metadata is accounted every time we read a new 1629 * extent block. 1630 * 1631 * When we reach a leaf, we account for file extent items in it, 1632 * walk back up the tree (adjusting slot pointers as we go) 1633 * and restart the search process. 1634 */ 1635 extent_buffer_get(root_eb); /* For path */ 1636 path->nodes[root_level] = root_eb; 1637 path->slots[root_level] = 0; 1638 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1639 walk_down: 1640 level = root_level; 1641 while (level >= 0) { 1642 if (path->nodes[level] == NULL) { 1643 int parent_slot; 1644 u64 child_gen; 1645 u64 child_bytenr; 1646 1647 /* 1648 * We need to get child blockptr/gen from parent before 1649 * we can read it. 
1650 */ 1651 eb = path->nodes[level + 1]; 1652 parent_slot = path->slots[level + 1]; 1653 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1654 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1655 1656 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1657 if (IS_ERR(eb)) { 1658 ret = PTR_ERR(eb); 1659 goto out; 1660 } else if (!extent_buffer_uptodate(eb)) { 1661 free_extent_buffer(eb); 1662 ret = -EIO; 1663 goto out; 1664 } 1665 1666 path->nodes[level] = eb; 1667 path->slots[level] = 0; 1668 1669 btrfs_tree_read_lock(eb); 1670 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1671 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1672 1673 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1674 child_bytenr, 1675 fs_info->nodesize, 1676 GFP_NOFS); 1677 if (ret) 1678 goto out; 1679 } 1680 1681 if (level == 0) { 1682 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1683 path->nodes[level]); 1684 if (ret) 1685 goto out; 1686 1687 /* Nonzero return here means we completed our search */ 1688 ret = adjust_slots_upwards(path, root_level); 1689 if (ret) 1690 break; 1691 1692 /* Restart search with new slots */ 1693 goto walk_down; 1694 } 1695 1696 level--; 1697 } 1698 1699 ret = 0; 1700 out: 1701 btrfs_free_path(path); 1702 1703 return ret; 1704 } 1705 1706 #define UPDATE_NEW 0 1707 #define UPDATE_OLD 1 1708 /* 1709 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
1710 */ 1711 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1712 struct ulist *roots, struct ulist *tmp, 1713 struct ulist *qgroups, u64 seq, int update_old) 1714 { 1715 struct ulist_node *unode; 1716 struct ulist_iterator uiter; 1717 struct ulist_node *tmp_unode; 1718 struct ulist_iterator tmp_uiter; 1719 struct btrfs_qgroup *qg; 1720 int ret = 0; 1721 1722 if (!roots) 1723 return 0; 1724 ULIST_ITER_INIT(&uiter); 1725 while ((unode = ulist_next(roots, &uiter))) { 1726 qg = find_qgroup_rb(fs_info, unode->val); 1727 if (!qg) 1728 continue; 1729 1730 ulist_reinit(tmp); 1731 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1732 GFP_ATOMIC); 1733 if (ret < 0) 1734 return ret; 1735 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1736 if (ret < 0) 1737 return ret; 1738 ULIST_ITER_INIT(&tmp_uiter); 1739 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1740 struct btrfs_qgroup_list *glist; 1741 1742 qg = unode_aux_to_qgroup(tmp_unode); 1743 if (update_old) 1744 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1745 else 1746 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1747 list_for_each_entry(glist, &qg->groups, next_group) { 1748 ret = ulist_add(qgroups, glist->group->qgroupid, 1749 qgroup_to_aux(glist->group), 1750 GFP_ATOMIC); 1751 if (ret < 0) 1752 return ret; 1753 ret = ulist_add(tmp, glist->group->qgroupid, 1754 qgroup_to_aux(glist->group), 1755 GFP_ATOMIC); 1756 if (ret < 0) 1757 return ret; 1758 } 1759 } 1760 } 1761 return 0; 1762 } 1763 1764 /* 1765 * Update qgroup rfer/excl counters. 1766 * Rfer update is easy, codes can explain themselves. 1767 * 1768 * Excl update is tricky, the update is split into 2 part. 
 * Part 1: Possible exclusive <-> sharing detect:
 *	|   A	|   !A	|
 *  -------------------------------------
 *  B	|   *	|   -	|
 *  -------------------------------------
 *  !B	|   +	|   **	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possibly exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possibly exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possibly unchanged.
 *
 * For the !A and !B conditions, the exception is the
 * cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use conditions A and B to split the
 * combinations into 4 results.
 *
 * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
 * them only one variable may be 0.
 *
 * Lastly, check result **: since there are 2 variables that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
1799 */ 1800 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1801 struct ulist *qgroups, 1802 u64 nr_old_roots, 1803 u64 nr_new_roots, 1804 u64 num_bytes, u64 seq) 1805 { 1806 struct ulist_node *unode; 1807 struct ulist_iterator uiter; 1808 struct btrfs_qgroup *qg; 1809 u64 cur_new_count, cur_old_count; 1810 1811 ULIST_ITER_INIT(&uiter); 1812 while ((unode = ulist_next(qgroups, &uiter))) { 1813 bool dirty = false; 1814 1815 qg = unode_aux_to_qgroup(unode); 1816 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1817 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1818 1819 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1820 cur_old_count, cur_new_count); 1821 1822 /* Rfer update part */ 1823 if (cur_old_count == 0 && cur_new_count > 0) { 1824 qg->rfer += num_bytes; 1825 qg->rfer_cmpr += num_bytes; 1826 dirty = true; 1827 } 1828 if (cur_old_count > 0 && cur_new_count == 0) { 1829 qg->rfer -= num_bytes; 1830 qg->rfer_cmpr -= num_bytes; 1831 dirty = true; 1832 } 1833 1834 /* Excl update part */ 1835 /* Exclusive/none -> shared case */ 1836 if (cur_old_count == nr_old_roots && 1837 cur_new_count < nr_new_roots) { 1838 /* Exclusive -> shared */ 1839 if (cur_old_count != 0) { 1840 qg->excl -= num_bytes; 1841 qg->excl_cmpr -= num_bytes; 1842 dirty = true; 1843 } 1844 } 1845 1846 /* Shared -> exclusive/none case */ 1847 if (cur_old_count < nr_old_roots && 1848 cur_new_count == nr_new_roots) { 1849 /* Shared->exclusive */ 1850 if (cur_new_count != 0) { 1851 qg->excl += num_bytes; 1852 qg->excl_cmpr += num_bytes; 1853 dirty = true; 1854 } 1855 } 1856 1857 /* Exclusive/none -> exclusive/none case */ 1858 if (cur_old_count == nr_old_roots && 1859 cur_new_count == nr_new_roots) { 1860 if (cur_old_count == 0) { 1861 /* None -> exclusive/none */ 1862 1863 if (cur_new_count != 0) { 1864 /* None -> exclusive */ 1865 qg->excl += num_bytes; 1866 qg->excl_cmpr += num_bytes; 1867 dirty = true; 1868 } 1869 /* None -> none, nothing changed */ 1870 } else { 
1871 /* Exclusive -> exclusive/none */ 1872 1873 if (cur_new_count == 0) { 1874 /* Exclusive -> none */ 1875 qg->excl -= num_bytes; 1876 qg->excl_cmpr -= num_bytes; 1877 dirty = true; 1878 } 1879 /* Exclusive -> exclusive, nothing changed */ 1880 } 1881 } 1882 1883 if (dirty) 1884 qgroup_dirty(fs_info, qg); 1885 } 1886 return 0; 1887 } 1888 1889 /* 1890 * Check if the @roots potentially is a list of fs tree roots 1891 * 1892 * Return 0 for definitely not a fs/subvol tree roots ulist 1893 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 1894 * one as well) 1895 */ 1896 static int maybe_fs_roots(struct ulist *roots) 1897 { 1898 struct ulist_node *unode; 1899 struct ulist_iterator uiter; 1900 1901 /* Empty one, still possible for fs roots */ 1902 if (!roots || roots->nnodes == 0) 1903 return 1; 1904 1905 ULIST_ITER_INIT(&uiter); 1906 unode = ulist_next(roots, &uiter); 1907 if (!unode) 1908 return 1; 1909 1910 /* 1911 * If it contains fs tree roots, then it must belong to fs/subvol 1912 * trees. 1913 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 
1914 */ 1915 return is_fstree(unode->val); 1916 } 1917 1918 int 1919 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1920 struct btrfs_fs_info *fs_info, 1921 u64 bytenr, u64 num_bytes, 1922 struct ulist *old_roots, struct ulist *new_roots) 1923 { 1924 struct ulist *qgroups = NULL; 1925 struct ulist *tmp = NULL; 1926 u64 seq; 1927 u64 nr_new_roots = 0; 1928 u64 nr_old_roots = 0; 1929 int ret = 0; 1930 1931 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1932 return 0; 1933 1934 if (new_roots) { 1935 if (!maybe_fs_roots(new_roots)) 1936 goto out_free; 1937 nr_new_roots = new_roots->nnodes; 1938 } 1939 if (old_roots) { 1940 if (!maybe_fs_roots(old_roots)) 1941 goto out_free; 1942 nr_old_roots = old_roots->nnodes; 1943 } 1944 1945 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 1946 if (nr_old_roots == 0 && nr_new_roots == 0) 1947 goto out_free; 1948 1949 BUG_ON(!fs_info->quota_root); 1950 1951 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1952 nr_old_roots, nr_new_roots); 1953 1954 qgroups = ulist_alloc(GFP_NOFS); 1955 if (!qgroups) { 1956 ret = -ENOMEM; 1957 goto out_free; 1958 } 1959 tmp = ulist_alloc(GFP_NOFS); 1960 if (!tmp) { 1961 ret = -ENOMEM; 1962 goto out_free; 1963 } 1964 1965 mutex_lock(&fs_info->qgroup_rescan_lock); 1966 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1967 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1968 mutex_unlock(&fs_info->qgroup_rescan_lock); 1969 ret = 0; 1970 goto out_free; 1971 } 1972 } 1973 mutex_unlock(&fs_info->qgroup_rescan_lock); 1974 1975 spin_lock(&fs_info->qgroup_lock); 1976 seq = fs_info->qgroup_seq; 1977 1978 /* Update old refcnts using old_roots */ 1979 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1980 UPDATE_OLD); 1981 if (ret < 0) 1982 goto out; 1983 1984 /* Update new refcnts using new_roots */ 1985 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1986 UPDATE_NEW); 1987 if (ret < 0) 1988 goto 
out; 1989 1990 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1991 num_bytes, seq); 1992 1993 /* 1994 * Bump qgroup_seq to avoid seq overlap 1995 */ 1996 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 1997 out: 1998 spin_unlock(&fs_info->qgroup_lock); 1999 out_free: 2000 ulist_free(tmp); 2001 ulist_free(qgroups); 2002 ulist_free(old_roots); 2003 ulist_free(new_roots); 2004 return ret; 2005 } 2006 2007 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2008 struct btrfs_fs_info *fs_info) 2009 { 2010 struct btrfs_qgroup_extent_record *record; 2011 struct btrfs_delayed_ref_root *delayed_refs; 2012 struct ulist *new_roots = NULL; 2013 struct rb_node *node; 2014 u64 qgroup_to_skip; 2015 int ret = 0; 2016 2017 delayed_refs = &trans->transaction->delayed_refs; 2018 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2019 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2020 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2021 node); 2022 2023 trace_btrfs_qgroup_account_extents(fs_info, record); 2024 2025 if (!ret) { 2026 /* 2027 * Old roots should be searched when inserting qgroup 2028 * extent record 2029 */ 2030 if (WARN_ON(!record->old_roots)) { 2031 /* Search commit root to find old_roots */ 2032 ret = btrfs_find_all_roots(NULL, fs_info, 2033 record->bytenr, 0, 2034 &record->old_roots); 2035 if (ret < 0) 2036 goto cleanup; 2037 } 2038 2039 /* 2040 * Use SEQ_LAST as time_seq to do special search, which 2041 * doesn't lock tree or delayed_refs and search current 2042 * root. It's safe inside commit_transaction(). 
2043 */ 2044 ret = btrfs_find_all_roots(trans, fs_info, 2045 record->bytenr, SEQ_LAST, &new_roots); 2046 if (ret < 0) 2047 goto cleanup; 2048 if (qgroup_to_skip) { 2049 ulist_del(new_roots, qgroup_to_skip, 0); 2050 ulist_del(record->old_roots, qgroup_to_skip, 2051 0); 2052 } 2053 ret = btrfs_qgroup_account_extent(trans, fs_info, 2054 record->bytenr, record->num_bytes, 2055 record->old_roots, new_roots); 2056 record->old_roots = NULL; 2057 new_roots = NULL; 2058 } 2059 cleanup: 2060 ulist_free(record->old_roots); 2061 ulist_free(new_roots); 2062 new_roots = NULL; 2063 rb_erase(node, &delayed_refs->dirty_extent_root); 2064 kfree(record); 2065 2066 } 2067 return ret; 2068 } 2069 2070 /* 2071 * called from commit_transaction. Writes all changed qgroups to disk. 2072 */ 2073 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2074 struct btrfs_fs_info *fs_info) 2075 { 2076 struct btrfs_root *quota_root = fs_info->quota_root; 2077 int ret = 0; 2078 int start_rescan_worker = 0; 2079 2080 if (!quota_root) 2081 goto out; 2082 2083 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2084 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2085 start_rescan_worker = 1; 2086 2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2089 2090 spin_lock(&fs_info->qgroup_lock); 2091 while (!list_empty(&fs_info->dirty_qgroups)) { 2092 struct btrfs_qgroup *qgroup; 2093 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2094 struct btrfs_qgroup, dirty); 2095 list_del_init(&qgroup->dirty); 2096 spin_unlock(&fs_info->qgroup_lock); 2097 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2098 if (ret) 2099 fs_info->qgroup_flags |= 2100 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2101 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2102 if (ret) 2103 fs_info->qgroup_flags |= 2104 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2105 spin_lock(&fs_info->qgroup_lock); 2106 } 2107 if 
(test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2108 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2109 else 2110 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2111 spin_unlock(&fs_info->qgroup_lock); 2112 2113 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2114 if (ret) 2115 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2116 2117 if (!ret && start_rescan_worker) { 2118 ret = qgroup_rescan_init(fs_info, 0, 1); 2119 if (!ret) { 2120 qgroup_rescan_zero_tracking(fs_info); 2121 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2122 &fs_info->qgroup_rescan_work); 2123 } 2124 ret = 0; 2125 } 2126 2127 out: 2128 2129 return ret; 2130 } 2131 2132 /* 2133 * Copy the accounting information between qgroups. This is necessary 2134 * when a snapshot or a subvolume is created. Throwing an error will 2135 * cause a transaction abort so we take extra care here to only error 2136 * when a readonly fs is a reasonable outcome. 2137 */ 2138 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2139 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2140 struct btrfs_qgroup_inherit *inherit) 2141 { 2142 int ret = 0; 2143 int i; 2144 u64 *i_qgroups; 2145 struct btrfs_root *quota_root = fs_info->quota_root; 2146 struct btrfs_qgroup *srcgroup; 2147 struct btrfs_qgroup *dstgroup; 2148 u32 level_size = 0; 2149 u64 nums; 2150 2151 mutex_lock(&fs_info->qgroup_ioctl_lock); 2152 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2153 goto out; 2154 2155 if (!quota_root) { 2156 ret = -EINVAL; 2157 goto out; 2158 } 2159 2160 if (inherit) { 2161 i_qgroups = (u64 *)(inherit + 1); 2162 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2163 2 * inherit->num_excl_copies; 2164 for (i = 0; i < nums; ++i) { 2165 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2166 2167 /* 2168 * Zero out invalid groups so we can ignore 2169 * them later. 
2170 */ 2171 if (!srcgroup || 2172 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2173 *i_qgroups = 0ULL; 2174 2175 ++i_qgroups; 2176 } 2177 } 2178 2179 /* 2180 * create a tracking group for the subvol itself 2181 */ 2182 ret = add_qgroup_item(trans, quota_root, objectid); 2183 if (ret) 2184 goto out; 2185 2186 if (srcid) { 2187 struct btrfs_root *srcroot; 2188 struct btrfs_key srckey; 2189 2190 srckey.objectid = srcid; 2191 srckey.type = BTRFS_ROOT_ITEM_KEY; 2192 srckey.offset = (u64)-1; 2193 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2194 if (IS_ERR(srcroot)) { 2195 ret = PTR_ERR(srcroot); 2196 goto out; 2197 } 2198 2199 level_size = fs_info->nodesize; 2200 } 2201 2202 /* 2203 * add qgroup to all inherited groups 2204 */ 2205 if (inherit) { 2206 i_qgroups = (u64 *)(inherit + 1); 2207 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2208 if (*i_qgroups == 0) 2209 continue; 2210 ret = add_qgroup_relation_item(trans, quota_root, 2211 objectid, *i_qgroups); 2212 if (ret && ret != -EEXIST) 2213 goto out; 2214 ret = add_qgroup_relation_item(trans, quota_root, 2215 *i_qgroups, objectid); 2216 if (ret && ret != -EEXIST) 2217 goto out; 2218 } 2219 ret = 0; 2220 } 2221 2222 2223 spin_lock(&fs_info->qgroup_lock); 2224 2225 dstgroup = add_qgroup_rb(fs_info, objectid); 2226 if (IS_ERR(dstgroup)) { 2227 ret = PTR_ERR(dstgroup); 2228 goto unlock; 2229 } 2230 2231 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2232 dstgroup->lim_flags = inherit->lim.flags; 2233 dstgroup->max_rfer = inherit->lim.max_rfer; 2234 dstgroup->max_excl = inherit->lim.max_excl; 2235 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2236 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2237 2238 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2239 if (ret) { 2240 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2241 btrfs_info(fs_info, 2242 "unable to update quota limit for %llu", 2243 dstgroup->qgroupid); 2244 goto unlock; 2245 } 2246 } 2247 
2248 if (srcid) { 2249 srcgroup = find_qgroup_rb(fs_info, srcid); 2250 if (!srcgroup) 2251 goto unlock; 2252 2253 /* 2254 * We call inherit after we clone the root in order to make sure 2255 * our counts don't go crazy, so at this point the only 2256 * difference between the two roots should be the root node. 2257 */ 2258 dstgroup->rfer = srcgroup->rfer; 2259 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2260 dstgroup->excl = level_size; 2261 dstgroup->excl_cmpr = level_size; 2262 srcgroup->excl = level_size; 2263 srcgroup->excl_cmpr = level_size; 2264 2265 /* inherit the limit info */ 2266 dstgroup->lim_flags = srcgroup->lim_flags; 2267 dstgroup->max_rfer = srcgroup->max_rfer; 2268 dstgroup->max_excl = srcgroup->max_excl; 2269 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2270 dstgroup->rsv_excl = srcgroup->rsv_excl; 2271 2272 qgroup_dirty(fs_info, dstgroup); 2273 qgroup_dirty(fs_info, srcgroup); 2274 } 2275 2276 if (!inherit) 2277 goto unlock; 2278 2279 i_qgroups = (u64 *)(inherit + 1); 2280 for (i = 0; i < inherit->num_qgroups; ++i) { 2281 if (*i_qgroups) { 2282 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2283 if (ret) 2284 goto unlock; 2285 } 2286 ++i_qgroups; 2287 } 2288 2289 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2290 struct btrfs_qgroup *src; 2291 struct btrfs_qgroup *dst; 2292 2293 if (!i_qgroups[0] || !i_qgroups[1]) 2294 continue; 2295 2296 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2297 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2298 2299 if (!src || !dst) { 2300 ret = -EINVAL; 2301 goto unlock; 2302 } 2303 2304 dst->rfer = src->rfer - level_size; 2305 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2306 } 2307 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2308 struct btrfs_qgroup *src; 2309 struct btrfs_qgroup *dst; 2310 2311 if (!i_qgroups[0] || !i_qgroups[1]) 2312 continue; 2313 2314 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2315 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2316 2317 if (!src || !dst) 
{ 2318 ret = -EINVAL; 2319 goto unlock; 2320 } 2321 2322 dst->excl = src->excl + level_size; 2323 dst->excl_cmpr = src->excl_cmpr + level_size; 2324 } 2325 2326 unlock: 2327 spin_unlock(&fs_info->qgroup_lock); 2328 out: 2329 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2330 return ret; 2331 } 2332 2333 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2334 { 2335 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2336 qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer) 2337 return false; 2338 2339 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2340 qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl) 2341 return false; 2342 2343 return true; 2344 } 2345 2346 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) 2347 { 2348 struct btrfs_root *quota_root; 2349 struct btrfs_qgroup *qgroup; 2350 struct btrfs_fs_info *fs_info = root->fs_info; 2351 u64 ref_root = root->root_key.objectid; 2352 int ret = 0; 2353 int retried = 0; 2354 struct ulist_node *unode; 2355 struct ulist_iterator uiter; 2356 2357 if (!is_fstree(ref_root)) 2358 return 0; 2359 2360 if (num_bytes == 0) 2361 return 0; 2362 2363 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && 2364 capable(CAP_SYS_RESOURCE)) 2365 enforce = false; 2366 2367 retry: 2368 spin_lock(&fs_info->qgroup_lock); 2369 quota_root = fs_info->quota_root; 2370 if (!quota_root) 2371 goto out; 2372 2373 qgroup = find_qgroup_rb(fs_info, ref_root); 2374 if (!qgroup) 2375 goto out; 2376 2377 /* 2378 * in a first step, we check all affected qgroups if any limits would 2379 * be exceeded 2380 */ 2381 ulist_reinit(fs_info->qgroup_ulist); 2382 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2383 (uintptr_t)qgroup, GFP_ATOMIC); 2384 if (ret < 0) 2385 goto out; 2386 ULIST_ITER_INIT(&uiter); 2387 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2388 struct btrfs_qgroup *qg; 2389 struct btrfs_qgroup_list *glist; 2390 2391 qg = 
unode_aux_to_qgroup(unode); 2392 2393 if (enforce && !qgroup_check_limits(qg, num_bytes)) { 2394 /* 2395 * Commit the tree and retry, since we may have 2396 * deletions which would free up space. 2397 */ 2398 if (!retried && qg->reserved > 0) { 2399 struct btrfs_trans_handle *trans; 2400 2401 spin_unlock(&fs_info->qgroup_lock); 2402 ret = btrfs_start_delalloc_inodes(root, 0); 2403 if (ret) 2404 return ret; 2405 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); 2406 trans = btrfs_join_transaction(root); 2407 if (IS_ERR(trans)) 2408 return PTR_ERR(trans); 2409 ret = btrfs_commit_transaction(trans); 2410 if (ret) 2411 return ret; 2412 retried++; 2413 goto retry; 2414 } 2415 ret = -EDQUOT; 2416 goto out; 2417 } 2418 2419 list_for_each_entry(glist, &qg->groups, next_group) { 2420 ret = ulist_add(fs_info->qgroup_ulist, 2421 glist->group->qgroupid, 2422 (uintptr_t)glist->group, GFP_ATOMIC); 2423 if (ret < 0) 2424 goto out; 2425 } 2426 } 2427 ret = 0; 2428 /* 2429 * no limits exceeded, now record the reservation into all qgroups 2430 */ 2431 ULIST_ITER_INIT(&uiter); 2432 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2433 struct btrfs_qgroup *qg; 2434 2435 qg = unode_aux_to_qgroup(unode); 2436 2437 trace_qgroup_update_reserve(fs_info, qg, num_bytes); 2438 qg->reserved += num_bytes; 2439 } 2440 2441 out: 2442 spin_unlock(&fs_info->qgroup_lock); 2443 return ret; 2444 } 2445 2446 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 2447 u64 ref_root, u64 num_bytes) 2448 { 2449 struct btrfs_root *quota_root; 2450 struct btrfs_qgroup *qgroup; 2451 struct ulist_node *unode; 2452 struct ulist_iterator uiter; 2453 int ret = 0; 2454 2455 if (!is_fstree(ref_root)) 2456 return; 2457 2458 if (num_bytes == 0) 2459 return; 2460 2461 spin_lock(&fs_info->qgroup_lock); 2462 2463 quota_root = fs_info->quota_root; 2464 if (!quota_root) 2465 goto out; 2466 2467 qgroup = find_qgroup_rb(fs_info, ref_root); 2468 if (!qgroup) 2469 goto out; 2470 2471 
ulist_reinit(fs_info->qgroup_ulist); 2472 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2473 (uintptr_t)qgroup, GFP_ATOMIC); 2474 if (ret < 0) 2475 goto out; 2476 ULIST_ITER_INIT(&uiter); 2477 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2478 struct btrfs_qgroup *qg; 2479 struct btrfs_qgroup_list *glist; 2480 2481 qg = unode_aux_to_qgroup(unode); 2482 2483 trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes); 2484 if (qg->reserved < num_bytes) 2485 report_reserved_underflow(fs_info, qg, num_bytes); 2486 else 2487 qg->reserved -= num_bytes; 2488 2489 list_for_each_entry(glist, &qg->groups, next_group) { 2490 ret = ulist_add(fs_info->qgroup_ulist, 2491 glist->group->qgroupid, 2492 (uintptr_t)glist->group, GFP_ATOMIC); 2493 if (ret < 0) 2494 goto out; 2495 } 2496 } 2497 2498 out: 2499 spin_unlock(&fs_info->qgroup_lock); 2500 } 2501 2502 /* 2503 * returns < 0 on error, 0 when more leafs are to be scanned. 2504 * returns 1 when done. 2505 */ 2506 static int 2507 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2508 struct btrfs_trans_handle *trans) 2509 { 2510 struct btrfs_key found; 2511 struct extent_buffer *scratch_leaf = NULL; 2512 struct ulist *roots = NULL; 2513 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); 2514 u64 num_bytes; 2515 int slot; 2516 int ret; 2517 2518 mutex_lock(&fs_info->qgroup_rescan_lock); 2519 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2520 &fs_info->qgroup_rescan_progress, 2521 path, 1, 0); 2522 2523 btrfs_debug(fs_info, 2524 "current progress key (%llu %u %llu), search_slot ret %d", 2525 fs_info->qgroup_rescan_progress.objectid, 2526 fs_info->qgroup_rescan_progress.type, 2527 fs_info->qgroup_rescan_progress.offset, ret); 2528 2529 if (ret) { 2530 /* 2531 * The rescan is about to end, we will not be scanning any 2532 * further blocks. We cannot unset the RESCAN flag here, because 2533 * we want to commit the transaction if everything went well. 
2534 * To make the live accounting work in this phase, we set our 2535 * scan progress pointer such that every real extent objectid 2536 * will be smaller. 2537 */ 2538 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2539 btrfs_release_path(path); 2540 mutex_unlock(&fs_info->qgroup_rescan_lock); 2541 return ret; 2542 } 2543 2544 btrfs_item_key_to_cpu(path->nodes[0], &found, 2545 btrfs_header_nritems(path->nodes[0]) - 1); 2546 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2547 2548 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2549 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 2550 if (!scratch_leaf) { 2551 ret = -ENOMEM; 2552 mutex_unlock(&fs_info->qgroup_rescan_lock); 2553 goto out; 2554 } 2555 extent_buffer_get(scratch_leaf); 2556 btrfs_tree_read_lock(scratch_leaf); 2557 btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); 2558 slot = path->slots[0]; 2559 btrfs_release_path(path); 2560 mutex_unlock(&fs_info->qgroup_rescan_lock); 2561 2562 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2563 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2564 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2565 found.type != BTRFS_METADATA_ITEM_KEY) 2566 continue; 2567 if (found.type == BTRFS_METADATA_ITEM_KEY) 2568 num_bytes = fs_info->nodesize; 2569 else 2570 num_bytes = found.offset; 2571 2572 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2573 &roots); 2574 if (ret < 0) 2575 goto out; 2576 /* For rescan, just pass old_roots as NULL */ 2577 ret = btrfs_qgroup_account_extent(trans, fs_info, 2578 found.objectid, num_bytes, NULL, roots); 2579 if (ret < 0) 2580 goto out; 2581 } 2582 out: 2583 if (scratch_leaf) { 2584 btrfs_tree_read_unlock_blocking(scratch_leaf); 2585 free_extent_buffer(scratch_leaf); 2586 } 2587 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2588 2589 return ret; 2590 } 2591 2592 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2593 { 2594 struct btrfs_fs_info *fs_info = 
container_of(work, struct btrfs_fs_info, 2595 qgroup_rescan_work); 2596 struct btrfs_path *path; 2597 struct btrfs_trans_handle *trans = NULL; 2598 int err = -ENOMEM; 2599 int ret = 0; 2600 2601 path = btrfs_alloc_path(); 2602 if (!path) 2603 goto out; 2604 2605 err = 0; 2606 while (!err && !btrfs_fs_closing(fs_info)) { 2607 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2608 if (IS_ERR(trans)) { 2609 err = PTR_ERR(trans); 2610 break; 2611 } 2612 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2613 err = -EINTR; 2614 } else { 2615 err = qgroup_rescan_leaf(fs_info, path, trans); 2616 } 2617 if (err > 0) 2618 btrfs_commit_transaction(trans); 2619 else 2620 btrfs_end_transaction(trans); 2621 } 2622 2623 out: 2624 btrfs_free_path(path); 2625 2626 mutex_lock(&fs_info->qgroup_rescan_lock); 2627 if (!btrfs_fs_closing(fs_info)) 2628 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2629 2630 if (err > 0 && 2631 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2632 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2633 } else if (err < 0) { 2634 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2635 } 2636 mutex_unlock(&fs_info->qgroup_rescan_lock); 2637 2638 /* 2639 * only update status, since the previous part has already updated the 2640 * qgroup info. 2641 */ 2642 trans = btrfs_start_transaction(fs_info->quota_root, 1); 2643 if (IS_ERR(trans)) { 2644 err = PTR_ERR(trans); 2645 btrfs_err(fs_info, 2646 "fail to start transaction for status update: %d", 2647 err); 2648 goto done; 2649 } 2650 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2651 if (ret < 0) { 2652 err = ret; 2653 btrfs_err(fs_info, "fail to update qgroup status: %d", err); 2654 } 2655 btrfs_end_transaction(trans); 2656 2657 if (btrfs_fs_closing(fs_info)) { 2658 btrfs_info(fs_info, "qgroup scan paused"); 2659 } else if (err >= 0) { 2660 btrfs_info(fs_info, "qgroup scan completed%s", 2661 err > 0 ? 
" (inconsistency flag cleared)" : ""); 2662 } else { 2663 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2664 } 2665 2666 done: 2667 mutex_lock(&fs_info->qgroup_rescan_lock); 2668 fs_info->qgroup_rescan_running = false; 2669 mutex_unlock(&fs_info->qgroup_rescan_lock); 2670 complete_all(&fs_info->qgroup_rescan_completion); 2671 } 2672 2673 /* 2674 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2675 * memory required for the rescan context. 2676 */ 2677 static int 2678 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2679 int init_flags) 2680 { 2681 int ret = 0; 2682 2683 if (!init_flags && 2684 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2685 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2686 ret = -EINVAL; 2687 goto err; 2688 } 2689 2690 mutex_lock(&fs_info->qgroup_rescan_lock); 2691 spin_lock(&fs_info->qgroup_lock); 2692 2693 if (init_flags) { 2694 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2695 ret = -EINPROGRESS; 2696 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2697 ret = -EINVAL; 2698 2699 if (ret) { 2700 spin_unlock(&fs_info->qgroup_lock); 2701 mutex_unlock(&fs_info->qgroup_rescan_lock); 2702 goto err; 2703 } 2704 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2705 } 2706 2707 memset(&fs_info->qgroup_rescan_progress, 0, 2708 sizeof(fs_info->qgroup_rescan_progress)); 2709 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2710 init_completion(&fs_info->qgroup_rescan_completion); 2711 fs_info->qgroup_rescan_running = true; 2712 2713 spin_unlock(&fs_info->qgroup_lock); 2714 mutex_unlock(&fs_info->qgroup_rescan_lock); 2715 2716 memset(&fs_info->qgroup_rescan_work, 0, 2717 sizeof(fs_info->qgroup_rescan_work)); 2718 btrfs_init_work(&fs_info->qgroup_rescan_work, 2719 btrfs_qgroup_rescan_helper, 2720 btrfs_qgroup_rescan_worker, NULL, NULL); 2721 2722 if (ret) { 2723 err: 2724 btrfs_info(fs_info, 
"qgroup_rescan_init failed with %d", ret); 2725 return ret; 2726 } 2727 2728 return 0; 2729 } 2730 2731 static void 2732 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2733 { 2734 struct rb_node *n; 2735 struct btrfs_qgroup *qgroup; 2736 2737 spin_lock(&fs_info->qgroup_lock); 2738 /* clear all current qgroup tracking information */ 2739 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2740 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2741 qgroup->rfer = 0; 2742 qgroup->rfer_cmpr = 0; 2743 qgroup->excl = 0; 2744 qgroup->excl_cmpr = 0; 2745 } 2746 spin_unlock(&fs_info->qgroup_lock); 2747 } 2748 2749 int 2750 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2751 { 2752 int ret = 0; 2753 struct btrfs_trans_handle *trans; 2754 2755 ret = qgroup_rescan_init(fs_info, 0, 1); 2756 if (ret) 2757 return ret; 2758 2759 /* 2760 * We have set the rescan_progress to 0, which means no more 2761 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2762 * However, btrfs_qgroup_account_ref may be right after its call 2763 * to btrfs_find_all_roots, in which case it would still do the 2764 * accounting. 2765 * To solve this, we're committing the transaction, which will 2766 * ensure we run all delayed refs and only after that, we are 2767 * going to clear all tracking information for a clean start. 
2768 */ 2769 2770 trans = btrfs_join_transaction(fs_info->fs_root); 2771 if (IS_ERR(trans)) { 2772 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2773 return PTR_ERR(trans); 2774 } 2775 ret = btrfs_commit_transaction(trans); 2776 if (ret) { 2777 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2778 return ret; 2779 } 2780 2781 qgroup_rescan_zero_tracking(fs_info); 2782 2783 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2784 &fs_info->qgroup_rescan_work); 2785 2786 return 0; 2787 } 2788 2789 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, 2790 bool interruptible) 2791 { 2792 int running; 2793 int ret = 0; 2794 2795 mutex_lock(&fs_info->qgroup_rescan_lock); 2796 spin_lock(&fs_info->qgroup_lock); 2797 running = fs_info->qgroup_rescan_running; 2798 spin_unlock(&fs_info->qgroup_lock); 2799 mutex_unlock(&fs_info->qgroup_rescan_lock); 2800 2801 if (!running) 2802 return 0; 2803 2804 if (interruptible) 2805 ret = wait_for_completion_interruptible( 2806 &fs_info->qgroup_rescan_completion); 2807 else 2808 wait_for_completion(&fs_info->qgroup_rescan_completion); 2809 2810 return ret; 2811 } 2812 2813 /* 2814 * this is only called from open_ctree where we're still single threaded, thus 2815 * locking is omitted here. 2816 */ 2817 void 2818 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) 2819 { 2820 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2821 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2822 &fs_info->qgroup_rescan_work); 2823 } 2824 2825 /* 2826 * Reserve qgroup space for range [start, start + len). 2827 * 2828 * This function will either reserve space from related qgroups or doing 2829 * nothing if the range is already reserved. 2830 * 2831 * Return 0 for successful reserve 2832 * Return <0 for error (including -EQUOT) 2833 * 2834 * NOTE: this function may sleep for memory allocation. 
2835 * if btrfs_qgroup_reserve_data() is called multiple times with 2836 * same @reserved, caller must ensure when error happens it's OK 2837 * to free *ALL* reserved space. 2838 */ 2839 int btrfs_qgroup_reserve_data(struct inode *inode, 2840 struct extent_changeset **reserved_ret, u64 start, 2841 u64 len) 2842 { 2843 struct btrfs_root *root = BTRFS_I(inode)->root; 2844 struct ulist_node *unode; 2845 struct ulist_iterator uiter; 2846 struct extent_changeset *reserved; 2847 u64 orig_reserved; 2848 u64 to_reserve; 2849 int ret; 2850 2851 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) || 2852 !is_fstree(root->objectid) || len == 0) 2853 return 0; 2854 2855 /* @reserved parameter is mandatory for qgroup */ 2856 if (WARN_ON(!reserved_ret)) 2857 return -EINVAL; 2858 if (!*reserved_ret) { 2859 *reserved_ret = extent_changeset_alloc(); 2860 if (!*reserved_ret) 2861 return -ENOMEM; 2862 } 2863 reserved = *reserved_ret; 2864 /* Record already reserved space */ 2865 orig_reserved = reserved->bytes_changed; 2866 ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2867 start + len -1, EXTENT_QGROUP_RESERVED, reserved); 2868 2869 /* Newly reserved space */ 2870 to_reserve = reserved->bytes_changed - orig_reserved; 2871 trace_btrfs_qgroup_reserve_data(inode, start, len, 2872 to_reserve, QGROUP_RESERVE); 2873 if (ret < 0) 2874 goto cleanup; 2875 ret = qgroup_reserve(root, to_reserve, true); 2876 if (ret < 0) 2877 goto cleanup; 2878 2879 return ret; 2880 2881 cleanup: 2882 /* cleanup *ALL* already reserved ranges */ 2883 ULIST_ITER_INIT(&uiter); 2884 while ((unode = ulist_next(&reserved->range_changed, &uiter))) 2885 clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val, 2886 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL, 2887 GFP_NOFS); 2888 extent_changeset_release(reserved); 2889 return ret; 2890 } 2891 2892 /* Free ranges specified by @reserved, normally in error path */ 2893 static int qgroup_free_reserved_data(struct inode *inode, 2894 struct 
extent_changeset *reserved, u64 start, u64 len) 2895 { 2896 struct btrfs_root *root = BTRFS_I(inode)->root; 2897 struct ulist_node *unode; 2898 struct ulist_iterator uiter; 2899 struct extent_changeset changeset; 2900 int freed = 0; 2901 int ret; 2902 2903 extent_changeset_init(&changeset); 2904 len = round_up(start + len, root->fs_info->sectorsize); 2905 start = round_down(start, root->fs_info->sectorsize); 2906 2907 ULIST_ITER_INIT(&uiter); 2908 while ((unode = ulist_next(&reserved->range_changed, &uiter))) { 2909 u64 range_start = unode->val; 2910 /* unode->aux is the inclusive end */ 2911 u64 range_len = unode->aux - range_start + 1; 2912 u64 free_start; 2913 u64 free_len; 2914 2915 extent_changeset_release(&changeset); 2916 2917 /* Only free range in range [start, start + len) */ 2918 if (range_start >= start + len || 2919 range_start + range_len <= start) 2920 continue; 2921 free_start = max(range_start, start); 2922 free_len = min(start + len, range_start + range_len) - 2923 free_start; 2924 /* 2925 * TODO: To also modify reserved->ranges_reserved to reflect 2926 * the modification. 2927 * 2928 * However as long as we free qgroup reserved according to 2929 * EXTENT_QGROUP_RESERVED, we won't double free. 2930 * So not need to rush. 
2931 */ 2932 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, 2933 free_start, free_start + free_len - 1, 2934 EXTENT_QGROUP_RESERVED, &changeset); 2935 if (ret < 0) 2936 goto out; 2937 freed += changeset.bytes_changed; 2938 } 2939 btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed); 2940 ret = freed; 2941 out: 2942 extent_changeset_release(&changeset); 2943 return ret; 2944 } 2945 2946 static int __btrfs_qgroup_release_data(struct inode *inode, 2947 struct extent_changeset *reserved, u64 start, u64 len, 2948 int free) 2949 { 2950 struct extent_changeset changeset; 2951 int trace_op = QGROUP_RELEASE; 2952 int ret; 2953 2954 /* In release case, we shouldn't have @reserved */ 2955 WARN_ON(!free && reserved); 2956 if (free && reserved) 2957 return qgroup_free_reserved_data(inode, reserved, start, len); 2958 extent_changeset_init(&changeset); 2959 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2960 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2961 if (ret < 0) 2962 goto out; 2963 2964 if (free) 2965 trace_op = QGROUP_FREE; 2966 trace_btrfs_qgroup_release_data(inode, start, len, 2967 changeset.bytes_changed, trace_op); 2968 if (free) 2969 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 2970 BTRFS_I(inode)->root->objectid, 2971 changeset.bytes_changed); 2972 ret = changeset.bytes_changed; 2973 out: 2974 extent_changeset_release(&changeset); 2975 return ret; 2976 } 2977 2978 /* 2979 * Free a reserved space range from io_tree and related qgroups 2980 * 2981 * Should be called when a range of pages get invalidated before reaching disk. 2982 * Or for error cleanup case. 2983 * if @reserved is given, only reserved range in [@start, @start + @len) will 2984 * be freed. 2985 * 2986 * For data written to disk, use btrfs_qgroup_release_data(). 2987 * 2988 * NOTE: This function may sleep for memory allocation. 
2989 */ 2990 int btrfs_qgroup_free_data(struct inode *inode, 2991 struct extent_changeset *reserved, u64 start, u64 len) 2992 { 2993 return __btrfs_qgroup_release_data(inode, reserved, start, len, 1); 2994 } 2995 2996 /* 2997 * Release a reserved space range from io_tree only. 2998 * 2999 * Should be called when a range of pages get written to disk and corresponding 3000 * FILE_EXTENT is inserted into corresponding root. 3001 * 3002 * Since new qgroup accounting framework will only update qgroup numbers at 3003 * commit_transaction() time, its reserved space shouldn't be freed from 3004 * related qgroups. 3005 * 3006 * But we should release the range from io_tree, to allow further write to be 3007 * COWed. 3008 * 3009 * NOTE: This function may sleep for memory allocation. 3010 */ 3011 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len) 3012 { 3013 return __btrfs_qgroup_release_data(inode, NULL, start, len, 0); 3014 } 3015 3016 int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, 3017 bool enforce) 3018 { 3019 struct btrfs_fs_info *fs_info = root->fs_info; 3020 int ret; 3021 3022 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3023 !is_fstree(root->objectid) || num_bytes == 0) 3024 return 0; 3025 3026 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3027 trace_qgroup_meta_reserve(root, (s64)num_bytes); 3028 ret = qgroup_reserve(root, num_bytes, enforce); 3029 if (ret < 0) 3030 return ret; 3031 atomic64_add(num_bytes, &root->qgroup_meta_rsv); 3032 return ret; 3033 } 3034 3035 void btrfs_qgroup_free_meta_all(struct btrfs_root *root) 3036 { 3037 struct btrfs_fs_info *fs_info = root->fs_info; 3038 u64 reserved; 3039 3040 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3041 !is_fstree(root->objectid)) 3042 return; 3043 3044 reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); 3045 if (reserved == 0) 3046 return; 3047 trace_qgroup_meta_reserve(root, -(s64)reserved); 3048 
btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); 3049 } 3050 3051 void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) 3052 { 3053 struct btrfs_fs_info *fs_info = root->fs_info; 3054 3055 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || 3056 !is_fstree(root->objectid)) 3057 return; 3058 3059 BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); 3060 WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); 3061 atomic64_sub(num_bytes, &root->qgroup_meta_rsv); 3062 trace_qgroup_meta_reserve(root, -(s64)num_bytes); 3063 btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); 3064 } 3065 3066 /* 3067 * Check qgroup reserved space leaking, normally at destroy inode 3068 * time 3069 */ 3070 void btrfs_qgroup_check_reserved_leak(struct inode *inode) 3071 { 3072 struct extent_changeset changeset; 3073 struct ulist_node *unode; 3074 struct ulist_iterator iter; 3075 int ret; 3076 3077 extent_changeset_init(&changeset); 3078 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 3079 EXTENT_QGROUP_RESERVED, &changeset); 3080 3081 WARN_ON(ret < 0); 3082 if (WARN_ON(changeset.bytes_changed)) { 3083 ULIST_ITER_INIT(&iter); 3084 while ((unode = ulist_next(&changeset.range_changed, &iter))) { 3085 btrfs_warn(BTRFS_I(inode)->root->fs_info, 3086 "leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu", 3087 inode->i_ino, unode->val, unode->aux); 3088 } 3089 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 3090 BTRFS_I(inode)->root->objectid, 3091 changeset.bytes_changed); 3092 3093 } 3094 extent_changeset_release(&changeset); 3095 } 3096