/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "backref.h"
#include "extent_io.h"
#include "qgroup.h"


/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - reorganize keys
 *  - compressed
 *  - sync
 *  - copy also limits on subvol creation
 *  - limit
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */

static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->old_refcnt < seq)
		qg->old_refcnt = seq;
	qg->old_refcnt += mod;
}

static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
					   int mod)
{
	if (qg->new_refcnt < seq)
		qg->new_refcnt = seq;
	qg->new_refcnt += mod;
}

static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->old_refcnt < seq)
		return 0;
	return qg->old_refcnt - seq;
}

static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
{
	if (qg->new_refcnt < seq)
		return 0;
	return qg->new_refcnt - seq;
}

/*
 * glue structure to represent the relations between qgroups.
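 *
 * Each btrfs_qgroup_list below is linked into two lists at once: via
 * next_group it sits on the member's ->groups list (the parents this
 * qgroup belongs to), and via next_member it sits on the parent's
 * ->members list.  Illustrative sketch (in-memory only, not an on-disk
 * layout):
 *
 *	member->groups  -> list->next_group  -> ... other parents of member
 *	parent->members -> list->next_member -> ... other members of parent
 *
 * so a single allocation describes one parent/child edge and can be
 * reached from either end of the relation.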
82 */ 83 struct btrfs_qgroup_list { 84 struct list_head next_group; 85 struct list_head next_member; 86 struct btrfs_qgroup *group; 87 struct btrfs_qgroup *member; 88 }; 89 90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 91 { 92 return (u64)(uintptr_t)qg; 93 } 94 95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 96 { 97 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 98 } 99 100 static int 101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 102 int init_flags); 103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 104 105 /* must be called with qgroup_ioctl_lock held */ 106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 107 u64 qgroupid) 108 { 109 struct rb_node *n = fs_info->qgroup_tree.rb_node; 110 struct btrfs_qgroup *qgroup; 111 112 while (n) { 113 qgroup = rb_entry(n, struct btrfs_qgroup, node); 114 if (qgroup->qgroupid < qgroupid) 115 n = n->rb_left; 116 else if (qgroup->qgroupid > qgroupid) 117 n = n->rb_right; 118 else 119 return qgroup; 120 } 121 return NULL; 122 } 123 124 /* must be called with qgroup_lock held */ 125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 126 u64 qgroupid) 127 { 128 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 129 struct rb_node *parent = NULL; 130 struct btrfs_qgroup *qgroup; 131 132 while (*p) { 133 parent = *p; 134 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 135 136 if (qgroup->qgroupid < qgroupid) 137 p = &(*p)->rb_left; 138 else if (qgroup->qgroupid > qgroupid) 139 p = &(*p)->rb_right; 140 else 141 return qgroup; 142 } 143 144 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 145 if (!qgroup) 146 return ERR_PTR(-ENOMEM); 147 148 qgroup->qgroupid = qgroupid; 149 INIT_LIST_HEAD(&qgroup->groups); 150 INIT_LIST_HEAD(&qgroup->members); 151 INIT_LIST_HEAD(&qgroup->dirty); 152 153 rb_link_node(&qgroup->node, parent, p); 154 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 155 156 return qgroup; 157 } 158 159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 160 { 161 struct btrfs_qgroup_list *list; 162 163 list_del(&qgroup->dirty); 164 while (!list_empty(&qgroup->groups)) { 165 list = list_first_entry(&qgroup->groups, 166 struct btrfs_qgroup_list, next_group); 167 list_del(&list->next_group); 168 list_del(&list->next_member); 169 kfree(list); 170 } 171 172 while (!list_empty(&qgroup->members)) { 173 list = list_first_entry(&qgroup->members, 174 struct btrfs_qgroup_list, next_member); 175 list_del(&list->next_group); 176 list_del(&list->next_member); 177 kfree(list); 178 } 179 kfree(qgroup); 180 } 181 182 /* must be called with qgroup_lock held */ 183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 184 { 185 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 186 187 if (!qgroup) 188 return -ENOENT; 189 190 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 191 __del_qgroup_rb(qgroup); 192 return 0; 193 } 194 195 /* must be called with qgroup_lock held */ 196 static int add_relation_rb(struct btrfs_fs_info *fs_info, 197 u64 memberid, u64 parentid) 198 { 199 struct btrfs_qgroup *member; 200 struct btrfs_qgroup *parent; 201 struct btrfs_qgroup_list *list; 202 203 member = find_qgroup_rb(fs_info, memberid); 204 parent = find_qgroup_rb(fs_info, parentid); 205 if (!member || !parent) 206 return -ENOENT; 207 208 list = kzalloc(sizeof(*list), GFP_ATOMIC); 209 if (!list) 210 return -ENOMEM; 211 212 list->group = parent; 213 list->member = member; 214 
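	/*
	 * Hook the new edge into both qgroups below: next_group joins the
	 * member's list of parent groups, next_member joins the parent's
	 * list of members, matching the linkage described above
	 * struct btrfs_qgroup_list.
	 */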
list_add_tail(&list->next_group, &member->groups); 215 list_add_tail(&list->next_member, &parent->members); 216 217 return 0; 218 } 219 220 /* must be called with qgroup_lock held */ 221 static int del_relation_rb(struct btrfs_fs_info *fs_info, 222 u64 memberid, u64 parentid) 223 { 224 struct btrfs_qgroup *member; 225 struct btrfs_qgroup *parent; 226 struct btrfs_qgroup_list *list; 227 228 member = find_qgroup_rb(fs_info, memberid); 229 parent = find_qgroup_rb(fs_info, parentid); 230 if (!member || !parent) 231 return -ENOENT; 232 233 list_for_each_entry(list, &member->groups, next_group) { 234 if (list->group == parent) { 235 list_del(&list->next_group); 236 list_del(&list->next_member); 237 kfree(list); 238 return 0; 239 } 240 } 241 return -ENOENT; 242 } 243 244 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 245 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 246 u64 rfer, u64 excl) 247 { 248 struct btrfs_qgroup *qgroup; 249 250 qgroup = find_qgroup_rb(fs_info, qgroupid); 251 if (!qgroup) 252 return -EINVAL; 253 if (qgroup->rfer != rfer || qgroup->excl != excl) 254 return -EINVAL; 255 return 0; 256 } 257 #endif 258 259 /* 260 * The full config is read in one go, only called from open_ctree() 261 * It doesn't use any locking, as at this point we're still single-threaded 262 */ 263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 264 { 265 struct btrfs_key key; 266 struct btrfs_key found_key; 267 struct btrfs_root *quota_root = fs_info->quota_root; 268 struct btrfs_path *path = NULL; 269 struct extent_buffer *l; 270 int slot; 271 int ret = 0; 272 u64 flags = 0; 273 u64 rescan_progress = 0; 274 275 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 276 return 0; 277 278 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 279 if (!fs_info->qgroup_ulist) { 280 ret = -ENOMEM; 281 goto out; 282 } 283 284 path = btrfs_alloc_path(); 285 if (!path) { 286 ret = -ENOMEM; 287 goto out; 288 } 289 290 /* default this to quota off, in case no status key is found */ 291 fs_info->qgroup_flags = 0; 292 293 /* 294 * pass 1: read status, all qgroup infos and limits 295 */ 296 key.objectid = 0; 297 key.type = 0; 298 key.offset = 0; 299 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 300 if (ret) 301 goto out; 302 303 while (1) { 304 struct btrfs_qgroup *qgroup; 305 306 slot = path->slots[0]; 307 l = path->nodes[0]; 308 btrfs_item_key_to_cpu(l, &found_key, slot); 309 310 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 311 struct btrfs_qgroup_status_item *ptr; 312 313 ptr = btrfs_item_ptr(l, slot, 314 struct btrfs_qgroup_status_item); 315 316 if (btrfs_qgroup_status_version(l, ptr) != 317 BTRFS_QGROUP_STATUS_VERSION) { 318 btrfs_err(fs_info, 319 "old qgroup version, quota disabled"); 320 goto out; 321 } 322 if (btrfs_qgroup_status_generation(l, ptr) != 323 fs_info->generation) { 324 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 325 btrfs_err(fs_info, 326 "qgroup generation mismatch, marked as inconsistent"); 327 } 328 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 329 ptr); 330 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 331 goto next1; 332 } 333 334 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 335 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 336 goto next1; 337 338 qgroup = find_qgroup_rb(fs_info, found_key.offset); 339 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 340 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 341 btrfs_err(fs_info, "inconsistent qgroup config"); 342 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = 0;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				   "orphan qgroup relation 0x%llx->0x%llx",
				   found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
		if (ret)
			goto out;
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		 ret >= 0)
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}

/*
 * This is called from close_ctree(), open_ctree() or btrfs_quota_disable();
 * the first two run in single-threaded paths. For the third one, quota_root
 * has already been set to NULL under qgroup_lock, so it is safe to clean up
 * the in-memory structures without holding qgroup_lock.
449 */ 450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 451 { 452 struct rb_node *n; 453 struct btrfs_qgroup *qgroup; 454 455 while ((n = rb_first(&fs_info->qgroup_tree))) { 456 qgroup = rb_entry(n, struct btrfs_qgroup, node); 457 rb_erase(n, &fs_info->qgroup_tree); 458 __del_qgroup_rb(qgroup); 459 } 460 /* 461 * we call btrfs_free_qgroup_config() when umounting 462 * filesystem and disabling quota, so we set qgroup_ulist 463 * to be null here to avoid double free. 464 */ 465 ulist_free(fs_info->qgroup_ulist); 466 fs_info->qgroup_ulist = NULL; 467 } 468 469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 470 struct btrfs_root *quota_root, 471 u64 src, u64 dst) 472 { 473 int ret; 474 struct btrfs_path *path; 475 struct btrfs_key key; 476 477 path = btrfs_alloc_path(); 478 if (!path) 479 return -ENOMEM; 480 481 key.objectid = src; 482 key.type = BTRFS_QGROUP_RELATION_KEY; 483 key.offset = dst; 484 485 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 486 487 btrfs_mark_buffer_dirty(path->nodes[0]); 488 489 btrfs_free_path(path); 490 return ret; 491 } 492 493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 494 struct btrfs_root *quota_root, 495 u64 src, u64 dst) 496 { 497 int ret; 498 struct btrfs_path *path; 499 struct btrfs_key key; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return -ENOMEM; 504 505 key.objectid = src; 506 key.type = BTRFS_QGROUP_RELATION_KEY; 507 key.offset = dst; 508 509 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 510 if (ret < 0) 511 goto out; 512 513 if (ret > 0) { 514 ret = -ENOENT; 515 goto out; 516 } 517 518 ret = btrfs_del_item(trans, quota_root, path); 519 out: 520 btrfs_free_path(path); 521 return ret; 522 } 523 524 static int add_qgroup_item(struct btrfs_trans_handle *trans, 525 struct btrfs_root *quota_root, u64 qgroupid) 526 { 527 int ret; 528 struct btrfs_path *path; 529 struct btrfs_qgroup_info_item *qgroup_info; 530 struct btrfs_qgroup_limit_item *qgroup_limit; 531 struct extent_buffer *leaf; 532 struct btrfs_key key; 533 534 if (btrfs_is_testing(quota_root->fs_info)) 535 return 0; 536 537 path = btrfs_alloc_path(); 538 if (!path) 539 return -ENOMEM; 540 541 key.objectid = 0; 542 key.type = BTRFS_QGROUP_INFO_KEY; 543 key.offset = qgroupid; 544 545 /* 546 * Avoid a transaction abort by catching -EEXIST here. In that 547 * case, we proceed by re-initializing the existing structure 548 * on disk. 
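 * In other words (based on the calls below): if a stale
 * BTRFS_QGROUP_INFO_KEY or BTRFS_QGROUP_LIMIT_KEY item for this qgroupid
 * already exists, the insert returns -EEXIST with the path left pointing
 * at that item, and the btrfs_set_qgroup_*() calls simply overwrite its
 * fields with fresh zeroed values instead of aborting the transaction.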
549 */ 550 551 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 552 sizeof(*qgroup_info)); 553 if (ret && ret != -EEXIST) 554 goto out; 555 556 leaf = path->nodes[0]; 557 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 558 struct btrfs_qgroup_info_item); 559 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 560 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 561 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 562 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 563 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 564 565 btrfs_mark_buffer_dirty(leaf); 566 567 btrfs_release_path(path); 568 569 key.type = BTRFS_QGROUP_LIMIT_KEY; 570 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 571 sizeof(*qgroup_limit)); 572 if (ret && ret != -EEXIST) 573 goto out; 574 575 leaf = path->nodes[0]; 576 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 577 struct btrfs_qgroup_limit_item); 578 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 579 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 580 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 581 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 582 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 583 584 btrfs_mark_buffer_dirty(leaf); 585 586 ret = 0; 587 out: 588 btrfs_free_path(path); 589 return ret; 590 } 591 592 static int del_qgroup_item(struct btrfs_trans_handle *trans, 593 struct btrfs_root *quota_root, u64 qgroupid) 594 { 595 int ret; 596 struct btrfs_path *path; 597 struct btrfs_key key; 598 599 path = btrfs_alloc_path(); 600 if (!path) 601 return -ENOMEM; 602 603 key.objectid = 0; 604 key.type = BTRFS_QGROUP_INFO_KEY; 605 key.offset = qgroupid; 606 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 607 if (ret < 0) 608 goto out; 609 610 if (ret > 0) { 611 ret = -ENOENT; 612 goto out; 613 } 614 615 ret = btrfs_del_item(trans, quota_root, path); 616 if (ret) 617 goto out; 618 619 btrfs_release_path(path); 620 621 key.type = BTRFS_QGROUP_LIMIT_KEY; 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *root, 640 struct btrfs_qgroup *qgroup) 641 { 642 struct btrfs_path *path; 643 struct btrfs_key key; 644 struct extent_buffer *l; 645 struct btrfs_qgroup_limit_item *qgroup_limit; 646 int ret; 647 int slot; 648 649 key.objectid = 0; 650 key.type = BTRFS_QGROUP_LIMIT_KEY; 651 key.offset = qgroup->qgroupid; 652 653 path = btrfs_alloc_path(); 654 if (!path) 655 return -ENOMEM; 656 657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 658 if (ret > 0) 659 ret = -ENOENT; 660 661 if (ret) 662 goto out; 663 664 l = path->nodes[0]; 665 slot = path->slots[0]; 666 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 667 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 668 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 669 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 670 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 671 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 672 673 btrfs_mark_buffer_dirty(l); 674 675 out: 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 static int update_qgroup_info_item(struct 
btrfs_trans_handle *trans, 681 struct btrfs_root *root, 682 struct btrfs_qgroup *qgroup) 683 { 684 struct btrfs_path *path; 685 struct btrfs_key key; 686 struct extent_buffer *l; 687 struct btrfs_qgroup_info_item *qgroup_info; 688 int ret; 689 int slot; 690 691 if (btrfs_is_testing(root->fs_info)) 692 return 0; 693 694 key.objectid = 0; 695 key.type = BTRFS_QGROUP_INFO_KEY; 696 key.offset = qgroup->qgroupid; 697 698 path = btrfs_alloc_path(); 699 if (!path) 700 return -ENOMEM; 701 702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 703 if (ret > 0) 704 ret = -ENOENT; 705 706 if (ret) 707 goto out; 708 709 l = path->nodes[0]; 710 slot = path->slots[0]; 711 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 712 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 713 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 714 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 715 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 716 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 717 718 btrfs_mark_buffer_dirty(l); 719 720 out: 721 btrfs_free_path(path); 722 return ret; 723 } 724 725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 726 struct btrfs_fs_info *fs_info, 727 struct btrfs_root *root) 728 { 729 struct btrfs_path *path; 730 struct btrfs_key key; 731 struct extent_buffer *l; 732 struct btrfs_qgroup_status_item *ptr; 733 int ret; 734 int slot; 735 736 key.objectid = 0; 737 key.type = BTRFS_QGROUP_STATUS_KEY; 738 key.offset = 0; 739 740 path = btrfs_alloc_path(); 741 if (!path) 742 return -ENOMEM; 743 744 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 745 if (ret > 0) 746 ret = -ENOENT; 747 748 if (ret) 749 goto out; 750 751 l = path->nodes[0]; 752 slot = path->slots[0]; 753 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 754 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 755 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 756 btrfs_set_qgroup_status_rescan(l, ptr, 757 fs_info->qgroup_rescan_progress.objectid); 758 759 btrfs_mark_buffer_dirty(l); 760 761 out: 762 btrfs_free_path(path); 763 return ret; 764 } 765 766 /* 767 * called with qgroup_lock held 768 */ 769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 770 struct btrfs_root *root) 771 { 772 struct btrfs_path *path; 773 struct btrfs_key key; 774 struct extent_buffer *leaf = NULL; 775 int ret; 776 int nr = 0; 777 778 path = btrfs_alloc_path(); 779 if (!path) 780 return -ENOMEM; 781 782 path->leave_spinning = 1; 783 784 key.objectid = 0; 785 key.offset = 0; 786 key.type = 0; 787 788 while (1) { 789 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 790 if (ret < 0) 791 goto out; 792 leaf = path->nodes[0]; 793 nr = btrfs_header_nritems(leaf); 794 if (!nr) 795 break; 796 /* 797 * delete the leaf one by one 798 * since the whole tree is going 799 * to be deleted. 
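 * Each pass of the loop below positions the path at the left-most leaf,
 * forces slot 0 and deletes all 'nr' items of that leaf at once, so the
 * tree shrinks one leaf per iteration until an empty leaf is found and
 * the loop breaks out.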
800 */ 801 path->slots[0] = 0; 802 ret = btrfs_del_items(trans, root, path, 0, nr); 803 if (ret) 804 goto out; 805 806 btrfs_release_path(path); 807 } 808 ret = 0; 809 out: 810 btrfs_free_path(path); 811 return ret; 812 } 813 814 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 815 struct btrfs_fs_info *fs_info) 816 { 817 struct btrfs_root *quota_root; 818 struct btrfs_root *tree_root = fs_info->tree_root; 819 struct btrfs_path *path = NULL; 820 struct btrfs_qgroup_status_item *ptr; 821 struct extent_buffer *leaf; 822 struct btrfs_key key; 823 struct btrfs_key found_key; 824 struct btrfs_qgroup *qgroup = NULL; 825 int ret = 0; 826 int slot; 827 828 mutex_lock(&fs_info->qgroup_ioctl_lock); 829 if (fs_info->quota_root) { 830 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 831 goto out; 832 } 833 834 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 835 if (!fs_info->qgroup_ulist) { 836 ret = -ENOMEM; 837 goto out; 838 } 839 840 /* 841 * initially create the quota tree 842 */ 843 quota_root = btrfs_create_tree(trans, fs_info, 844 BTRFS_QUOTA_TREE_OBJECTID); 845 if (IS_ERR(quota_root)) { 846 ret = PTR_ERR(quota_root); 847 goto out; 848 } 849 850 path = btrfs_alloc_path(); 851 if (!path) { 852 ret = -ENOMEM; 853 goto out_free_root; 854 } 855 856 key.objectid = 0; 857 key.type = BTRFS_QGROUP_STATUS_KEY; 858 key.offset = 0; 859 860 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 861 sizeof(*ptr)); 862 if (ret) 863 goto out_free_path; 864 865 leaf = path->nodes[0]; 866 ptr = btrfs_item_ptr(leaf, path->slots[0], 867 struct btrfs_qgroup_status_item); 868 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 869 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 870 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 871 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 872 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 873 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 874 875 btrfs_mark_buffer_dirty(leaf); 876 877 key.objectid = 0; 878 key.type = BTRFS_ROOT_REF_KEY; 879 key.offset = 0; 880 881 btrfs_release_path(path); 882 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 883 if (ret > 0) 884 goto out_add_root; 885 if (ret < 0) 886 goto out_free_path; 887 888 889 while (1) { 890 slot = path->slots[0]; 891 leaf = path->nodes[0]; 892 btrfs_item_key_to_cpu(leaf, &found_key, slot); 893 894 if (found_key.type == BTRFS_ROOT_REF_KEY) { 895 ret = add_qgroup_item(trans, quota_root, 896 found_key.offset); 897 if (ret) 898 goto out_free_path; 899 900 qgroup = add_qgroup_rb(fs_info, found_key.offset); 901 if (IS_ERR(qgroup)) { 902 ret = PTR_ERR(qgroup); 903 goto out_free_path; 904 } 905 } 906 ret = btrfs_next_item(tree_root, path); 907 if (ret < 0) 908 goto out_free_path; 909 if (ret) 910 break; 911 } 912 913 out_add_root: 914 btrfs_release_path(path); 915 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 916 if (ret) 917 goto out_free_path; 918 919 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 920 if (IS_ERR(qgroup)) { 921 ret = PTR_ERR(qgroup); 922 goto out_free_path; 923 } 924 spin_lock(&fs_info->qgroup_lock); 925 fs_info->quota_root = quota_root; 926 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 927 spin_unlock(&fs_info->qgroup_lock); 928 out_free_path: 929 btrfs_free_path(path); 930 out_free_root: 931 if (ret) { 932 free_extent_buffer(quota_root->node); 933 free_extent_buffer(quota_root->commit_root); 934 kfree(quota_root); 935 } 936 out: 937 if (ret) { 938 ulist_free(fs_info->qgroup_ulist); 939 
fs_info->qgroup_ulist = NULL; 940 } 941 mutex_unlock(&fs_info->qgroup_ioctl_lock); 942 return ret; 943 } 944 945 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 946 struct btrfs_fs_info *fs_info) 947 { 948 struct btrfs_root *quota_root; 949 int ret = 0; 950 951 mutex_lock(&fs_info->qgroup_ioctl_lock); 952 if (!fs_info->quota_root) 953 goto out; 954 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 955 btrfs_qgroup_wait_for_completion(fs_info, false); 956 spin_lock(&fs_info->qgroup_lock); 957 quota_root = fs_info->quota_root; 958 fs_info->quota_root = NULL; 959 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 960 spin_unlock(&fs_info->qgroup_lock); 961 962 btrfs_free_qgroup_config(fs_info); 963 964 ret = btrfs_clean_quota_tree(trans, quota_root); 965 if (ret) 966 goto out; 967 968 ret = btrfs_del_root(trans, fs_info, "a_root->root_key); 969 if (ret) 970 goto out; 971 972 list_del("a_root->dirty_list); 973 974 btrfs_tree_lock(quota_root->node); 975 clean_tree_block(fs_info, quota_root->node); 976 btrfs_tree_unlock(quota_root->node); 977 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 978 979 free_extent_buffer(quota_root->node); 980 free_extent_buffer(quota_root->commit_root); 981 kfree(quota_root); 982 out: 983 mutex_unlock(&fs_info->qgroup_ioctl_lock); 984 return ret; 985 } 986 987 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 988 struct btrfs_qgroup *qgroup) 989 { 990 if (list_empty(&qgroup->dirty)) 991 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 992 } 993 994 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 995 struct btrfs_qgroup *qgroup, 996 u64 num_bytes) 997 { 998 #ifdef CONFIG_BTRFS_DEBUG 999 WARN_ON(qgroup->reserved < num_bytes); 1000 btrfs_debug(fs_info, 1001 "qgroup %llu reserved space underflow, have: %llu, to free: %llu", 1002 qgroup->qgroupid, qgroup->reserved, num_bytes); 1003 #endif 1004 qgroup->reserved = 0; 1005 } 1006 /* 1007 * The easy accounting, if we are adding/removing the only ref for an extent 1008 * then this qgroup and all of the parent qgroups get their reference and 1009 * exclusive counts adjusted. 1010 * 1011 * Caller should hold fs_info->qgroup_lock. 
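 *
 * Illustrative example (made-up numbers): if qgroup 0/257 held the only
 * reference to a 16K extent and sign is +1, then rfer, rfer_cmpr, excl
 * and excl_cmpr of 0/257 and of every parent qgroup above it each grow
 * by 16K; with sign == -1 they shrink by the same 16K.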
 */
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
				    struct ulist *tmp, u64 ref_root,
				    u64 num_bytes, int sign)
{
	struct btrfs_qgroup *qgroup;
	struct btrfs_qgroup_list *glist;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	qgroup->rfer += sign * num_bytes;
	qgroup->rfer_cmpr += sign * num_bytes;

	WARN_ON(sign < 0 && qgroup->excl < num_bytes);
	qgroup->excl += sign * num_bytes;
	qgroup->excl_cmpr += sign * num_bytes;
	if (sign > 0) {
		trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
		if (qgroup->reserved < num_bytes)
			report_reserved_underflow(fs_info, qgroup, num_bytes);
		else
			qgroup->reserved -= num_bytes;
	}

	qgroup_dirty(fs_info, qgroup);

	/* Get all of the parent groups that contain this qgroup */
	list_for_each_entry(glist, &qgroup->groups, next_group) {
		ret = ulist_add(tmp, glist->group->qgroupid,
				qgroup_to_aux(glist->group), GFP_ATOMIC);
		if (ret < 0)
			goto out;
	}

	/* Iterate all of the parents and adjust their reference counts */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qgroup = unode_aux_to_qgroup(unode);
		qgroup->rfer += sign * num_bytes;
		qgroup->rfer_cmpr += sign * num_bytes;
		WARN_ON(sign < 0 && qgroup->excl < num_bytes);
		qgroup->excl += sign * num_bytes;
		if (sign > 0) {
			trace_qgroup_update_reserve(fs_info, qgroup,
						    -(s64)num_bytes);
			if (qgroup->reserved < num_bytes)
				report_reserved_underflow(fs_info, qgroup,
							  num_bytes);
			else
				qgroup->reserved -= num_bytes;
		}
		qgroup->excl_cmpr += sign * num_bytes;
		qgroup_dirty(fs_info, qgroup);

		/* Add any parents of the parents */
		list_for_each_entry(glist, &qgroup->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					qgroup_to_aux(glist->group), GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
out:
	return ret;
}


/*
 * Quick path for updating a qgroup that has only exclusive refs.
 *
 * In that case, just updating all the parents is enough.
 * Otherwise we need a full rescan.
 * Caller should also hold fs_info->qgroup_lock.
 *
 * Return 0 for the quick update, return >0 when a full rescan is needed
 * and the INCONSISTENT flag has been set.
 * Return < 0 for other error.
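 *
 * Illustrative example: if all of src's referenced space is exclusive
 * (excl == rfer), attaching src to dst only requires dst and dst's
 * parents to gain src->excl bytes, which __qgroup_excl_accounting()
 * does under qgroup_lock.  Any sharing makes the cheap update
 * impossible, so we return 1 after setting the INCONSISTENT flag and
 * rely on a later rescan to fix the numbers.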
1095 */ 1096 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1097 struct ulist *tmp, u64 src, u64 dst, 1098 int sign) 1099 { 1100 struct btrfs_qgroup *qgroup; 1101 int ret = 1; 1102 int err = 0; 1103 1104 qgroup = find_qgroup_rb(fs_info, src); 1105 if (!qgroup) 1106 goto out; 1107 if (qgroup->excl == qgroup->rfer) { 1108 ret = 0; 1109 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1110 qgroup->excl, sign); 1111 if (err < 0) { 1112 ret = err; 1113 goto out; 1114 } 1115 } 1116 out: 1117 if (ret) 1118 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1119 return ret; 1120 } 1121 1122 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1123 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1124 { 1125 struct btrfs_root *quota_root; 1126 struct btrfs_qgroup *parent; 1127 struct btrfs_qgroup *member; 1128 struct btrfs_qgroup_list *list; 1129 struct ulist *tmp; 1130 int ret = 0; 1131 1132 /* Check the level of src and dst first */ 1133 if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1134 return -EINVAL; 1135 1136 tmp = ulist_alloc(GFP_KERNEL); 1137 if (!tmp) 1138 return -ENOMEM; 1139 1140 mutex_lock(&fs_info->qgroup_ioctl_lock); 1141 quota_root = fs_info->quota_root; 1142 if (!quota_root) { 1143 ret = -EINVAL; 1144 goto out; 1145 } 1146 member = find_qgroup_rb(fs_info, src); 1147 parent = find_qgroup_rb(fs_info, dst); 1148 if (!member || !parent) { 1149 ret = -EINVAL; 1150 goto out; 1151 } 1152 1153 /* check if such qgroup relation exist firstly */ 1154 list_for_each_entry(list, &member->groups, next_group) { 1155 if (list->group == parent) { 1156 ret = -EEXIST; 1157 goto out; 1158 } 1159 } 1160 1161 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1162 if (ret) 1163 goto out; 1164 1165 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1166 if (ret) { 1167 del_qgroup_relation_item(trans, quota_root, src, dst); 1168 goto out; 1169 } 1170 1171 spin_lock(&fs_info->qgroup_lock); 1172 ret = add_relation_rb(fs_info, src, dst); 1173 if (ret < 0) { 1174 spin_unlock(&fs_info->qgroup_lock); 1175 goto out; 1176 } 1177 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1178 spin_unlock(&fs_info->qgroup_lock); 1179 out: 1180 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1181 ulist_free(tmp); 1182 return ret; 1183 } 1184 1185 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1186 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1187 { 1188 struct btrfs_root *quota_root; 1189 struct btrfs_qgroup *parent; 1190 struct btrfs_qgroup *member; 1191 struct btrfs_qgroup_list *list; 1192 struct ulist *tmp; 1193 int ret = 0; 1194 int err; 1195 1196 tmp = ulist_alloc(GFP_KERNEL); 1197 if (!tmp) 1198 return -ENOMEM; 1199 1200 quota_root = fs_info->quota_root; 1201 if (!quota_root) { 1202 ret = -EINVAL; 1203 goto out; 1204 } 1205 1206 member = find_qgroup_rb(fs_info, src); 1207 parent = find_qgroup_rb(fs_info, dst); 1208 if (!member || !parent) { 1209 ret = -EINVAL; 1210 goto out; 1211 } 1212 1213 /* check if such qgroup relation exist firstly */ 1214 list_for_each_entry(list, &member->groups, next_group) { 1215 if (list->group == parent) 1216 goto exist; 1217 } 1218 ret = -ENOENT; 1219 goto out; 1220 exist: 1221 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1222 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1223 if (err && !ret) 1224 ret = err; 1225 1226 spin_lock(&fs_info->qgroup_lock); 1227 del_relation_rb(fs_info, src, dst); 1228 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1229 
spin_unlock(&fs_info->qgroup_lock); 1230 out: 1231 ulist_free(tmp); 1232 return ret; 1233 } 1234 1235 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1236 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1237 { 1238 int ret = 0; 1239 1240 mutex_lock(&fs_info->qgroup_ioctl_lock); 1241 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1242 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1243 1244 return ret; 1245 } 1246 1247 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1248 struct btrfs_fs_info *fs_info, u64 qgroupid) 1249 { 1250 struct btrfs_root *quota_root; 1251 struct btrfs_qgroup *qgroup; 1252 int ret = 0; 1253 1254 mutex_lock(&fs_info->qgroup_ioctl_lock); 1255 quota_root = fs_info->quota_root; 1256 if (!quota_root) { 1257 ret = -EINVAL; 1258 goto out; 1259 } 1260 qgroup = find_qgroup_rb(fs_info, qgroupid); 1261 if (qgroup) { 1262 ret = -EEXIST; 1263 goto out; 1264 } 1265 1266 ret = add_qgroup_item(trans, quota_root, qgroupid); 1267 if (ret) 1268 goto out; 1269 1270 spin_lock(&fs_info->qgroup_lock); 1271 qgroup = add_qgroup_rb(fs_info, qgroupid); 1272 spin_unlock(&fs_info->qgroup_lock); 1273 1274 if (IS_ERR(qgroup)) 1275 ret = PTR_ERR(qgroup); 1276 out: 1277 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1278 return ret; 1279 } 1280 1281 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1282 struct btrfs_fs_info *fs_info, u64 qgroupid) 1283 { 1284 struct btrfs_root *quota_root; 1285 struct btrfs_qgroup *qgroup; 1286 struct btrfs_qgroup_list *list; 1287 int ret = 0; 1288 1289 mutex_lock(&fs_info->qgroup_ioctl_lock); 1290 quota_root = fs_info->quota_root; 1291 if (!quota_root) { 1292 ret = -EINVAL; 1293 goto out; 1294 } 1295 1296 qgroup = find_qgroup_rb(fs_info, qgroupid); 1297 if (!qgroup) { 1298 ret = -ENOENT; 1299 goto out; 1300 } else { 1301 /* check if there are no children of this qgroup */ 1302 if (!list_empty(&qgroup->members)) { 1303 ret = -EBUSY; 1304 goto out; 1305 } 1306 } 1307 ret = del_qgroup_item(trans, quota_root, qgroupid); 1308 if (ret && ret != -ENOENT) 1309 goto out; 1310 1311 while (!list_empty(&qgroup->groups)) { 1312 list = list_first_entry(&qgroup->groups, 1313 struct btrfs_qgroup_list, next_group); 1314 ret = __del_qgroup_relation(trans, fs_info, 1315 qgroupid, 1316 list->group->qgroupid); 1317 if (ret) 1318 goto out; 1319 } 1320 1321 spin_lock(&fs_info->qgroup_lock); 1322 del_qgroup_rb(fs_info, qgroupid); 1323 spin_unlock(&fs_info->qgroup_lock); 1324 out: 1325 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1326 return ret; 1327 } 1328 1329 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1330 struct btrfs_fs_info *fs_info, u64 qgroupid, 1331 struct btrfs_qgroup_limit *limit) 1332 { 1333 struct btrfs_root *quota_root; 1334 struct btrfs_qgroup *qgroup; 1335 int ret = 0; 1336 /* Sometimes we would want to clear the limit on this qgroup. 1337 * To meet this requirement, we treat the -1 as a special value 1338 * which tell kernel to clear the limit on this qgroup. 
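 *
 * Illustrative example (hypothetical ioctl values): a caller passing
 * flags = BTRFS_QGROUP_LIMIT_MAX_RFER with max_rfer = (u64)-1 clears the
 * referenced-size limit again, while any other max_rfer value sets a new
 * limit; the same pattern applies to the other three limit fields
 * handled below.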
1339 */ 1340 const u64 CLEAR_VALUE = -1; 1341 1342 mutex_lock(&fs_info->qgroup_ioctl_lock); 1343 quota_root = fs_info->quota_root; 1344 if (!quota_root) { 1345 ret = -EINVAL; 1346 goto out; 1347 } 1348 1349 qgroup = find_qgroup_rb(fs_info, qgroupid); 1350 if (!qgroup) { 1351 ret = -ENOENT; 1352 goto out; 1353 } 1354 1355 spin_lock(&fs_info->qgroup_lock); 1356 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1357 if (limit->max_rfer == CLEAR_VALUE) { 1358 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1359 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1360 qgroup->max_rfer = 0; 1361 } else { 1362 qgroup->max_rfer = limit->max_rfer; 1363 } 1364 } 1365 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1366 if (limit->max_excl == CLEAR_VALUE) { 1367 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1368 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1369 qgroup->max_excl = 0; 1370 } else { 1371 qgroup->max_excl = limit->max_excl; 1372 } 1373 } 1374 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1375 if (limit->rsv_rfer == CLEAR_VALUE) { 1376 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1377 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1378 qgroup->rsv_rfer = 0; 1379 } else { 1380 qgroup->rsv_rfer = limit->rsv_rfer; 1381 } 1382 } 1383 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1384 if (limit->rsv_excl == CLEAR_VALUE) { 1385 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1386 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1387 qgroup->rsv_excl = 0; 1388 } else { 1389 qgroup->rsv_excl = limit->rsv_excl; 1390 } 1391 } 1392 qgroup->lim_flags |= limit->flags; 1393 1394 spin_unlock(&fs_info->qgroup_lock); 1395 1396 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1397 if (ret) { 1398 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1399 btrfs_info(fs_info, "unable to update quota limit for %llu", 1400 qgroupid); 1401 } 1402 1403 out: 1404 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1405 return ret; 1406 } 1407 1408 int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1409 struct btrfs_delayed_ref_root *delayed_refs, 1410 struct btrfs_qgroup_extent_record *record) 1411 { 1412 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1413 struct rb_node *parent_node = NULL; 1414 struct btrfs_qgroup_extent_record *entry; 1415 u64 bytenr = record->bytenr; 1416 1417 assert_spin_locked(&delayed_refs->lock); 1418 trace_btrfs_qgroup_trace_extent(fs_info, record); 1419 1420 while (*p) { 1421 parent_node = *p; 1422 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1423 node); 1424 if (bytenr < entry->bytenr) 1425 p = &(*p)->rb_left; 1426 else if (bytenr > entry->bytenr) 1427 p = &(*p)->rb_right; 1428 else 1429 return 1; 1430 } 1431 1432 rb_link_node(&record->node, parent_node, p); 1433 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1434 return 0; 1435 } 1436 1437 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1438 struct btrfs_qgroup_extent_record *qrecord) 1439 { 1440 struct ulist *old_root; 1441 u64 bytenr = qrecord->bytenr; 1442 int ret; 1443 1444 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); 1445 if (ret < 0) { 1446 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1447 btrfs_warn(fs_info, 1448 "error accounting new delayed refs extent (err code: %d), quota inconsistent", 1449 ret); 1450 return 0; 1451 } 1452 1453 /* 1454 * Here we don't need to get the lock of 1455 * trans->transaction->delayed_refs, since inserted qrecord won't 1456 * be 
deleted, only qrecord->node may be modified (new qrecord insert) 1457 * 1458 * So modifying qrecord->old_roots is safe here 1459 */ 1460 qrecord->old_roots = old_root; 1461 return 0; 1462 } 1463 1464 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1465 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1466 gfp_t gfp_flag) 1467 { 1468 struct btrfs_qgroup_extent_record *record; 1469 struct btrfs_delayed_ref_root *delayed_refs; 1470 int ret; 1471 1472 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1473 || bytenr == 0 || num_bytes == 0) 1474 return 0; 1475 if (WARN_ON(trans == NULL)) 1476 return -EINVAL; 1477 record = kmalloc(sizeof(*record), gfp_flag); 1478 if (!record) 1479 return -ENOMEM; 1480 1481 delayed_refs = &trans->transaction->delayed_refs; 1482 record->bytenr = bytenr; 1483 record->num_bytes = num_bytes; 1484 record->old_roots = NULL; 1485 1486 spin_lock(&delayed_refs->lock); 1487 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1488 spin_unlock(&delayed_refs->lock); 1489 if (ret > 0) { 1490 kfree(record); 1491 return 0; 1492 } 1493 return btrfs_qgroup_trace_extent_post(fs_info, record); 1494 } 1495 1496 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1497 struct btrfs_fs_info *fs_info, 1498 struct extent_buffer *eb) 1499 { 1500 int nr = btrfs_header_nritems(eb); 1501 int i, extent_type, ret; 1502 struct btrfs_key key; 1503 struct btrfs_file_extent_item *fi; 1504 u64 bytenr, num_bytes; 1505 1506 /* We can be called directly from walk_up_proc() */ 1507 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1508 return 0; 1509 1510 for (i = 0; i < nr; i++) { 1511 btrfs_item_key_to_cpu(eb, &key, i); 1512 1513 if (key.type != BTRFS_EXTENT_DATA_KEY) 1514 continue; 1515 1516 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1517 /* filter out non qgroup-accountable extents */ 1518 extent_type = btrfs_file_extent_type(eb, fi); 1519 1520 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1521 continue; 1522 1523 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1524 if (!bytenr) 1525 continue; 1526 1527 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1528 1529 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1530 num_bytes, GFP_NOFS); 1531 if (ret) 1532 return ret; 1533 } 1534 cond_resched(); 1535 return 0; 1536 } 1537 1538 /* 1539 * Walk up the tree from the bottom, freeing leaves and any interior 1540 * nodes which have had all slots visited. If a node (leaf or 1541 * interior) is freed, the node above it will have it's slot 1542 * incremented. The root node will never be freed. 1543 * 1544 * At the end of this function, we should have a path which has all 1545 * slots incremented to the next position for a search. If we need to 1546 * read a new node it will be NULL and the node above it will have the 1547 * correct slot selected for a later read. 1548 * 1549 * If we increment the root nodes slot counter past the number of 1550 * elements, 1 is returned to signal completion of the search. 
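 *
 * Illustrative example: with root_level == 2, the leaf at level 0 is
 * always released; if the leaf occupied the last slot of its level-1
 * parent, the parent is released as well and the root's slot is
 * advanced.  Should that advance run past the root's item count, the
 * walk is finished and 1 is returned.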
1551 */ 1552 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1553 { 1554 int level = 0; 1555 int nr, slot; 1556 struct extent_buffer *eb; 1557 1558 if (root_level == 0) 1559 return 1; 1560 1561 while (level <= root_level) { 1562 eb = path->nodes[level]; 1563 nr = btrfs_header_nritems(eb); 1564 path->slots[level]++; 1565 slot = path->slots[level]; 1566 if (slot >= nr || level == 0) { 1567 /* 1568 * Don't free the root - we will detect this 1569 * condition after our loop and return a 1570 * positive value for caller to stop walking the tree. 1571 */ 1572 if (level != root_level) { 1573 btrfs_tree_unlock_rw(eb, path->locks[level]); 1574 path->locks[level] = 0; 1575 1576 free_extent_buffer(eb); 1577 path->nodes[level] = NULL; 1578 path->slots[level] = 0; 1579 } 1580 } else { 1581 /* 1582 * We have a valid slot to walk back down 1583 * from. Stop here so caller can process these 1584 * new nodes. 1585 */ 1586 break; 1587 } 1588 1589 level++; 1590 } 1591 1592 eb = path->nodes[root_level]; 1593 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1594 return 1; 1595 1596 return 0; 1597 } 1598 1599 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1600 struct btrfs_root *root, 1601 struct extent_buffer *root_eb, 1602 u64 root_gen, int root_level) 1603 { 1604 struct btrfs_fs_info *fs_info = root->fs_info; 1605 int ret = 0; 1606 int level; 1607 struct extent_buffer *eb = root_eb; 1608 struct btrfs_path *path = NULL; 1609 1610 BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL); 1611 BUG_ON(root_eb == NULL); 1612 1613 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1614 return 0; 1615 1616 if (!extent_buffer_uptodate(root_eb)) { 1617 ret = btrfs_read_buffer(root_eb, root_gen); 1618 if (ret) 1619 goto out; 1620 } 1621 1622 if (root_level == 0) { 1623 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1624 goto out; 1625 } 1626 1627 path = btrfs_alloc_path(); 1628 if (!path) 1629 return -ENOMEM; 1630 1631 /* 1632 * Walk down the tree. Missing extent blocks are filled in as 1633 * we go. Metadata is accounted every time we read a new 1634 * extent block. 1635 * 1636 * When we reach a leaf, we account for file extent items in it, 1637 * walk back up the tree (adjusting slot pointers as we go) 1638 * and restart the search process. 1639 */ 1640 extent_buffer_get(root_eb); /* For path */ 1641 path->nodes[root_level] = root_eb; 1642 path->slots[root_level] = 0; 1643 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1644 walk_down: 1645 level = root_level; 1646 while (level >= 0) { 1647 if (path->nodes[level] == NULL) { 1648 int parent_slot; 1649 u64 child_gen; 1650 u64 child_bytenr; 1651 1652 /* 1653 * We need to get child blockptr/gen from parent before 1654 * we can read it. 
1655 */ 1656 eb = path->nodes[level + 1]; 1657 parent_slot = path->slots[level + 1]; 1658 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1659 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1660 1661 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1662 if (IS_ERR(eb)) { 1663 ret = PTR_ERR(eb); 1664 goto out; 1665 } else if (!extent_buffer_uptodate(eb)) { 1666 free_extent_buffer(eb); 1667 ret = -EIO; 1668 goto out; 1669 } 1670 1671 path->nodes[level] = eb; 1672 path->slots[level] = 0; 1673 1674 btrfs_tree_read_lock(eb); 1675 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1676 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1677 1678 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1679 child_bytenr, 1680 fs_info->nodesize, 1681 GFP_NOFS); 1682 if (ret) 1683 goto out; 1684 } 1685 1686 if (level == 0) { 1687 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1688 path->nodes[level]); 1689 if (ret) 1690 goto out; 1691 1692 /* Nonzero return here means we completed our search */ 1693 ret = adjust_slots_upwards(path, root_level); 1694 if (ret) 1695 break; 1696 1697 /* Restart search with new slots */ 1698 goto walk_down; 1699 } 1700 1701 level--; 1702 } 1703 1704 ret = 0; 1705 out: 1706 btrfs_free_path(path); 1707 1708 return ret; 1709 } 1710 1711 #define UPDATE_NEW 0 1712 #define UPDATE_OLD 1 1713 /* 1714 * Walk all of the roots that points to the bytenr and adjust their refcnts. 1715 */ 1716 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1717 struct ulist *roots, struct ulist *tmp, 1718 struct ulist *qgroups, u64 seq, int update_old) 1719 { 1720 struct ulist_node *unode; 1721 struct ulist_iterator uiter; 1722 struct ulist_node *tmp_unode; 1723 struct ulist_iterator tmp_uiter; 1724 struct btrfs_qgroup *qg; 1725 int ret = 0; 1726 1727 if (!roots) 1728 return 0; 1729 ULIST_ITER_INIT(&uiter); 1730 while ((unode = ulist_next(roots, &uiter))) { 1731 qg = find_qgroup_rb(fs_info, unode->val); 1732 if (!qg) 1733 continue; 1734 1735 ulist_reinit(tmp); 1736 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1737 GFP_ATOMIC); 1738 if (ret < 0) 1739 return ret; 1740 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1741 if (ret < 0) 1742 return ret; 1743 ULIST_ITER_INIT(&tmp_uiter); 1744 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1745 struct btrfs_qgroup_list *glist; 1746 1747 qg = unode_aux_to_qgroup(tmp_unode); 1748 if (update_old) 1749 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1750 else 1751 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1752 list_for_each_entry(glist, &qg->groups, next_group) { 1753 ret = ulist_add(qgroups, glist->group->qgroupid, 1754 qgroup_to_aux(glist->group), 1755 GFP_ATOMIC); 1756 if (ret < 0) 1757 return ret; 1758 ret = ulist_add(tmp, glist->group->qgroupid, 1759 qgroup_to_aux(glist->group), 1760 GFP_ATOMIC); 1761 if (ret < 0) 1762 return ret; 1763 } 1764 } 1765 } 1766 return 0; 1767 } 1768 1769 /* 1770 * Update qgroup rfer/excl counters. 1771 * Rfer update is easy, codes can explain themselves. 1772 * 1773 * Excl update is tricky, the update is split into 2 part. 
 * Part 1: Possible exclusive <-> sharing detect:
 *	|	A	|	!A	|
 *  -------------------------------------
 *  B	|	*	|	-	|
 *  -------------------------------------
 *  !B	|	+	|	**	|
 *  -------------------------------------
 *
 * Conditions:
 * A:	cur_old_roots < nr_old_roots	(not exclusive before)
 * !A:	cur_old_roots == nr_old_roots	(possible exclusive before)
 * B:	cur_new_roots < nr_new_roots	(not exclusive now)
 * !B:	cur_new_roots == nr_new_roots	(possible exclusive now)
 *
 * Results:
 * +: Possible sharing -> exclusive	-: Possible exclusive -> sharing
 * *: Definitely not changed.		**: Possible unchanged.
 *
 * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
 *
 * To make the logic clear, we first use condition A and B to split
 * combination into 4 results.
 *
 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
 * only one variant may be 0.
 *
 * Lastly, check result **, since there are 2 variants that may be 0, split
 * them again (2x2).
 * But this time we don't need to consider other things, the code and logic
 * are easy to understand now.
 */
static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
				  struct ulist *qgroups,
				  u64 nr_old_roots,
				  u64 nr_new_roots,
				  u64 num_bytes, u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	u64 cur_new_count, cur_old_count;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(qgroups, &uiter))) {
		bool dirty = false;

		qg = unode_aux_to_qgroup(unode);
		cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
		cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);

		trace_qgroup_update_counters(fs_info, qg->qgroupid,
					     cur_old_count, cur_new_count);

		/* Rfer update part */
		if (cur_old_count == 0 && cur_new_count > 0) {
			qg->rfer += num_bytes;
			qg->rfer_cmpr += num_bytes;
			dirty = true;
		}
		if (cur_old_count > 0 && cur_new_count == 0) {
			qg->rfer -= num_bytes;
			qg->rfer_cmpr -= num_bytes;
			dirty = true;
		}

		/* Excl update part */
		/* Exclusive/none -> shared case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count < nr_new_roots) {
			/* Exclusive -> shared */
			if (cur_old_count != 0) {
				qg->excl -= num_bytes;
				qg->excl_cmpr -= num_bytes;
				dirty = true;
			}
		}

		/* Shared -> exclusive/none case */
		if (cur_old_count < nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			/* Shared->exclusive */
			if (cur_new_count != 0) {
				qg->excl += num_bytes;
				qg->excl_cmpr += num_bytes;
				dirty = true;
			}
		}

		/* Exclusive/none -> exclusive/none case */
		if (cur_old_count == nr_old_roots &&
		    cur_new_count == nr_new_roots) {
			if (cur_old_count == 0) {
				/* None -> exclusive/none */

				if (cur_new_count != 0) {
					/* None -> exclusive */
					qg->excl += num_bytes;
					qg->excl_cmpr += num_bytes;
					dirty = true;
				}
				/* None -> none, nothing changed */
			} else {
				/* Exclusive -> exclusive/none */

				if (cur_new_count == 0) {
					/* Exclusive -> none */
					qg->excl -= num_bytes;
					qg->excl_cmpr -= num_bytes;
					dirty = true;
				}
				/* Exclusive -> exclusive, nothing changed */
			}
		}

		if (dirty)
			qgroup_dirty(fs_info, qg);
	}
	return 0;
} 1893 1894 /* 1895 * Check if the @roots potentially is a list of fs tree roots 1896 * 1897 * Return 0 for definitely not a fs/subvol tree roots ulist 1898 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 1899 * one as well) 1900 */ 1901 static int maybe_fs_roots(struct ulist *roots) 1902 { 1903 struct ulist_node *unode; 1904 struct ulist_iterator uiter; 1905 1906 /* Empty one, still possible for fs roots */ 1907 if (!roots || roots->nnodes == 0) 1908 return 1; 1909 1910 ULIST_ITER_INIT(&uiter); 1911 unode = ulist_next(roots, &uiter); 1912 if (!unode) 1913 return 1; 1914 1915 /* 1916 * If it contains fs tree roots, then it must belong to fs/subvol 1917 * trees. 1918 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 1919 */ 1920 return is_fstree(unode->val); 1921 } 1922 1923 int 1924 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1925 struct btrfs_fs_info *fs_info, 1926 u64 bytenr, u64 num_bytes, 1927 struct ulist *old_roots, struct ulist *new_roots) 1928 { 1929 struct ulist *qgroups = NULL; 1930 struct ulist *tmp = NULL; 1931 u64 seq; 1932 u64 nr_new_roots = 0; 1933 u64 nr_old_roots = 0; 1934 int ret = 0; 1935 1936 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1937 return 0; 1938 1939 if (new_roots) { 1940 if (!maybe_fs_roots(new_roots)) 1941 goto out_free; 1942 nr_new_roots = new_roots->nnodes; 1943 } 1944 if (old_roots) { 1945 if (!maybe_fs_roots(old_roots)) 1946 goto out_free; 1947 nr_old_roots = old_roots->nnodes; 1948 } 1949 1950 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 1951 if (nr_old_roots == 0 && nr_new_roots == 0) 1952 goto out_free; 1953 1954 BUG_ON(!fs_info->quota_root); 1955 1956 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1957 nr_old_roots, nr_new_roots); 1958 1959 qgroups = ulist_alloc(GFP_NOFS); 1960 if (!qgroups) { 1961 ret = -ENOMEM; 1962 goto out_free; 1963 } 1964 tmp = ulist_alloc(GFP_NOFS); 1965 if (!tmp) { 1966 ret = -ENOMEM; 1967 goto out_free; 1968 } 1969 1970 mutex_lock(&fs_info->qgroup_rescan_lock); 1971 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1972 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1973 mutex_unlock(&fs_info->qgroup_rescan_lock); 1974 ret = 0; 1975 goto out_free; 1976 } 1977 } 1978 mutex_unlock(&fs_info->qgroup_rescan_lock); 1979 1980 spin_lock(&fs_info->qgroup_lock); 1981 seq = fs_info->qgroup_seq; 1982 1983 /* Update old refcnts using old_roots */ 1984 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1985 UPDATE_OLD); 1986 if (ret < 0) 1987 goto out; 1988 1989 /* Update new refcnts using new_roots */ 1990 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1991 UPDATE_NEW); 1992 if (ret < 0) 1993 goto out; 1994 1995 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1996 num_bytes, seq); 1997 1998 /* 1999 * Bump qgroup_seq to avoid seq overlap 2000 */ 2001 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 2002 out: 2003 spin_unlock(&fs_info->qgroup_lock); 2004 out_free: 2005 ulist_free(tmp); 2006 ulist_free(qgroups); 2007 ulist_free(old_roots); 2008 ulist_free(new_roots); 2009 return ret; 2010 } 2011 2012 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2013 struct btrfs_fs_info *fs_info) 2014 { 2015 struct btrfs_qgroup_extent_record *record; 2016 struct btrfs_delayed_ref_root *delayed_refs; 2017 struct ulist *new_roots = NULL; 2018 struct rb_node *node; 2019 u64 qgroup_to_skip; 2020 int ret = 0; 2021 2022 
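	/*
	 * Walk every extent record queued on the delayed-refs dirty tree for
	 * this transaction: pair each record's old_roots (captured when the
	 * extent was traced, or looked up from the commit root below if that
	 * was missed) with a fresh new_roots lookup, hand both to
	 * btrfs_qgroup_account_extent(), then free the record.
	 */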
delayed_refs = &trans->transaction->delayed_refs; 2023 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2024 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2025 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2026 node); 2027 2028 trace_btrfs_qgroup_account_extents(fs_info, record); 2029 2030 if (!ret) { 2031 /* 2032 * Old roots should be searched when inserting qgroup 2033 * extent record 2034 */ 2035 if (WARN_ON(!record->old_roots)) { 2036 /* Search commit root to find old_roots */ 2037 ret = btrfs_find_all_roots(NULL, fs_info, 2038 record->bytenr, 0, 2039 &record->old_roots, false); 2040 if (ret < 0) 2041 goto cleanup; 2042 } 2043 2044 /* 2045 * Use SEQ_LAST as time_seq to do special search, which 2046 * doesn't lock tree or delayed_refs and search current 2047 * root. It's safe inside commit_transaction(). 2048 */ 2049 ret = btrfs_find_all_roots(trans, fs_info, 2050 record->bytenr, SEQ_LAST, &new_roots, false); 2051 if (ret < 0) 2052 goto cleanup; 2053 if (qgroup_to_skip) { 2054 ulist_del(new_roots, qgroup_to_skip, 0); 2055 ulist_del(record->old_roots, qgroup_to_skip, 2056 0); 2057 } 2058 ret = btrfs_qgroup_account_extent(trans, fs_info, 2059 record->bytenr, record->num_bytes, 2060 record->old_roots, new_roots); 2061 record->old_roots = NULL; 2062 new_roots = NULL; 2063 } 2064 cleanup: 2065 ulist_free(record->old_roots); 2066 ulist_free(new_roots); 2067 new_roots = NULL; 2068 rb_erase(node, &delayed_refs->dirty_extent_root); 2069 kfree(record); 2070 2071 } 2072 return ret; 2073 } 2074 2075 /* 2076 * called from commit_transaction. Writes all changed qgroups to disk. 2077 */ 2078 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2079 struct btrfs_fs_info *fs_info) 2080 { 2081 struct btrfs_root *quota_root = fs_info->quota_root; 2082 int ret = 0; 2083 int start_rescan_worker = 0; 2084 2085 if (!quota_root) 2086 goto out; 2087 2088 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2089 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2090 start_rescan_worker = 1; 2091 2092 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2093 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2094 2095 spin_lock(&fs_info->qgroup_lock); 2096 while (!list_empty(&fs_info->dirty_qgroups)) { 2097 struct btrfs_qgroup *qgroup; 2098 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2099 struct btrfs_qgroup, dirty); 2100 list_del_init(&qgroup->dirty); 2101 spin_unlock(&fs_info->qgroup_lock); 2102 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2103 if (ret) 2104 fs_info->qgroup_flags |= 2105 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2106 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2107 if (ret) 2108 fs_info->qgroup_flags |= 2109 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2110 spin_lock(&fs_info->qgroup_lock); 2111 } 2112 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2113 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2114 else 2115 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2116 spin_unlock(&fs_info->qgroup_lock); 2117 2118 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2119 if (ret) 2120 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2121 2122 if (!ret && start_rescan_worker) { 2123 ret = qgroup_rescan_init(fs_info, 0, 1); 2124 if (!ret) { 2125 qgroup_rescan_zero_tracking(fs_info); 2126 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2127 &fs_info->qgroup_rescan_work); 2128 } 2129 ret = 0; 2130 } 2131 2132 out: 2133 2134 return ret; 2135 } 2136 2137 /* 2138 * Copy the accounting 
information between qgroups. This is necessary 2139 * when a snapshot or a subvolume is created. Throwing an error will 2140 * cause a transaction abort so we take extra care here to only error 2141 * when a readonly fs is a reasonable outcome. 2142 */ 2143 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2144 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2145 struct btrfs_qgroup_inherit *inherit) 2146 { 2147 int ret = 0; 2148 int i; 2149 u64 *i_qgroups; 2150 struct btrfs_root *quota_root = fs_info->quota_root; 2151 struct btrfs_qgroup *srcgroup; 2152 struct btrfs_qgroup *dstgroup; 2153 u32 level_size = 0; 2154 u64 nums; 2155 2156 mutex_lock(&fs_info->qgroup_ioctl_lock); 2157 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2158 goto out; 2159 2160 if (!quota_root) { 2161 ret = -EINVAL; 2162 goto out; 2163 } 2164 2165 if (inherit) { 2166 i_qgroups = (u64 *)(inherit + 1); 2167 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2168 2 * inherit->num_excl_copies; 2169 for (i = 0; i < nums; ++i) { 2170 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2171 2172 /* 2173 * Zero out invalid groups so we can ignore 2174 * them later. 2175 */ 2176 if (!srcgroup || 2177 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2178 *i_qgroups = 0ULL; 2179 2180 ++i_qgroups; 2181 } 2182 } 2183 2184 /* 2185 * create a tracking group for the subvol itself 2186 */ 2187 ret = add_qgroup_item(trans, quota_root, objectid); 2188 if (ret) 2189 goto out; 2190 2191 if (srcid) { 2192 struct btrfs_root *srcroot; 2193 struct btrfs_key srckey; 2194 2195 srckey.objectid = srcid; 2196 srckey.type = BTRFS_ROOT_ITEM_KEY; 2197 srckey.offset = (u64)-1; 2198 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2199 if (IS_ERR(srcroot)) { 2200 ret = PTR_ERR(srcroot); 2201 goto out; 2202 } 2203 2204 level_size = fs_info->nodesize; 2205 } 2206 2207 /* 2208 * add qgroup to all inherited groups 2209 */ 2210 if (inherit) { 2211 i_qgroups = (u64 *)(inherit + 1); 2212 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2213 if (*i_qgroups == 0) 2214 continue; 2215 ret = add_qgroup_relation_item(trans, quota_root, 2216 objectid, *i_qgroups); 2217 if (ret && ret != -EEXIST) 2218 goto out; 2219 ret = add_qgroup_relation_item(trans, quota_root, 2220 *i_qgroups, objectid); 2221 if (ret && ret != -EEXIST) 2222 goto out; 2223 } 2224 ret = 0; 2225 } 2226 2227 2228 spin_lock(&fs_info->qgroup_lock); 2229 2230 dstgroup = add_qgroup_rb(fs_info, objectid); 2231 if (IS_ERR(dstgroup)) { 2232 ret = PTR_ERR(dstgroup); 2233 goto unlock; 2234 } 2235 2236 if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) { 2237 dstgroup->lim_flags = inherit->lim.flags; 2238 dstgroup->max_rfer = inherit->lim.max_rfer; 2239 dstgroup->max_excl = inherit->lim.max_excl; 2240 dstgroup->rsv_rfer = inherit->lim.rsv_rfer; 2241 dstgroup->rsv_excl = inherit->lim.rsv_excl; 2242 2243 ret = update_qgroup_limit_item(trans, quota_root, dstgroup); 2244 if (ret) { 2245 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2246 btrfs_info(fs_info, 2247 "unable to update quota limit for %llu", 2248 dstgroup->qgroupid); 2249 goto unlock; 2250 } 2251 } 2252 2253 if (srcid) { 2254 srcgroup = find_qgroup_rb(fs_info, srcid); 2255 if (!srcgroup) 2256 goto unlock; 2257 2258 /* 2259 * We call inherit after we clone the root in order to make sure 2260 * our counts don't go crazy, so at this point the only 2261 * difference between the two roots should be the root node. 
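 * That is why the referenced counters are copied from the source group
 * unchanged below, while the exclusive counters of both groups are set to
 * level_size (one tree block, i.e. fs_info->nodesize): right after the
 * clone, the only block either root owns exclusively is its own root node.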
2262 */ 2263 dstgroup->rfer = srcgroup->rfer; 2264 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr; 2265 dstgroup->excl = level_size; 2266 dstgroup->excl_cmpr = level_size; 2267 srcgroup->excl = level_size; 2268 srcgroup->excl_cmpr = level_size; 2269 2270 /* inherit the limit info */ 2271 dstgroup->lim_flags = srcgroup->lim_flags; 2272 dstgroup->max_rfer = srcgroup->max_rfer; 2273 dstgroup->max_excl = srcgroup->max_excl; 2274 dstgroup->rsv_rfer = srcgroup->rsv_rfer; 2275 dstgroup->rsv_excl = srcgroup->rsv_excl; 2276 2277 qgroup_dirty(fs_info, dstgroup); 2278 qgroup_dirty(fs_info, srcgroup); 2279 } 2280 2281 if (!inherit) 2282 goto unlock; 2283 2284 i_qgroups = (u64 *)(inherit + 1); 2285 for (i = 0; i < inherit->num_qgroups; ++i) { 2286 if (*i_qgroups) { 2287 ret = add_relation_rb(fs_info, objectid, *i_qgroups); 2288 if (ret) 2289 goto unlock; 2290 } 2291 ++i_qgroups; 2292 } 2293 2294 for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) { 2295 struct btrfs_qgroup *src; 2296 struct btrfs_qgroup *dst; 2297 2298 if (!i_qgroups[0] || !i_qgroups[1]) 2299 continue; 2300 2301 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2302 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2303 2304 if (!src || !dst) { 2305 ret = -EINVAL; 2306 goto unlock; 2307 } 2308 2309 dst->rfer = src->rfer - level_size; 2310 dst->rfer_cmpr = src->rfer_cmpr - level_size; 2311 } 2312 for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) { 2313 struct btrfs_qgroup *src; 2314 struct btrfs_qgroup *dst; 2315 2316 if (!i_qgroups[0] || !i_qgroups[1]) 2317 continue; 2318 2319 src = find_qgroup_rb(fs_info, i_qgroups[0]); 2320 dst = find_qgroup_rb(fs_info, i_qgroups[1]); 2321 2322 if (!src || !dst) { 2323 ret = -EINVAL; 2324 goto unlock; 2325 } 2326 2327 dst->excl = src->excl + level_size; 2328 dst->excl_cmpr = src->excl_cmpr + level_size; 2329 } 2330 2331 unlock: 2332 spin_unlock(&fs_info->qgroup_lock); 2333 out: 2334 mutex_unlock(&fs_info->qgroup_ioctl_lock); 2335 return ret; 2336 } 2337 2338 static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes) 2339 { 2340 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) && 2341 qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer) 2342 return false; 2343 2344 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) && 2345 qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl) 2346 return false; 2347 2348 return true; 2349 } 2350 2351 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce) 2352 { 2353 struct btrfs_root *quota_root; 2354 struct btrfs_qgroup *qgroup; 2355 struct btrfs_fs_info *fs_info = root->fs_info; 2356 u64 ref_root = root->root_key.objectid; 2357 int ret = 0; 2358 int retried = 0; 2359 struct ulist_node *unode; 2360 struct ulist_iterator uiter; 2361 2362 if (!is_fstree(ref_root)) 2363 return 0; 2364 2365 if (num_bytes == 0) 2366 return 0; 2367 2368 if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) && 2369 capable(CAP_SYS_RESOURCE)) 2370 enforce = false; 2371 2372 retry: 2373 spin_lock(&fs_info->qgroup_lock); 2374 quota_root = fs_info->quota_root; 2375 if (!quota_root) 2376 goto out; 2377 2378 qgroup = find_qgroup_rb(fs_info, ref_root); 2379 if (!qgroup) 2380 goto out; 2381 2382 /* 2383 * in a first step, we check all affected qgroups if any limits would 2384 * be exceeded 2385 */ 2386 ulist_reinit(fs_info->qgroup_ulist); 2387 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2388 (uintptr_t)qgroup, GFP_ATOMIC); 2389 if (ret < 0) 2390 goto out; 2391 ULIST_ITER_INIT(&uiter); 2392 while ((unode = 
ulist_next(fs_info->qgroup_ulist, &uiter))) { 2393 struct btrfs_qgroup *qg; 2394 struct btrfs_qgroup_list *glist; 2395 2396 qg = unode_aux_to_qgroup(unode); 2397 2398 if (enforce && !qgroup_check_limits(qg, num_bytes)) { 2399 /* 2400 * Commit the tree and retry, since we may have 2401 * deletions which would free up space. 2402 */ 2403 if (!retried && qg->reserved > 0) { 2404 struct btrfs_trans_handle *trans; 2405 2406 spin_unlock(&fs_info->qgroup_lock); 2407 ret = btrfs_start_delalloc_inodes(root, 0); 2408 if (ret) 2409 return ret; 2410 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); 2411 trans = btrfs_join_transaction(root); 2412 if (IS_ERR(trans)) 2413 return PTR_ERR(trans); 2414 ret = btrfs_commit_transaction(trans); 2415 if (ret) 2416 return ret; 2417 retried++; 2418 goto retry; 2419 } 2420 ret = -EDQUOT; 2421 goto out; 2422 } 2423 2424 list_for_each_entry(glist, &qg->groups, next_group) { 2425 ret = ulist_add(fs_info->qgroup_ulist, 2426 glist->group->qgroupid, 2427 (uintptr_t)glist->group, GFP_ATOMIC); 2428 if (ret < 0) 2429 goto out; 2430 } 2431 } 2432 ret = 0; 2433 /* 2434 * no limits exceeded, now record the reservation into all qgroups 2435 */ 2436 ULIST_ITER_INIT(&uiter); 2437 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2438 struct btrfs_qgroup *qg; 2439 2440 qg = unode_aux_to_qgroup(unode); 2441 2442 trace_qgroup_update_reserve(fs_info, qg, num_bytes); 2443 qg->reserved += num_bytes; 2444 } 2445 2446 out: 2447 spin_unlock(&fs_info->qgroup_lock); 2448 return ret; 2449 } 2450 2451 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info, 2452 u64 ref_root, u64 num_bytes) 2453 { 2454 struct btrfs_root *quota_root; 2455 struct btrfs_qgroup *qgroup; 2456 struct ulist_node *unode; 2457 struct ulist_iterator uiter; 2458 int ret = 0; 2459 2460 if (!is_fstree(ref_root)) 2461 return; 2462 2463 if (num_bytes == 0) 2464 return; 2465 2466 spin_lock(&fs_info->qgroup_lock); 2467 2468 quota_root = fs_info->quota_root; 2469 if (!quota_root) 2470 goto out; 2471 2472 qgroup = find_qgroup_rb(fs_info, ref_root); 2473 if (!qgroup) 2474 goto out; 2475 2476 ulist_reinit(fs_info->qgroup_ulist); 2477 ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid, 2478 (uintptr_t)qgroup, GFP_ATOMIC); 2479 if (ret < 0) 2480 goto out; 2481 ULIST_ITER_INIT(&uiter); 2482 while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) { 2483 struct btrfs_qgroup *qg; 2484 struct btrfs_qgroup_list *glist; 2485 2486 qg = unode_aux_to_qgroup(unode); 2487 2488 trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes); 2489 if (qg->reserved < num_bytes) 2490 report_reserved_underflow(fs_info, qg, num_bytes); 2491 else 2492 qg->reserved -= num_bytes; 2493 2494 list_for_each_entry(glist, &qg->groups, next_group) { 2495 ret = ulist_add(fs_info->qgroup_ulist, 2496 glist->group->qgroupid, 2497 (uintptr_t)glist->group, GFP_ATOMIC); 2498 if (ret < 0) 2499 goto out; 2500 } 2501 } 2502 2503 out: 2504 spin_unlock(&fs_info->qgroup_lock); 2505 } 2506 2507 /* 2508 * returns < 0 on error, 0 when more leafs are to be scanned. 2509 * returns 1 when done. 
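 * (The value 1 is simply propagated from btrfs_search_slot_for_read() once
 * no leaf exists at or beyond the current rescan progress key; the rescan
 * worker treats it as the signal to commit and finish the scan.)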
2510 */ 2511 static int 2512 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 2513 struct btrfs_trans_handle *trans) 2514 { 2515 struct btrfs_key found; 2516 struct extent_buffer *scratch_leaf = NULL; 2517 struct ulist *roots = NULL; 2518 struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); 2519 u64 num_bytes; 2520 int slot; 2521 int ret; 2522 2523 mutex_lock(&fs_info->qgroup_rescan_lock); 2524 ret = btrfs_search_slot_for_read(fs_info->extent_root, 2525 &fs_info->qgroup_rescan_progress, 2526 path, 1, 0); 2527 2528 btrfs_debug(fs_info, 2529 "current progress key (%llu %u %llu), search_slot ret %d", 2530 fs_info->qgroup_rescan_progress.objectid, 2531 fs_info->qgroup_rescan_progress.type, 2532 fs_info->qgroup_rescan_progress.offset, ret); 2533 2534 if (ret) { 2535 /* 2536 * The rescan is about to end, we will not be scanning any 2537 * further blocks. We cannot unset the RESCAN flag here, because 2538 * we want to commit the transaction if everything went well. 2539 * To make the live accounting work in this phase, we set our 2540 * scan progress pointer such that every real extent objectid 2541 * will be smaller. 2542 */ 2543 fs_info->qgroup_rescan_progress.objectid = (u64)-1; 2544 btrfs_release_path(path); 2545 mutex_unlock(&fs_info->qgroup_rescan_lock); 2546 return ret; 2547 } 2548 2549 btrfs_item_key_to_cpu(path->nodes[0], &found, 2550 btrfs_header_nritems(path->nodes[0]) - 1); 2551 fs_info->qgroup_rescan_progress.objectid = found.objectid + 1; 2552 2553 btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2554 scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]); 2555 if (!scratch_leaf) { 2556 ret = -ENOMEM; 2557 mutex_unlock(&fs_info->qgroup_rescan_lock); 2558 goto out; 2559 } 2560 extent_buffer_get(scratch_leaf); 2561 btrfs_tree_read_lock(scratch_leaf); 2562 btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK); 2563 slot = path->slots[0]; 2564 btrfs_release_path(path); 2565 mutex_unlock(&fs_info->qgroup_rescan_lock); 2566 2567 for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { 2568 btrfs_item_key_to_cpu(scratch_leaf, &found, slot); 2569 if (found.type != BTRFS_EXTENT_ITEM_KEY && 2570 found.type != BTRFS_METADATA_ITEM_KEY) 2571 continue; 2572 if (found.type == BTRFS_METADATA_ITEM_KEY) 2573 num_bytes = fs_info->nodesize; 2574 else 2575 num_bytes = found.offset; 2576 2577 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, 2578 &roots, false); 2579 if (ret < 0) 2580 goto out; 2581 /* For rescan, just pass old_roots as NULL */ 2582 ret = btrfs_qgroup_account_extent(trans, fs_info, 2583 found.objectid, num_bytes, NULL, roots); 2584 if (ret < 0) 2585 goto out; 2586 } 2587 out: 2588 if (scratch_leaf) { 2589 btrfs_tree_read_unlock_blocking(scratch_leaf); 2590 free_extent_buffer(scratch_leaf); 2591 } 2592 btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem); 2593 2594 return ret; 2595 } 2596 2597 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) 2598 { 2599 struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, 2600 qgroup_rescan_work); 2601 struct btrfs_path *path; 2602 struct btrfs_trans_handle *trans = NULL; 2603 int err = -ENOMEM; 2604 int ret = 0; 2605 2606 path = btrfs_alloc_path(); 2607 if (!path) 2608 goto out; 2609 2610 err = 0; 2611 while (!err && !btrfs_fs_closing(fs_info)) { 2612 trans = btrfs_start_transaction(fs_info->fs_root, 0); 2613 if (IS_ERR(trans)) { 2614 err = PTR_ERR(trans); 2615 break; 2616 } 2617 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { 2618 err = -EINTR; 
2619 } else { 2620 err = qgroup_rescan_leaf(fs_info, path, trans); 2621 } 2622 if (err > 0) 2623 btrfs_commit_transaction(trans); 2624 else 2625 btrfs_end_transaction(trans); 2626 } 2627 2628 out: 2629 btrfs_free_path(path); 2630 2631 mutex_lock(&fs_info->qgroup_rescan_lock); 2632 if (!btrfs_fs_closing(fs_info)) 2633 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2634 2635 if (err > 0 && 2636 fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) { 2637 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2638 } else if (err < 0) { 2639 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2640 } 2641 mutex_unlock(&fs_info->qgroup_rescan_lock); 2642 2643 /* 2644 * only update status, since the previous part has already updated the 2645 * qgroup info. 2646 */ 2647 trans = btrfs_start_transaction(fs_info->quota_root, 1); 2648 if (IS_ERR(trans)) { 2649 err = PTR_ERR(trans); 2650 btrfs_err(fs_info, 2651 "fail to start transaction for status update: %d", 2652 err); 2653 goto done; 2654 } 2655 ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root); 2656 if (ret < 0) { 2657 err = ret; 2658 btrfs_err(fs_info, "fail to update qgroup status: %d", err); 2659 } 2660 btrfs_end_transaction(trans); 2661 2662 if (btrfs_fs_closing(fs_info)) { 2663 btrfs_info(fs_info, "qgroup scan paused"); 2664 } else if (err >= 0) { 2665 btrfs_info(fs_info, "qgroup scan completed%s", 2666 err > 0 ? " (inconsistency flag cleared)" : ""); 2667 } else { 2668 btrfs_err(fs_info, "qgroup scan failed with %d", err); 2669 } 2670 2671 done: 2672 mutex_lock(&fs_info->qgroup_rescan_lock); 2673 fs_info->qgroup_rescan_running = false; 2674 mutex_unlock(&fs_info->qgroup_rescan_lock); 2675 complete_all(&fs_info->qgroup_rescan_completion); 2676 } 2677 2678 /* 2679 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all 2680 * memory required for the rescan context. 
2681 */ 2682 static int 2683 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 2684 int init_flags) 2685 { 2686 int ret = 0; 2687 2688 if (!init_flags && 2689 (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) || 2690 !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) { 2691 ret = -EINVAL; 2692 goto err; 2693 } 2694 2695 mutex_lock(&fs_info->qgroup_rescan_lock); 2696 spin_lock(&fs_info->qgroup_lock); 2697 2698 if (init_flags) { 2699 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) 2700 ret = -EINPROGRESS; 2701 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 2702 ret = -EINVAL; 2703 2704 if (ret) { 2705 spin_unlock(&fs_info->qgroup_lock); 2706 mutex_unlock(&fs_info->qgroup_rescan_lock); 2707 goto err; 2708 } 2709 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN; 2710 } 2711 2712 memset(&fs_info->qgroup_rescan_progress, 0, 2713 sizeof(fs_info->qgroup_rescan_progress)); 2714 fs_info->qgroup_rescan_progress.objectid = progress_objectid; 2715 init_completion(&fs_info->qgroup_rescan_completion); 2716 fs_info->qgroup_rescan_running = true; 2717 2718 spin_unlock(&fs_info->qgroup_lock); 2719 mutex_unlock(&fs_info->qgroup_rescan_lock); 2720 2721 memset(&fs_info->qgroup_rescan_work, 0, 2722 sizeof(fs_info->qgroup_rescan_work)); 2723 btrfs_init_work(&fs_info->qgroup_rescan_work, 2724 btrfs_qgroup_rescan_helper, 2725 btrfs_qgroup_rescan_worker, NULL, NULL); 2726 2727 if (ret) { 2728 err: 2729 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret); 2730 return ret; 2731 } 2732 2733 return 0; 2734 } 2735 2736 static void 2737 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info) 2738 { 2739 struct rb_node *n; 2740 struct btrfs_qgroup *qgroup; 2741 2742 spin_lock(&fs_info->qgroup_lock); 2743 /* clear all current qgroup tracking information */ 2744 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) { 2745 qgroup = rb_entry(n, struct btrfs_qgroup, node); 2746 qgroup->rfer = 0; 2747 qgroup->rfer_cmpr = 0; 2748 qgroup->excl = 0; 2749 qgroup->excl_cmpr = 0; 2750 } 2751 spin_unlock(&fs_info->qgroup_lock); 2752 } 2753 2754 int 2755 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) 2756 { 2757 int ret = 0; 2758 struct btrfs_trans_handle *trans; 2759 2760 ret = qgroup_rescan_init(fs_info, 0, 1); 2761 if (ret) 2762 return ret; 2763 2764 /* 2765 * We have set the rescan_progress to 0, which means no more 2766 * delayed refs will be accounted by btrfs_qgroup_account_ref. 2767 * However, btrfs_qgroup_account_ref may be right after its call 2768 * to btrfs_find_all_roots, in which case it would still do the 2769 * accounting. 2770 * To solve this, we're committing the transaction, which will 2771 * ensure we run all delayed refs and only after that, we are 2772 * going to clear all tracking information for a clean start. 
 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}

int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}

/*
 * This is only called from open_ctree() where we're still single threaded,
 * thus locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}

/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or do
 * nothing if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EDQUOT)
 *
 * NOTE: This function may sleep for memory allocation.
 * If btrfs_qgroup_reserve_data() is called multiple times with the same
 * @reserved, the caller must ensure that when an error happens it is OK
 * to free *ALL* reserved space.
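 *
 * A typical pairing, as described by the helpers further down in this file
 * (a sketch, not lifted from any single call site):
 *
 *	btrfs_qgroup_reserve_data()	when the write is being prepared
 *	btrfs_qgroup_release_data()	once the data reached disk and the
 *					file extent item has been inserted
 *	btrfs_qgroup_free_data()	when the range is invalidated or the
 *					write errors out before reaching disk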
 */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved_ret, u64 start,
			u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset *reserved;
	u64 orig_reserved;
	u64 to_reserve;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
	    !is_fstree(root->objectid) || len == 0)
		return 0;

	/* @reserved parameter is mandatory for qgroup */
	if (WARN_ON(!reserved_ret))
		return -EINVAL;
	if (!*reserved_ret) {
		*reserved_ret = extent_changeset_alloc();
		if (!*reserved_ret)
			return -ENOMEM;
	}
	reserved = *reserved_ret;
	/* Record already reserved space */
	orig_reserved = reserved->bytes_changed;
	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, reserved);

	/* Newly reserved space */
	to_reserve = reserved->bytes_changed - orig_reserved;
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					to_reserve, QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;
	ret = qgroup_reserve(root, to_reserve, true);
	if (ret < 0)
		goto cleanup;

	return ret;

cleanup:
	/* cleanup *ALL* already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
	extent_changeset_release(reserved);
	return ret;
}

/* Free ranges specified by @reserved, normally in the error path */
static int qgroup_free_reserved_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset changeset;
	int freed = 0;
	int ret;

	extent_changeset_init(&changeset);
	len = round_up(start + len, root->fs_info->sectorsize);
	start = round_down(start, root->fs_info->sectorsize);

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
		u64 range_start = unode->val;
		/* unode->aux is the inclusive end */
		u64 range_len = unode->aux - range_start + 1;
		u64 free_start;
		u64 free_len;

		extent_changeset_release(&changeset);

		/* Only free the part of the range inside [start, start + len) */
		if (range_start >= start + len ||
		    range_start + range_len <= start)
			continue;
		free_start = max(range_start, start);
		free_len = min(start + len, range_start + range_len) -
			   free_start;
		/*
		 * TODO: Also modify reserved->ranges_reserved to reflect
		 * the modification.
		 *
		 * However, as long as we free qgroup reserved space according
		 * to EXTENT_QGROUP_RESERVED, we won't double free.
		 * So there is no need to rush.
		 */
		ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree,
				free_start, free_start + free_len - 1,
				EXTENT_QGROUP_RESERVED, &changeset);
		if (ret < 0)
			goto out;
		freed += changeset.bytes_changed;
	}
	btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
	ret = freed;
out:
	extent_changeset_release(&changeset);
	return ret;
}

static int __btrfs_qgroup_release_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len,
			int free)
{
	struct extent_changeset changeset;
	int trace_op = QGROUP_RELEASE;
	int ret;

	/* In the release case, we shouldn't have @reserved */
	WARN_ON(!free && reserved);
	if (free && reserved)
		return qgroup_free_reserved_data(inode, reserved, start, len);
	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len - 1, EXTENT_QGROUP_RESERVED, &changeset);
	if (ret < 0)
		goto out;

	if (free)
		trace_op = QGROUP_FREE;
	trace_btrfs_qgroup_release_data(inode, start, len,
					changeset.bytes_changed, trace_op);
	if (free)
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed);
	ret = changeset.bytes_changed;
out:
	extent_changeset_release(&changeset);
	return ret;
}

/*
 * Free a reserved space range from io_tree and related qgroups.
 *
 * Should be called when a range of pages gets invalidated before reaching
 * disk, or for the error cleanup case.
 * If @reserved is given, only the reserved range in [@start, @start + @len)
 * will be freed.
 *
 * For data written to disk, use btrfs_qgroup_release_data().
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}

/*
 * Release a reserved space range from io_tree only.
 *
 * Should be called when a range of pages gets written to disk and the
 * corresponding FILE_EXTENT item is inserted into the corresponding root.
 *
 * Since the new qgroup accounting framework only updates qgroup numbers at
 * commit_transaction() time, the reserved space shouldn't be freed from the
 * related qgroups.
 *
 * But we should release the range from io_tree, to allow further writes to
 * be COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}

int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
			      bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid) || num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, (s64)num_bytes);
	ret = qgroup_reserve(root, num_bytes, enforce);
	if (ret < 0)
		return ret;
	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
	return ret;
}

void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 reserved;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
	if (reserved == 0)
		return;
	trace_qgroup_meta_reserve(root, -(s64)reserved);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}

void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}

/*
 * Check for leaked qgroup reserved space, normally at inode destruction
 * time.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
{
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator iter;
	int ret;

	extent_changeset_init(&changeset);
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
				       EXTENT_QGROUP_RESERVED, &changeset);

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(BTRFS_I(inode)->root->fs_info,
				"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				inode->i_ino, unode->val, unode->aux);
		}
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
					  BTRFS_I(inode)->root->objectid,
					  changeset.bytes_changed);
	}
	extent_changeset_release(&changeset);
}