1 /* 2 * Copyright (C) 2011 STRATO. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include <linux/workqueue.h> 26 #include <linux/btrfs.h> 27 28 #include "ctree.h" 29 #include "transaction.h" 30 #include "disk-io.h" 31 #include "locking.h" 32 #include "ulist.h" 33 #include "backref.h" 34 #include "extent_io.h" 35 #include "qgroup.h" 36 37 38 /* TODO XXX FIXME 39 * - subvol delete -> delete when ref goes to 0? delete limits also? 
40 * - reorganize keys 41 * - compressed 42 * - sync 43 * - copy also limits on subvol creation 44 * - limit 45 * - caches fuer ulists 46 * - performance benchmarks 47 * - check all ioctl parameters 48 */ 49 50 static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq, 51 int mod) 52 { 53 if (qg->old_refcnt < seq) 54 qg->old_refcnt = seq; 55 qg->old_refcnt += mod; 56 } 57 58 static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq, 59 int mod) 60 { 61 if (qg->new_refcnt < seq) 62 qg->new_refcnt = seq; 63 qg->new_refcnt += mod; 64 } 65 66 static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq) 67 { 68 if (qg->old_refcnt < seq) 69 return 0; 70 return qg->old_refcnt - seq; 71 } 72 73 static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq) 74 { 75 if (qg->new_refcnt < seq) 76 return 0; 77 return qg->new_refcnt - seq; 78 } 79 80 /* 81 * glue structure to represent the relations between qgroups. 82 */ 83 struct btrfs_qgroup_list { 84 struct list_head next_group; 85 struct list_head next_member; 86 struct btrfs_qgroup *group; 87 struct btrfs_qgroup *member; 88 }; 89 90 static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg) 91 { 92 return (u64)(uintptr_t)qg; 93 } 94 95 static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n) 96 { 97 return (struct btrfs_qgroup *)(uintptr_t)n->aux; 98 } 99 100 static int 101 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, 102 int init_flags); 103 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info); 104 105 /* must be called with qgroup_ioctl_lock held */ 106 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info, 107 u64 qgroupid) 108 { 109 struct rb_node *n = fs_info->qgroup_tree.rb_node; 110 struct btrfs_qgroup *qgroup; 111 112 while (n) { 113 qgroup = rb_entry(n, struct btrfs_qgroup, node); 114 if (qgroup->qgroupid < qgroupid) 115 n = n->rb_left; 116 else if 
(qgroup->qgroupid > qgroupid) 117 n = n->rb_right; 118 else 119 return qgroup; 120 } 121 return NULL; 122 } 123 124 /* must be called with qgroup_lock held */ 125 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, 126 u64 qgroupid) 127 { 128 struct rb_node **p = &fs_info->qgroup_tree.rb_node; 129 struct rb_node *parent = NULL; 130 struct btrfs_qgroup *qgroup; 131 132 while (*p) { 133 parent = *p; 134 qgroup = rb_entry(parent, struct btrfs_qgroup, node); 135 136 if (qgroup->qgroupid < qgroupid) 137 p = &(*p)->rb_left; 138 else if (qgroup->qgroupid > qgroupid) 139 p = &(*p)->rb_right; 140 else 141 return qgroup; 142 } 143 144 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); 145 if (!qgroup) 146 return ERR_PTR(-ENOMEM); 147 148 qgroup->qgroupid = qgroupid; 149 INIT_LIST_HEAD(&qgroup->groups); 150 INIT_LIST_HEAD(&qgroup->members); 151 INIT_LIST_HEAD(&qgroup->dirty); 152 153 rb_link_node(&qgroup->node, parent, p); 154 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree); 155 156 return qgroup; 157 } 158 159 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) 160 { 161 struct btrfs_qgroup_list *list; 162 163 list_del(&qgroup->dirty); 164 while (!list_empty(&qgroup->groups)) { 165 list = list_first_entry(&qgroup->groups, 166 struct btrfs_qgroup_list, next_group); 167 list_del(&list->next_group); 168 list_del(&list->next_member); 169 kfree(list); 170 } 171 172 while (!list_empty(&qgroup->members)) { 173 list = list_first_entry(&qgroup->members, 174 struct btrfs_qgroup_list, next_member); 175 list_del(&list->next_group); 176 list_del(&list->next_member); 177 kfree(list); 178 } 179 kfree(qgroup); 180 } 181 182 /* must be called with qgroup_lock held */ 183 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) 184 { 185 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid); 186 187 if (!qgroup) 188 return -ENOENT; 189 190 rb_erase(&qgroup->node, &fs_info->qgroup_tree); 191 __del_qgroup_rb(qgroup); 192 return 0; 193 } 194 195 
/* must be called with qgroup_lock held */ 196 static int add_relation_rb(struct btrfs_fs_info *fs_info, 197 u64 memberid, u64 parentid) 198 { 199 struct btrfs_qgroup *member; 200 struct btrfs_qgroup *parent; 201 struct btrfs_qgroup_list *list; 202 203 member = find_qgroup_rb(fs_info, memberid); 204 parent = find_qgroup_rb(fs_info, parentid); 205 if (!member || !parent) 206 return -ENOENT; 207 208 list = kzalloc(sizeof(*list), GFP_ATOMIC); 209 if (!list) 210 return -ENOMEM; 211 212 list->group = parent; 213 list->member = member; 214 list_add_tail(&list->next_group, &member->groups); 215 list_add_tail(&list->next_member, &parent->members); 216 217 return 0; 218 } 219 220 /* must be called with qgroup_lock held */ 221 static int del_relation_rb(struct btrfs_fs_info *fs_info, 222 u64 memberid, u64 parentid) 223 { 224 struct btrfs_qgroup *member; 225 struct btrfs_qgroup *parent; 226 struct btrfs_qgroup_list *list; 227 228 member = find_qgroup_rb(fs_info, memberid); 229 parent = find_qgroup_rb(fs_info, parentid); 230 if (!member || !parent) 231 return -ENOENT; 232 233 list_for_each_entry(list, &member->groups, next_group) { 234 if (list->group == parent) { 235 list_del(&list->next_group); 236 list_del(&list->next_member); 237 kfree(list); 238 return 0; 239 } 240 } 241 return -ENOENT; 242 } 243 244 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 245 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid, 246 u64 rfer, u64 excl) 247 { 248 struct btrfs_qgroup *qgroup; 249 250 qgroup = find_qgroup_rb(fs_info, qgroupid); 251 if (!qgroup) 252 return -EINVAL; 253 if (qgroup->rfer != rfer || qgroup->excl != excl) 254 return -EINVAL; 255 return 0; 256 } 257 #endif 258 259 /* 260 * The full config is read in one go, only called from open_ctree() 261 * It doesn't use any locking, as at this point we're still single-threaded 262 */ 263 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) 264 { 265 struct btrfs_key key; 266 struct btrfs_key found_key; 267 struct 
btrfs_root *quota_root = fs_info->quota_root; 268 struct btrfs_path *path = NULL; 269 struct extent_buffer *l; 270 int slot; 271 int ret = 0; 272 u64 flags = 0; 273 u64 rescan_progress = 0; 274 275 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 276 return 0; 277 278 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 279 if (!fs_info->qgroup_ulist) { 280 ret = -ENOMEM; 281 goto out; 282 } 283 284 path = btrfs_alloc_path(); 285 if (!path) { 286 ret = -ENOMEM; 287 goto out; 288 } 289 290 /* default this to quota off, in case no status key is found */ 291 fs_info->qgroup_flags = 0; 292 293 /* 294 * pass 1: read status, all qgroup infos and limits 295 */ 296 key.objectid = 0; 297 key.type = 0; 298 key.offset = 0; 299 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1); 300 if (ret) 301 goto out; 302 303 while (1) { 304 struct btrfs_qgroup *qgroup; 305 306 slot = path->slots[0]; 307 l = path->nodes[0]; 308 btrfs_item_key_to_cpu(l, &found_key, slot); 309 310 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) { 311 struct btrfs_qgroup_status_item *ptr; 312 313 ptr = btrfs_item_ptr(l, slot, 314 struct btrfs_qgroup_status_item); 315 316 if (btrfs_qgroup_status_version(l, ptr) != 317 BTRFS_QGROUP_STATUS_VERSION) { 318 btrfs_err(fs_info, 319 "old qgroup version, quota disabled"); 320 goto out; 321 } 322 if (btrfs_qgroup_status_generation(l, ptr) != 323 fs_info->generation) { 324 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 325 btrfs_err(fs_info, 326 "qgroup generation mismatch, marked as inconsistent"); 327 } 328 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l, 329 ptr); 330 rescan_progress = btrfs_qgroup_status_rescan(l, ptr); 331 goto next1; 332 } 333 334 if (found_key.type != BTRFS_QGROUP_INFO_KEY && 335 found_key.type != BTRFS_QGROUP_LIMIT_KEY) 336 goto next1; 337 338 qgroup = find_qgroup_rb(fs_info, found_key.offset); 339 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) || 340 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) { 341 
btrfs_err(fs_info, "inconsistent qgroup config"); 342 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 343 } 344 if (!qgroup) { 345 qgroup = add_qgroup_rb(fs_info, found_key.offset); 346 if (IS_ERR(qgroup)) { 347 ret = PTR_ERR(qgroup); 348 goto out; 349 } 350 } 351 switch (found_key.type) { 352 case BTRFS_QGROUP_INFO_KEY: { 353 struct btrfs_qgroup_info_item *ptr; 354 355 ptr = btrfs_item_ptr(l, slot, 356 struct btrfs_qgroup_info_item); 357 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr); 358 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr); 359 qgroup->excl = btrfs_qgroup_info_excl(l, ptr); 360 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr); 361 /* generation currently unused */ 362 break; 363 } 364 case BTRFS_QGROUP_LIMIT_KEY: { 365 struct btrfs_qgroup_limit_item *ptr; 366 367 ptr = btrfs_item_ptr(l, slot, 368 struct btrfs_qgroup_limit_item); 369 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr); 370 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr); 371 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr); 372 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr); 373 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr); 374 break; 375 } 376 } 377 next1: 378 ret = btrfs_next_item(quota_root, path); 379 if (ret < 0) 380 goto out; 381 if (ret) 382 break; 383 } 384 btrfs_release_path(path); 385 386 /* 387 * pass 2: read all qgroup relations 388 */ 389 key.objectid = 0; 390 key.type = BTRFS_QGROUP_RELATION_KEY; 391 key.offset = 0; 392 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0); 393 if (ret) 394 goto out; 395 while (1) { 396 slot = path->slots[0]; 397 l = path->nodes[0]; 398 btrfs_item_key_to_cpu(l, &found_key, slot); 399 400 if (found_key.type != BTRFS_QGROUP_RELATION_KEY) 401 goto next2; 402 403 if (found_key.objectid > found_key.offset) { 404 /* parent <- member, not needed to build config */ 405 /* FIXME should we omit the key completely? 
*/ 406 goto next2; 407 } 408 409 ret = add_relation_rb(fs_info, found_key.objectid, 410 found_key.offset); 411 if (ret == -ENOENT) { 412 btrfs_warn(fs_info, 413 "orphan qgroup relation 0x%llx->0x%llx", 414 found_key.objectid, found_key.offset); 415 ret = 0; /* ignore the error */ 416 } 417 if (ret) 418 goto out; 419 next2: 420 ret = btrfs_next_item(quota_root, path); 421 if (ret < 0) 422 goto out; 423 if (ret) 424 break; 425 } 426 out: 427 fs_info->qgroup_flags |= flags; 428 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) 429 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 430 else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && 431 ret >= 0) 432 ret = qgroup_rescan_init(fs_info, rescan_progress, 0); 433 btrfs_free_path(path); 434 435 if (ret < 0) { 436 ulist_free(fs_info->qgroup_ulist); 437 fs_info->qgroup_ulist = NULL; 438 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; 439 } 440 441 return ret < 0 ? ret : 0; 442 } 443 444 /* 445 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), 446 * first two are in single-threaded paths.And for the third one, we have set 447 * quota_root to be null with qgroup_lock held before, so it is safe to clean 448 * up the in-memory structures without qgroup_lock held. 449 */ 450 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) 451 { 452 struct rb_node *n; 453 struct btrfs_qgroup *qgroup; 454 455 while ((n = rb_first(&fs_info->qgroup_tree))) { 456 qgroup = rb_entry(n, struct btrfs_qgroup, node); 457 rb_erase(n, &fs_info->qgroup_tree); 458 __del_qgroup_rb(qgroup); 459 } 460 /* 461 * we call btrfs_free_qgroup_config() when umounting 462 * filesystem and disabling quota, so we set qgroup_ulist 463 * to be null here to avoid double free. 
464 */ 465 ulist_free(fs_info->qgroup_ulist); 466 fs_info->qgroup_ulist = NULL; 467 } 468 469 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, 470 struct btrfs_root *quota_root, 471 u64 src, u64 dst) 472 { 473 int ret; 474 struct btrfs_path *path; 475 struct btrfs_key key; 476 477 path = btrfs_alloc_path(); 478 if (!path) 479 return -ENOMEM; 480 481 key.objectid = src; 482 key.type = BTRFS_QGROUP_RELATION_KEY; 483 key.offset = dst; 484 485 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0); 486 487 btrfs_mark_buffer_dirty(path->nodes[0]); 488 489 btrfs_free_path(path); 490 return ret; 491 } 492 493 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, 494 struct btrfs_root *quota_root, 495 u64 src, u64 dst) 496 { 497 int ret; 498 struct btrfs_path *path; 499 struct btrfs_key key; 500 501 path = btrfs_alloc_path(); 502 if (!path) 503 return -ENOMEM; 504 505 key.objectid = src; 506 key.type = BTRFS_QGROUP_RELATION_KEY; 507 key.offset = dst; 508 509 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 510 if (ret < 0) 511 goto out; 512 513 if (ret > 0) { 514 ret = -ENOENT; 515 goto out; 516 } 517 518 ret = btrfs_del_item(trans, quota_root, path); 519 out: 520 btrfs_free_path(path); 521 return ret; 522 } 523 524 static int add_qgroup_item(struct btrfs_trans_handle *trans, 525 struct btrfs_root *quota_root, u64 qgroupid) 526 { 527 int ret; 528 struct btrfs_path *path; 529 struct btrfs_qgroup_info_item *qgroup_info; 530 struct btrfs_qgroup_limit_item *qgroup_limit; 531 struct extent_buffer *leaf; 532 struct btrfs_key key; 533 534 if (btrfs_is_testing(quota_root->fs_info)) 535 return 0; 536 537 path = btrfs_alloc_path(); 538 if (!path) 539 return -ENOMEM; 540 541 key.objectid = 0; 542 key.type = BTRFS_QGROUP_INFO_KEY; 543 key.offset = qgroupid; 544 545 /* 546 * Avoid a transaction abort by catching -EEXIST here. In that 547 * case, we proceed by re-initializing the existing structure 548 * on disk. 
549 */ 550 551 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 552 sizeof(*qgroup_info)); 553 if (ret && ret != -EEXIST) 554 goto out; 555 556 leaf = path->nodes[0]; 557 qgroup_info = btrfs_item_ptr(leaf, path->slots[0], 558 struct btrfs_qgroup_info_item); 559 btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid); 560 btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0); 561 btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0); 562 btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0); 563 btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0); 564 565 btrfs_mark_buffer_dirty(leaf); 566 567 btrfs_release_path(path); 568 569 key.type = BTRFS_QGROUP_LIMIT_KEY; 570 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 571 sizeof(*qgroup_limit)); 572 if (ret && ret != -EEXIST) 573 goto out; 574 575 leaf = path->nodes[0]; 576 qgroup_limit = btrfs_item_ptr(leaf, path->slots[0], 577 struct btrfs_qgroup_limit_item); 578 btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0); 579 btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0); 580 btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0); 581 btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0); 582 btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0); 583 584 btrfs_mark_buffer_dirty(leaf); 585 586 ret = 0; 587 out: 588 btrfs_free_path(path); 589 return ret; 590 } 591 592 static int del_qgroup_item(struct btrfs_trans_handle *trans, 593 struct btrfs_root *quota_root, u64 qgroupid) 594 { 595 int ret; 596 struct btrfs_path *path; 597 struct btrfs_key key; 598 599 path = btrfs_alloc_path(); 600 if (!path) 601 return -ENOMEM; 602 603 key.objectid = 0; 604 key.type = BTRFS_QGROUP_INFO_KEY; 605 key.offset = qgroupid; 606 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 607 if (ret < 0) 608 goto out; 609 610 if (ret > 0) { 611 ret = -ENOENT; 612 goto out; 613 } 614 615 ret = btrfs_del_item(trans, quota_root, path); 616 if (ret) 617 goto out; 618 619 btrfs_release_path(path); 620 621 
key.type = BTRFS_QGROUP_LIMIT_KEY; 622 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1); 623 if (ret < 0) 624 goto out; 625 626 if (ret > 0) { 627 ret = -ENOENT; 628 goto out; 629 } 630 631 ret = btrfs_del_item(trans, quota_root, path); 632 633 out: 634 btrfs_free_path(path); 635 return ret; 636 } 637 638 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans, 639 struct btrfs_root *root, 640 struct btrfs_qgroup *qgroup) 641 { 642 struct btrfs_path *path; 643 struct btrfs_key key; 644 struct extent_buffer *l; 645 struct btrfs_qgroup_limit_item *qgroup_limit; 646 int ret; 647 int slot; 648 649 key.objectid = 0; 650 key.type = BTRFS_QGROUP_LIMIT_KEY; 651 key.offset = qgroup->qgroupid; 652 653 path = btrfs_alloc_path(); 654 if (!path) 655 return -ENOMEM; 656 657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 658 if (ret > 0) 659 ret = -ENOENT; 660 661 if (ret) 662 goto out; 663 664 l = path->nodes[0]; 665 slot = path->slots[0]; 666 qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item); 667 btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags); 668 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer); 669 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl); 670 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer); 671 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl); 672 673 btrfs_mark_buffer_dirty(l); 674 675 out: 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 static int update_qgroup_info_item(struct btrfs_trans_handle *trans, 681 struct btrfs_root *root, 682 struct btrfs_qgroup *qgroup) 683 { 684 struct btrfs_path *path; 685 struct btrfs_key key; 686 struct extent_buffer *l; 687 struct btrfs_qgroup_info_item *qgroup_info; 688 int ret; 689 int slot; 690 691 if (btrfs_is_testing(root->fs_info)) 692 return 0; 693 694 key.objectid = 0; 695 key.type = BTRFS_QGROUP_INFO_KEY; 696 key.offset = qgroup->qgroupid; 697 698 path = 
btrfs_alloc_path(); 699 if (!path) 700 return -ENOMEM; 701 702 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 703 if (ret > 0) 704 ret = -ENOENT; 705 706 if (ret) 707 goto out; 708 709 l = path->nodes[0]; 710 slot = path->slots[0]; 711 qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item); 712 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid); 713 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer); 714 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr); 715 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl); 716 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr); 717 718 btrfs_mark_buffer_dirty(l); 719 720 out: 721 btrfs_free_path(path); 722 return ret; 723 } 724 725 static int update_qgroup_status_item(struct btrfs_trans_handle *trans, 726 struct btrfs_fs_info *fs_info, 727 struct btrfs_root *root) 728 { 729 struct btrfs_path *path; 730 struct btrfs_key key; 731 struct extent_buffer *l; 732 struct btrfs_qgroup_status_item *ptr; 733 int ret; 734 int slot; 735 736 key.objectid = 0; 737 key.type = BTRFS_QGROUP_STATUS_KEY; 738 key.offset = 0; 739 740 path = btrfs_alloc_path(); 741 if (!path) 742 return -ENOMEM; 743 744 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 745 if (ret > 0) 746 ret = -ENOENT; 747 748 if (ret) 749 goto out; 750 751 l = path->nodes[0]; 752 slot = path->slots[0]; 753 ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item); 754 btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags); 755 btrfs_set_qgroup_status_generation(l, ptr, trans->transid); 756 btrfs_set_qgroup_status_rescan(l, ptr, 757 fs_info->qgroup_rescan_progress.objectid); 758 759 btrfs_mark_buffer_dirty(l); 760 761 out: 762 btrfs_free_path(path); 763 return ret; 764 } 765 766 /* 767 * called with qgroup_lock held 768 */ 769 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, 770 struct btrfs_root *root) 771 { 772 struct btrfs_path *path; 773 struct btrfs_key key; 774 
struct extent_buffer *leaf = NULL; 775 int ret; 776 int nr = 0; 777 778 path = btrfs_alloc_path(); 779 if (!path) 780 return -ENOMEM; 781 782 path->leave_spinning = 1; 783 784 key.objectid = 0; 785 key.offset = 0; 786 key.type = 0; 787 788 while (1) { 789 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 790 if (ret < 0) 791 goto out; 792 leaf = path->nodes[0]; 793 nr = btrfs_header_nritems(leaf); 794 if (!nr) 795 break; 796 /* 797 * delete the leaf one by one 798 * since the whole tree is going 799 * to be deleted. 800 */ 801 path->slots[0] = 0; 802 ret = btrfs_del_items(trans, root, path, 0, nr); 803 if (ret) 804 goto out; 805 806 btrfs_release_path(path); 807 } 808 ret = 0; 809 out: 810 set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 811 btrfs_free_path(path); 812 return ret; 813 } 814 815 int btrfs_quota_enable(struct btrfs_trans_handle *trans, 816 struct btrfs_fs_info *fs_info) 817 { 818 struct btrfs_root *quota_root; 819 struct btrfs_root *tree_root = fs_info->tree_root; 820 struct btrfs_path *path = NULL; 821 struct btrfs_qgroup_status_item *ptr; 822 struct extent_buffer *leaf; 823 struct btrfs_key key; 824 struct btrfs_key found_key; 825 struct btrfs_qgroup *qgroup = NULL; 826 int ret = 0; 827 int slot; 828 829 mutex_lock(&fs_info->qgroup_ioctl_lock); 830 if (fs_info->quota_root) { 831 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 832 goto out; 833 } 834 835 fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); 836 if (!fs_info->qgroup_ulist) { 837 ret = -ENOMEM; 838 goto out; 839 } 840 841 /* 842 * initially create the quota tree 843 */ 844 quota_root = btrfs_create_tree(trans, fs_info, 845 BTRFS_QUOTA_TREE_OBJECTID); 846 if (IS_ERR(quota_root)) { 847 ret = PTR_ERR(quota_root); 848 goto out; 849 } 850 851 path = btrfs_alloc_path(); 852 if (!path) { 853 ret = -ENOMEM; 854 goto out_free_root; 855 } 856 857 key.objectid = 0; 858 key.type = BTRFS_QGROUP_STATUS_KEY; 859 key.offset = 0; 860 861 ret = btrfs_insert_empty_item(trans, quota_root, 
path, &key, 862 sizeof(*ptr)); 863 if (ret) 864 goto out_free_path; 865 866 leaf = path->nodes[0]; 867 ptr = btrfs_item_ptr(leaf, path->slots[0], 868 struct btrfs_qgroup_status_item); 869 btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid); 870 btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION); 871 fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON | 872 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 873 btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags); 874 btrfs_set_qgroup_status_rescan(leaf, ptr, 0); 875 876 btrfs_mark_buffer_dirty(leaf); 877 878 key.objectid = 0; 879 key.type = BTRFS_ROOT_REF_KEY; 880 key.offset = 0; 881 882 btrfs_release_path(path); 883 ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0); 884 if (ret > 0) 885 goto out_add_root; 886 if (ret < 0) 887 goto out_free_path; 888 889 890 while (1) { 891 slot = path->slots[0]; 892 leaf = path->nodes[0]; 893 btrfs_item_key_to_cpu(leaf, &found_key, slot); 894 895 if (found_key.type == BTRFS_ROOT_REF_KEY) { 896 ret = add_qgroup_item(trans, quota_root, 897 found_key.offset); 898 if (ret) 899 goto out_free_path; 900 901 qgroup = add_qgroup_rb(fs_info, found_key.offset); 902 if (IS_ERR(qgroup)) { 903 ret = PTR_ERR(qgroup); 904 goto out_free_path; 905 } 906 } 907 ret = btrfs_next_item(tree_root, path); 908 if (ret < 0) 909 goto out_free_path; 910 if (ret) 911 break; 912 } 913 914 out_add_root: 915 btrfs_release_path(path); 916 ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID); 917 if (ret) 918 goto out_free_path; 919 920 qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID); 921 if (IS_ERR(qgroup)) { 922 ret = PTR_ERR(qgroup); 923 goto out_free_path; 924 } 925 spin_lock(&fs_info->qgroup_lock); 926 fs_info->quota_root = quota_root; 927 set_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags); 928 spin_unlock(&fs_info->qgroup_lock); 929 out_free_path: 930 btrfs_free_path(path); 931 out_free_root: 932 if (ret) { 933 free_extent_buffer(quota_root->node); 
934 free_extent_buffer(quota_root->commit_root); 935 kfree(quota_root); 936 } 937 out: 938 if (ret) { 939 ulist_free(fs_info->qgroup_ulist); 940 fs_info->qgroup_ulist = NULL; 941 } 942 mutex_unlock(&fs_info->qgroup_ioctl_lock); 943 return ret; 944 } 945 946 int btrfs_quota_disable(struct btrfs_trans_handle *trans, 947 struct btrfs_fs_info *fs_info) 948 { 949 struct btrfs_root *quota_root; 950 int ret = 0; 951 952 mutex_lock(&fs_info->qgroup_ioctl_lock); 953 if (!fs_info->quota_root) 954 goto out; 955 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 956 set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 957 btrfs_qgroup_wait_for_completion(fs_info, false); 958 spin_lock(&fs_info->qgroup_lock); 959 quota_root = fs_info->quota_root; 960 fs_info->quota_root = NULL; 961 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 962 spin_unlock(&fs_info->qgroup_lock); 963 964 btrfs_free_qgroup_config(fs_info); 965 966 ret = btrfs_clean_quota_tree(trans, quota_root); 967 if (ret) 968 goto out; 969 970 ret = btrfs_del_root(trans, fs_info, "a_root->root_key); 971 if (ret) 972 goto out; 973 974 list_del("a_root->dirty_list); 975 976 btrfs_tree_lock(quota_root->node); 977 clean_tree_block(fs_info, quota_root->node); 978 btrfs_tree_unlock(quota_root->node); 979 btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1); 980 981 free_extent_buffer(quota_root->node); 982 free_extent_buffer(quota_root->commit_root); 983 kfree(quota_root); 984 out: 985 mutex_unlock(&fs_info->qgroup_ioctl_lock); 986 return ret; 987 } 988 989 static void qgroup_dirty(struct btrfs_fs_info *fs_info, 990 struct btrfs_qgroup *qgroup) 991 { 992 if (list_empty(&qgroup->dirty)) 993 list_add(&qgroup->dirty, &fs_info->dirty_qgroups); 994 } 995 996 static void report_reserved_underflow(struct btrfs_fs_info *fs_info, 997 struct btrfs_qgroup *qgroup, 998 u64 num_bytes) 999 { 1000 #ifdef CONFIG_BTRFS_DEBUG 1001 WARN_ON(qgroup->reserved < num_bytes); 1002 btrfs_debug(fs_info, 1003 "qgroup %llu reserved 
space underflow, have: %llu, to free: %llu", 1004 qgroup->qgroupid, qgroup->reserved, num_bytes); 1005 #endif 1006 qgroup->reserved = 0; 1007 } 1008 /* 1009 * The easy accounting, if we are adding/removing the only ref for an extent 1010 * then this qgroup and all of the parent qgroups get their reference and 1011 * exclusive counts adjusted. 1012 * 1013 * Caller should hold fs_info->qgroup_lock. 1014 */ 1015 static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, 1016 struct ulist *tmp, u64 ref_root, 1017 u64 num_bytes, int sign) 1018 { 1019 struct btrfs_qgroup *qgroup; 1020 struct btrfs_qgroup_list *glist; 1021 struct ulist_node *unode; 1022 struct ulist_iterator uiter; 1023 int ret = 0; 1024 1025 qgroup = find_qgroup_rb(fs_info, ref_root); 1026 if (!qgroup) 1027 goto out; 1028 1029 qgroup->rfer += sign * num_bytes; 1030 qgroup->rfer_cmpr += sign * num_bytes; 1031 1032 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1033 qgroup->excl += sign * num_bytes; 1034 qgroup->excl_cmpr += sign * num_bytes; 1035 if (sign > 0) { 1036 trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes); 1037 if (qgroup->reserved < num_bytes) 1038 report_reserved_underflow(fs_info, qgroup, num_bytes); 1039 else 1040 qgroup->reserved -= num_bytes; 1041 } 1042 1043 qgroup_dirty(fs_info, qgroup); 1044 1045 /* Get all of the parent groups that contain this qgroup */ 1046 list_for_each_entry(glist, &qgroup->groups, next_group) { 1047 ret = ulist_add(tmp, glist->group->qgroupid, 1048 qgroup_to_aux(glist->group), GFP_ATOMIC); 1049 if (ret < 0) 1050 goto out; 1051 } 1052 1053 /* Iterate all of the parents and adjust their reference counts */ 1054 ULIST_ITER_INIT(&uiter); 1055 while ((unode = ulist_next(tmp, &uiter))) { 1056 qgroup = unode_aux_to_qgroup(unode); 1057 qgroup->rfer += sign * num_bytes; 1058 qgroup->rfer_cmpr += sign * num_bytes; 1059 WARN_ON(sign < 0 && qgroup->excl < num_bytes); 1060 qgroup->excl += sign * num_bytes; 1061 if (sign > 0) { 1062 
trace_qgroup_update_reserve(fs_info, qgroup, 1063 -(s64)num_bytes); 1064 if (qgroup->reserved < num_bytes) 1065 report_reserved_underflow(fs_info, qgroup, 1066 num_bytes); 1067 else 1068 qgroup->reserved -= num_bytes; 1069 } 1070 qgroup->excl_cmpr += sign * num_bytes; 1071 qgroup_dirty(fs_info, qgroup); 1072 1073 /* Add any parents of the parents */ 1074 list_for_each_entry(glist, &qgroup->groups, next_group) { 1075 ret = ulist_add(tmp, glist->group->qgroupid, 1076 qgroup_to_aux(glist->group), GFP_ATOMIC); 1077 if (ret < 0) 1078 goto out; 1079 } 1080 } 1081 ret = 0; 1082 out: 1083 return ret; 1084 } 1085 1086 1087 /* 1088 * Quick path for updating qgroup with only excl refs. 1089 * 1090 * In that case, just update all parent will be enough. 1091 * Or we needs to do a full rescan. 1092 * Caller should also hold fs_info->qgroup_lock. 1093 * 1094 * Return 0 for quick update, return >0 for need to full rescan 1095 * and mark INCONSISTENT flag. 1096 * Return < 0 for other error. 1097 */ 1098 static int quick_update_accounting(struct btrfs_fs_info *fs_info, 1099 struct ulist *tmp, u64 src, u64 dst, 1100 int sign) 1101 { 1102 struct btrfs_qgroup *qgroup; 1103 int ret = 1; 1104 int err = 0; 1105 1106 qgroup = find_qgroup_rb(fs_info, src); 1107 if (!qgroup) 1108 goto out; 1109 if (qgroup->excl == qgroup->rfer) { 1110 ret = 0; 1111 err = __qgroup_excl_accounting(fs_info, tmp, dst, 1112 qgroup->excl, sign); 1113 if (err < 0) { 1114 ret = err; 1115 goto out; 1116 } 1117 } 1118 out: 1119 if (ret) 1120 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1121 return ret; 1122 } 1123 1124 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, 1125 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1126 { 1127 struct btrfs_root *quota_root; 1128 struct btrfs_qgroup *parent; 1129 struct btrfs_qgroup *member; 1130 struct btrfs_qgroup_list *list; 1131 struct ulist *tmp; 1132 int ret = 0; 1133 1134 /* Check the level of src and dst first */ 1135 if 
(btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) 1136 return -EINVAL; 1137 1138 tmp = ulist_alloc(GFP_KERNEL); 1139 if (!tmp) 1140 return -ENOMEM; 1141 1142 mutex_lock(&fs_info->qgroup_ioctl_lock); 1143 quota_root = fs_info->quota_root; 1144 if (!quota_root) { 1145 ret = -EINVAL; 1146 goto out; 1147 } 1148 member = find_qgroup_rb(fs_info, src); 1149 parent = find_qgroup_rb(fs_info, dst); 1150 if (!member || !parent) { 1151 ret = -EINVAL; 1152 goto out; 1153 } 1154 1155 /* check if such qgroup relation exist firstly */ 1156 list_for_each_entry(list, &member->groups, next_group) { 1157 if (list->group == parent) { 1158 ret = -EEXIST; 1159 goto out; 1160 } 1161 } 1162 1163 ret = add_qgroup_relation_item(trans, quota_root, src, dst); 1164 if (ret) 1165 goto out; 1166 1167 ret = add_qgroup_relation_item(trans, quota_root, dst, src); 1168 if (ret) { 1169 del_qgroup_relation_item(trans, quota_root, src, dst); 1170 goto out; 1171 } 1172 1173 spin_lock(&fs_info->qgroup_lock); 1174 ret = add_relation_rb(fs_info, src, dst); 1175 if (ret < 0) { 1176 spin_unlock(&fs_info->qgroup_lock); 1177 goto out; 1178 } 1179 ret = quick_update_accounting(fs_info, tmp, src, dst, 1); 1180 spin_unlock(&fs_info->qgroup_lock); 1181 out: 1182 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1183 ulist_free(tmp); 1184 return ret; 1185 } 1186 1187 static int __del_qgroup_relation(struct btrfs_trans_handle *trans, 1188 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1189 { 1190 struct btrfs_root *quota_root; 1191 struct btrfs_qgroup *parent; 1192 struct btrfs_qgroup *member; 1193 struct btrfs_qgroup_list *list; 1194 struct ulist *tmp; 1195 int ret = 0; 1196 int err; 1197 1198 tmp = ulist_alloc(GFP_KERNEL); 1199 if (!tmp) 1200 return -ENOMEM; 1201 1202 quota_root = fs_info->quota_root; 1203 if (!quota_root) { 1204 ret = -EINVAL; 1205 goto out; 1206 } 1207 1208 member = find_qgroup_rb(fs_info, src); 1209 parent = find_qgroup_rb(fs_info, dst); 1210 if (!member || !parent) { 1211 ret = -EINVAL; 1212 goto 
out; 1213 } 1214 1215 /* check if such qgroup relation exist firstly */ 1216 list_for_each_entry(list, &member->groups, next_group) { 1217 if (list->group == parent) 1218 goto exist; 1219 } 1220 ret = -ENOENT; 1221 goto out; 1222 exist: 1223 ret = del_qgroup_relation_item(trans, quota_root, src, dst); 1224 err = del_qgroup_relation_item(trans, quota_root, dst, src); 1225 if (err && !ret) 1226 ret = err; 1227 1228 spin_lock(&fs_info->qgroup_lock); 1229 del_relation_rb(fs_info, src, dst); 1230 ret = quick_update_accounting(fs_info, tmp, src, dst, -1); 1231 spin_unlock(&fs_info->qgroup_lock); 1232 out: 1233 ulist_free(tmp); 1234 return ret; 1235 } 1236 1237 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, 1238 struct btrfs_fs_info *fs_info, u64 src, u64 dst) 1239 { 1240 int ret = 0; 1241 1242 mutex_lock(&fs_info->qgroup_ioctl_lock); 1243 ret = __del_qgroup_relation(trans, fs_info, src, dst); 1244 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1245 1246 return ret; 1247 } 1248 1249 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, 1250 struct btrfs_fs_info *fs_info, u64 qgroupid) 1251 { 1252 struct btrfs_root *quota_root; 1253 struct btrfs_qgroup *qgroup; 1254 int ret = 0; 1255 1256 mutex_lock(&fs_info->qgroup_ioctl_lock); 1257 quota_root = fs_info->quota_root; 1258 if (!quota_root) { 1259 ret = -EINVAL; 1260 goto out; 1261 } 1262 qgroup = find_qgroup_rb(fs_info, qgroupid); 1263 if (qgroup) { 1264 ret = -EEXIST; 1265 goto out; 1266 } 1267 1268 ret = add_qgroup_item(trans, quota_root, qgroupid); 1269 if (ret) 1270 goto out; 1271 1272 spin_lock(&fs_info->qgroup_lock); 1273 qgroup = add_qgroup_rb(fs_info, qgroupid); 1274 spin_unlock(&fs_info->qgroup_lock); 1275 1276 if (IS_ERR(qgroup)) 1277 ret = PTR_ERR(qgroup); 1278 out: 1279 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1280 return ret; 1281 } 1282 1283 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, 1284 struct btrfs_fs_info *fs_info, u64 qgroupid) 1285 { 1286 struct btrfs_root *quota_root; 
1287 struct btrfs_qgroup *qgroup; 1288 struct btrfs_qgroup_list *list; 1289 int ret = 0; 1290 1291 mutex_lock(&fs_info->qgroup_ioctl_lock); 1292 quota_root = fs_info->quota_root; 1293 if (!quota_root) { 1294 ret = -EINVAL; 1295 goto out; 1296 } 1297 1298 qgroup = find_qgroup_rb(fs_info, qgroupid); 1299 if (!qgroup) { 1300 ret = -ENOENT; 1301 goto out; 1302 } else { 1303 /* check if there are no children of this qgroup */ 1304 if (!list_empty(&qgroup->members)) { 1305 ret = -EBUSY; 1306 goto out; 1307 } 1308 } 1309 ret = del_qgroup_item(trans, quota_root, qgroupid); 1310 1311 while (!list_empty(&qgroup->groups)) { 1312 list = list_first_entry(&qgroup->groups, 1313 struct btrfs_qgroup_list, next_group); 1314 ret = __del_qgroup_relation(trans, fs_info, 1315 qgroupid, 1316 list->group->qgroupid); 1317 if (ret) 1318 goto out; 1319 } 1320 1321 spin_lock(&fs_info->qgroup_lock); 1322 del_qgroup_rb(fs_info, qgroupid); 1323 spin_unlock(&fs_info->qgroup_lock); 1324 out: 1325 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1326 return ret; 1327 } 1328 1329 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, 1330 struct btrfs_fs_info *fs_info, u64 qgroupid, 1331 struct btrfs_qgroup_limit *limit) 1332 { 1333 struct btrfs_root *quota_root; 1334 struct btrfs_qgroup *qgroup; 1335 int ret = 0; 1336 /* Sometimes we would want to clear the limit on this qgroup. 1337 * To meet this requirement, we treat the -1 as a special value 1338 * which tell kernel to clear the limit on this qgroup. 
1339 */ 1340 const u64 CLEAR_VALUE = -1; 1341 1342 mutex_lock(&fs_info->qgroup_ioctl_lock); 1343 quota_root = fs_info->quota_root; 1344 if (!quota_root) { 1345 ret = -EINVAL; 1346 goto out; 1347 } 1348 1349 qgroup = find_qgroup_rb(fs_info, qgroupid); 1350 if (!qgroup) { 1351 ret = -ENOENT; 1352 goto out; 1353 } 1354 1355 spin_lock(&fs_info->qgroup_lock); 1356 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { 1357 if (limit->max_rfer == CLEAR_VALUE) { 1358 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1359 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; 1360 qgroup->max_rfer = 0; 1361 } else { 1362 qgroup->max_rfer = limit->max_rfer; 1363 } 1364 } 1365 if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { 1366 if (limit->max_excl == CLEAR_VALUE) { 1367 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1368 limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; 1369 qgroup->max_excl = 0; 1370 } else { 1371 qgroup->max_excl = limit->max_excl; 1372 } 1373 } 1374 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { 1375 if (limit->rsv_rfer == CLEAR_VALUE) { 1376 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1377 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; 1378 qgroup->rsv_rfer = 0; 1379 } else { 1380 qgroup->rsv_rfer = limit->rsv_rfer; 1381 } 1382 } 1383 if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { 1384 if (limit->rsv_excl == CLEAR_VALUE) { 1385 qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1386 limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; 1387 qgroup->rsv_excl = 0; 1388 } else { 1389 qgroup->rsv_excl = limit->rsv_excl; 1390 } 1391 } 1392 qgroup->lim_flags |= limit->flags; 1393 1394 spin_unlock(&fs_info->qgroup_lock); 1395 1396 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 1397 if (ret) { 1398 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 1399 btrfs_info(fs_info, "unable to update quota limit for %llu", 1400 qgroupid); 1401 } 1402 1403 out: 1404 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1405 return ret; 1406 } 1407 1408 int 
btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, 1409 struct btrfs_delayed_ref_root *delayed_refs, 1410 struct btrfs_qgroup_extent_record *record) 1411 { 1412 struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; 1413 struct rb_node *parent_node = NULL; 1414 struct btrfs_qgroup_extent_record *entry; 1415 u64 bytenr = record->bytenr; 1416 1417 assert_spin_locked(&delayed_refs->lock); 1418 trace_btrfs_qgroup_trace_extent(fs_info, record); 1419 1420 while (*p) { 1421 parent_node = *p; 1422 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, 1423 node); 1424 if (bytenr < entry->bytenr) 1425 p = &(*p)->rb_left; 1426 else if (bytenr > entry->bytenr) 1427 p = &(*p)->rb_right; 1428 else 1429 return 1; 1430 } 1431 1432 rb_link_node(&record->node, parent_node, p); 1433 rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); 1434 return 0; 1435 } 1436 1437 int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, 1438 struct btrfs_qgroup_extent_record *qrecord) 1439 { 1440 struct ulist *old_root; 1441 u64 bytenr = qrecord->bytenr; 1442 int ret; 1443 1444 ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); 1445 if (ret < 0) 1446 return ret; 1447 1448 /* 1449 * Here we don't need to get the lock of 1450 * trans->transaction->delayed_refs, since inserted qrecord won't 1451 * be deleted, only qrecord->node may be modified (new qrecord insert) 1452 * 1453 * So modifying qrecord->old_roots is safe here 1454 */ 1455 qrecord->old_roots = old_root; 1456 return 0; 1457 } 1458 1459 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, 1460 struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, 1461 gfp_t gfp_flag) 1462 { 1463 struct btrfs_qgroup_extent_record *record; 1464 struct btrfs_delayed_ref_root *delayed_refs; 1465 int ret; 1466 1467 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) 1468 || bytenr == 0 || num_bytes == 0) 1469 return 0; 1470 if (WARN_ON(trans == NULL)) 1471 return -EINVAL; 1472 
record = kmalloc(sizeof(*record), gfp_flag); 1473 if (!record) 1474 return -ENOMEM; 1475 1476 delayed_refs = &trans->transaction->delayed_refs; 1477 record->bytenr = bytenr; 1478 record->num_bytes = num_bytes; 1479 record->old_roots = NULL; 1480 1481 spin_lock(&delayed_refs->lock); 1482 ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record); 1483 spin_unlock(&delayed_refs->lock); 1484 if (ret > 0) { 1485 kfree(record); 1486 return 0; 1487 } 1488 return btrfs_qgroup_trace_extent_post(fs_info, record); 1489 } 1490 1491 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, 1492 struct btrfs_fs_info *fs_info, 1493 struct extent_buffer *eb) 1494 { 1495 int nr = btrfs_header_nritems(eb); 1496 int i, extent_type, ret; 1497 struct btrfs_key key; 1498 struct btrfs_file_extent_item *fi; 1499 u64 bytenr, num_bytes; 1500 1501 /* We can be called directly from walk_up_proc() */ 1502 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1503 return 0; 1504 1505 for (i = 0; i < nr; i++) { 1506 btrfs_item_key_to_cpu(eb, &key, i); 1507 1508 if (key.type != BTRFS_EXTENT_DATA_KEY) 1509 continue; 1510 1511 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 1512 /* filter out non qgroup-accountable extents */ 1513 extent_type = btrfs_file_extent_type(eb, fi); 1514 1515 if (extent_type == BTRFS_FILE_EXTENT_INLINE) 1516 continue; 1517 1518 bytenr = btrfs_file_extent_disk_bytenr(eb, fi); 1519 if (!bytenr) 1520 continue; 1521 1522 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); 1523 1524 ret = btrfs_qgroup_trace_extent(trans, fs_info, bytenr, 1525 num_bytes, GFP_NOFS); 1526 if (ret) 1527 return ret; 1528 } 1529 cond_resched(); 1530 return 0; 1531 } 1532 1533 /* 1534 * Walk up the tree from the bottom, freeing leaves and any interior 1535 * nodes which have had all slots visited. If a node (leaf or 1536 * interior) is freed, the node above it will have it's slot 1537 * incremented. The root node will never be freed. 
1538 * 1539 * At the end of this function, we should have a path which has all 1540 * slots incremented to the next position for a search. If we need to 1541 * read a new node it will be NULL and the node above it will have the 1542 * correct slot selected for a later read. 1543 * 1544 * If we increment the root nodes slot counter past the number of 1545 * elements, 1 is returned to signal completion of the search. 1546 */ 1547 static int adjust_slots_upwards(struct btrfs_path *path, int root_level) 1548 { 1549 int level = 0; 1550 int nr, slot; 1551 struct extent_buffer *eb; 1552 1553 if (root_level == 0) 1554 return 1; 1555 1556 while (level <= root_level) { 1557 eb = path->nodes[level]; 1558 nr = btrfs_header_nritems(eb); 1559 path->slots[level]++; 1560 slot = path->slots[level]; 1561 if (slot >= nr || level == 0) { 1562 /* 1563 * Don't free the root - we will detect this 1564 * condition after our loop and return a 1565 * positive value for caller to stop walking the tree. 1566 */ 1567 if (level != root_level) { 1568 btrfs_tree_unlock_rw(eb, path->locks[level]); 1569 path->locks[level] = 0; 1570 1571 free_extent_buffer(eb); 1572 path->nodes[level] = NULL; 1573 path->slots[level] = 0; 1574 } 1575 } else { 1576 /* 1577 * We have a valid slot to walk back down 1578 * from. Stop here so caller can process these 1579 * new nodes. 
1580 */ 1581 break; 1582 } 1583 1584 level++; 1585 } 1586 1587 eb = path->nodes[root_level]; 1588 if (path->slots[root_level] >= btrfs_header_nritems(eb)) 1589 return 1; 1590 1591 return 0; 1592 } 1593 1594 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, 1595 struct btrfs_root *root, 1596 struct extent_buffer *root_eb, 1597 u64 root_gen, int root_level) 1598 { 1599 struct btrfs_fs_info *fs_info = root->fs_info; 1600 int ret = 0; 1601 int level; 1602 struct extent_buffer *eb = root_eb; 1603 struct btrfs_path *path = NULL; 1604 1605 BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL); 1606 BUG_ON(root_eb == NULL); 1607 1608 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1609 return 0; 1610 1611 if (!extent_buffer_uptodate(root_eb)) { 1612 ret = btrfs_read_buffer(root_eb, root_gen); 1613 if (ret) 1614 goto out; 1615 } 1616 1617 if (root_level == 0) { 1618 ret = btrfs_qgroup_trace_leaf_items(trans, fs_info, root_eb); 1619 goto out; 1620 } 1621 1622 path = btrfs_alloc_path(); 1623 if (!path) 1624 return -ENOMEM; 1625 1626 /* 1627 * Walk down the tree. Missing extent blocks are filled in as 1628 * we go. Metadata is accounted every time we read a new 1629 * extent block. 1630 * 1631 * When we reach a leaf, we account for file extent items in it, 1632 * walk back up the tree (adjusting slot pointers as we go) 1633 * and restart the search process. 1634 */ 1635 extent_buffer_get(root_eb); /* For path */ 1636 path->nodes[root_level] = root_eb; 1637 path->slots[root_level] = 0; 1638 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ 1639 walk_down: 1640 level = root_level; 1641 while (level >= 0) { 1642 if (path->nodes[level] == NULL) { 1643 int parent_slot; 1644 u64 child_gen; 1645 u64 child_bytenr; 1646 1647 /* 1648 * We need to get child blockptr/gen from parent before 1649 * we can read it. 
1650 */ 1651 eb = path->nodes[level + 1]; 1652 parent_slot = path->slots[level + 1]; 1653 child_bytenr = btrfs_node_blockptr(eb, parent_slot); 1654 child_gen = btrfs_node_ptr_generation(eb, parent_slot); 1655 1656 eb = read_tree_block(fs_info, child_bytenr, child_gen); 1657 if (IS_ERR(eb)) { 1658 ret = PTR_ERR(eb); 1659 goto out; 1660 } else if (!extent_buffer_uptodate(eb)) { 1661 free_extent_buffer(eb); 1662 ret = -EIO; 1663 goto out; 1664 } 1665 1666 path->nodes[level] = eb; 1667 path->slots[level] = 0; 1668 1669 btrfs_tree_read_lock(eb); 1670 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1671 path->locks[level] = BTRFS_READ_LOCK_BLOCKING; 1672 1673 ret = btrfs_qgroup_trace_extent(trans, fs_info, 1674 child_bytenr, 1675 fs_info->nodesize, 1676 GFP_NOFS); 1677 if (ret) 1678 goto out; 1679 } 1680 1681 if (level == 0) { 1682 ret = btrfs_qgroup_trace_leaf_items(trans,fs_info, 1683 path->nodes[level]); 1684 if (ret) 1685 goto out; 1686 1687 /* Nonzero return here means we completed our search */ 1688 ret = adjust_slots_upwards(path, root_level); 1689 if (ret) 1690 break; 1691 1692 /* Restart search with new slots */ 1693 goto walk_down; 1694 } 1695 1696 level--; 1697 } 1698 1699 ret = 0; 1700 out: 1701 btrfs_free_path(path); 1702 1703 return ret; 1704 } 1705 1706 #define UPDATE_NEW 0 1707 #define UPDATE_OLD 1 1708 /* 1709 * Walk all of the roots that points to the bytenr and adjust their refcnts. 
1710 */ 1711 static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info, 1712 struct ulist *roots, struct ulist *tmp, 1713 struct ulist *qgroups, u64 seq, int update_old) 1714 { 1715 struct ulist_node *unode; 1716 struct ulist_iterator uiter; 1717 struct ulist_node *tmp_unode; 1718 struct ulist_iterator tmp_uiter; 1719 struct btrfs_qgroup *qg; 1720 int ret = 0; 1721 1722 if (!roots) 1723 return 0; 1724 ULIST_ITER_INIT(&uiter); 1725 while ((unode = ulist_next(roots, &uiter))) { 1726 qg = find_qgroup_rb(fs_info, unode->val); 1727 if (!qg) 1728 continue; 1729 1730 ulist_reinit(tmp); 1731 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg), 1732 GFP_ATOMIC); 1733 if (ret < 0) 1734 return ret; 1735 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC); 1736 if (ret < 0) 1737 return ret; 1738 ULIST_ITER_INIT(&tmp_uiter); 1739 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { 1740 struct btrfs_qgroup_list *glist; 1741 1742 qg = unode_aux_to_qgroup(tmp_unode); 1743 if (update_old) 1744 btrfs_qgroup_update_old_refcnt(qg, seq, 1); 1745 else 1746 btrfs_qgroup_update_new_refcnt(qg, seq, 1); 1747 list_for_each_entry(glist, &qg->groups, next_group) { 1748 ret = ulist_add(qgroups, glist->group->qgroupid, 1749 qgroup_to_aux(glist->group), 1750 GFP_ATOMIC); 1751 if (ret < 0) 1752 return ret; 1753 ret = ulist_add(tmp, glist->group->qgroupid, 1754 qgroup_to_aux(glist->group), 1755 GFP_ATOMIC); 1756 if (ret < 0) 1757 return ret; 1758 } 1759 } 1760 } 1761 return 0; 1762 } 1763 1764 /* 1765 * Update qgroup rfer/excl counters. 1766 * Rfer update is easy, codes can explain themselves. 1767 * 1768 * Excl update is tricky, the update is split into 2 part. 
1769 * Part 1: Possible exclusive <-> sharing detect: 1770 * | A | !A | 1771 * ------------------------------------- 1772 * B | * | - | 1773 * ------------------------------------- 1774 * !B | + | ** | 1775 * ------------------------------------- 1776 * 1777 * Conditions: 1778 * A: cur_old_roots < nr_old_roots (not exclusive before) 1779 * !A: cur_old_roots == nr_old_roots (possible exclusive before) 1780 * B: cur_new_roots < nr_new_roots (not exclusive now) 1781 * !B: cur_new_roots == nr_new_roots (possible exclusive now) 1782 * 1783 * Results: 1784 * +: Possible sharing -> exclusive -: Possible exclusive -> sharing 1785 * *: Definitely not changed. **: Possible unchanged. 1786 * 1787 * For !A and !B condition, the exception is cur_old/new_roots == 0 case. 1788 * 1789 * To make the logic clear, we first use condition A and B to split 1790 * combination into 4 results. 1791 * 1792 * Then, for result "+" and "-", check old/new_roots == 0 case, as in them 1793 * only on variant maybe 0. 1794 * 1795 * Lastly, check result **, since there are 2 variants maybe 0, split them 1796 * again(2x2). 1797 * But this time we don't need to consider other things, the codes and logic 1798 * is easy to understand now. 
1799 */ 1800 static int qgroup_update_counters(struct btrfs_fs_info *fs_info, 1801 struct ulist *qgroups, 1802 u64 nr_old_roots, 1803 u64 nr_new_roots, 1804 u64 num_bytes, u64 seq) 1805 { 1806 struct ulist_node *unode; 1807 struct ulist_iterator uiter; 1808 struct btrfs_qgroup *qg; 1809 u64 cur_new_count, cur_old_count; 1810 1811 ULIST_ITER_INIT(&uiter); 1812 while ((unode = ulist_next(qgroups, &uiter))) { 1813 bool dirty = false; 1814 1815 qg = unode_aux_to_qgroup(unode); 1816 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq); 1817 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq); 1818 1819 trace_qgroup_update_counters(fs_info, qg->qgroupid, 1820 cur_old_count, cur_new_count); 1821 1822 /* Rfer update part */ 1823 if (cur_old_count == 0 && cur_new_count > 0) { 1824 qg->rfer += num_bytes; 1825 qg->rfer_cmpr += num_bytes; 1826 dirty = true; 1827 } 1828 if (cur_old_count > 0 && cur_new_count == 0) { 1829 qg->rfer -= num_bytes; 1830 qg->rfer_cmpr -= num_bytes; 1831 dirty = true; 1832 } 1833 1834 /* Excl update part */ 1835 /* Exclusive/none -> shared case */ 1836 if (cur_old_count == nr_old_roots && 1837 cur_new_count < nr_new_roots) { 1838 /* Exclusive -> shared */ 1839 if (cur_old_count != 0) { 1840 qg->excl -= num_bytes; 1841 qg->excl_cmpr -= num_bytes; 1842 dirty = true; 1843 } 1844 } 1845 1846 /* Shared -> exclusive/none case */ 1847 if (cur_old_count < nr_old_roots && 1848 cur_new_count == nr_new_roots) { 1849 /* Shared->exclusive */ 1850 if (cur_new_count != 0) { 1851 qg->excl += num_bytes; 1852 qg->excl_cmpr += num_bytes; 1853 dirty = true; 1854 } 1855 } 1856 1857 /* Exclusive/none -> exclusive/none case */ 1858 if (cur_old_count == nr_old_roots && 1859 cur_new_count == nr_new_roots) { 1860 if (cur_old_count == 0) { 1861 /* None -> exclusive/none */ 1862 1863 if (cur_new_count != 0) { 1864 /* None -> exclusive */ 1865 qg->excl += num_bytes; 1866 qg->excl_cmpr += num_bytes; 1867 dirty = true; 1868 } 1869 /* None -> none, nothing changed */ 1870 } else { 
1871 /* Exclusive -> exclusive/none */ 1872 1873 if (cur_new_count == 0) { 1874 /* Exclusive -> none */ 1875 qg->excl -= num_bytes; 1876 qg->excl_cmpr -= num_bytes; 1877 dirty = true; 1878 } 1879 /* Exclusive -> exclusive, nothing changed */ 1880 } 1881 } 1882 1883 if (dirty) 1884 qgroup_dirty(fs_info, qg); 1885 } 1886 return 0; 1887 } 1888 1889 /* 1890 * Check if the @roots potentially is a list of fs tree roots 1891 * 1892 * Return 0 for definitely not a fs/subvol tree roots ulist 1893 * Return 1 for possible fs/subvol tree roots in the list (considering an empty 1894 * one as well) 1895 */ 1896 static int maybe_fs_roots(struct ulist *roots) 1897 { 1898 struct ulist_node *unode; 1899 struct ulist_iterator uiter; 1900 1901 /* Empty one, still possible for fs roots */ 1902 if (!roots || roots->nnodes == 0) 1903 return 1; 1904 1905 ULIST_ITER_INIT(&uiter); 1906 unode = ulist_next(roots, &uiter); 1907 if (!unode) 1908 return 1; 1909 1910 /* 1911 * If it contains fs tree roots, then it must belong to fs/subvol 1912 * trees. 1913 * If it contains a non-fs tree, it won't be shared with fs/subvol trees. 
1914 */ 1915 return is_fstree(unode->val); 1916 } 1917 1918 int 1919 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, 1920 struct btrfs_fs_info *fs_info, 1921 u64 bytenr, u64 num_bytes, 1922 struct ulist *old_roots, struct ulist *new_roots) 1923 { 1924 struct ulist *qgroups = NULL; 1925 struct ulist *tmp = NULL; 1926 u64 seq; 1927 u64 nr_new_roots = 0; 1928 u64 nr_old_roots = 0; 1929 int ret = 0; 1930 1931 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 1932 return 0; 1933 1934 if (new_roots) { 1935 if (!maybe_fs_roots(new_roots)) 1936 goto out_free; 1937 nr_new_roots = new_roots->nnodes; 1938 } 1939 if (old_roots) { 1940 if (!maybe_fs_roots(old_roots)) 1941 goto out_free; 1942 nr_old_roots = old_roots->nnodes; 1943 } 1944 1945 /* Quick exit, either not fs tree roots, or won't affect any qgroup */ 1946 if (nr_old_roots == 0 && nr_new_roots == 0) 1947 goto out_free; 1948 1949 BUG_ON(!fs_info->quota_root); 1950 1951 trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes, 1952 nr_old_roots, nr_new_roots); 1953 1954 qgroups = ulist_alloc(GFP_NOFS); 1955 if (!qgroups) { 1956 ret = -ENOMEM; 1957 goto out_free; 1958 } 1959 tmp = ulist_alloc(GFP_NOFS); 1960 if (!tmp) { 1961 ret = -ENOMEM; 1962 goto out_free; 1963 } 1964 1965 mutex_lock(&fs_info->qgroup_rescan_lock); 1966 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 1967 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) { 1968 mutex_unlock(&fs_info->qgroup_rescan_lock); 1969 ret = 0; 1970 goto out_free; 1971 } 1972 } 1973 mutex_unlock(&fs_info->qgroup_rescan_lock); 1974 1975 spin_lock(&fs_info->qgroup_lock); 1976 seq = fs_info->qgroup_seq; 1977 1978 /* Update old refcnts using old_roots */ 1979 ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq, 1980 UPDATE_OLD); 1981 if (ret < 0) 1982 goto out; 1983 1984 /* Update new refcnts using new_roots */ 1985 ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq, 1986 UPDATE_NEW); 1987 if (ret < 0) 1988 goto 
out; 1989 1990 qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots, 1991 num_bytes, seq); 1992 1993 /* 1994 * Bump qgroup_seq to avoid seq overlap 1995 */ 1996 fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; 1997 out: 1998 spin_unlock(&fs_info->qgroup_lock); 1999 out_free: 2000 ulist_free(tmp); 2001 ulist_free(qgroups); 2002 ulist_free(old_roots); 2003 ulist_free(new_roots); 2004 return ret; 2005 } 2006 2007 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, 2008 struct btrfs_fs_info *fs_info) 2009 { 2010 struct btrfs_qgroup_extent_record *record; 2011 struct btrfs_delayed_ref_root *delayed_refs; 2012 struct ulist *new_roots = NULL; 2013 struct rb_node *node; 2014 u64 qgroup_to_skip; 2015 int ret = 0; 2016 2017 delayed_refs = &trans->transaction->delayed_refs; 2018 qgroup_to_skip = delayed_refs->qgroup_to_skip; 2019 while ((node = rb_first(&delayed_refs->dirty_extent_root))) { 2020 record = rb_entry(node, struct btrfs_qgroup_extent_record, 2021 node); 2022 2023 trace_btrfs_qgroup_account_extents(fs_info, record); 2024 2025 if (!ret) { 2026 /* 2027 * Old roots should be searched when inserting qgroup 2028 * extent record 2029 */ 2030 if (WARN_ON(!record->old_roots)) { 2031 /* Search commit root to find old_roots */ 2032 ret = btrfs_find_all_roots(NULL, fs_info, 2033 record->bytenr, 0, 2034 &record->old_roots); 2035 if (ret < 0) 2036 goto cleanup; 2037 } 2038 2039 /* 2040 * Use SEQ_LAST as time_seq to do special search, which 2041 * doesn't lock tree or delayed_refs and search current 2042 * root. It's safe inside commit_transaction(). 
2043 */ 2044 ret = btrfs_find_all_roots(trans, fs_info, 2045 record->bytenr, SEQ_LAST, &new_roots); 2046 if (ret < 0) 2047 goto cleanup; 2048 if (qgroup_to_skip) { 2049 ulist_del(new_roots, qgroup_to_skip, 0); 2050 ulist_del(record->old_roots, qgroup_to_skip, 2051 0); 2052 } 2053 ret = btrfs_qgroup_account_extent(trans, fs_info, 2054 record->bytenr, record->num_bytes, 2055 record->old_roots, new_roots); 2056 record->old_roots = NULL; 2057 new_roots = NULL; 2058 } 2059 cleanup: 2060 ulist_free(record->old_roots); 2061 ulist_free(new_roots); 2062 new_roots = NULL; 2063 rb_erase(node, &delayed_refs->dirty_extent_root); 2064 kfree(record); 2065 2066 } 2067 return ret; 2068 } 2069 2070 /* 2071 * called from commit_transaction. Writes all changed qgroups to disk. 2072 */ 2073 int btrfs_run_qgroups(struct btrfs_trans_handle *trans, 2074 struct btrfs_fs_info *fs_info) 2075 { 2076 struct btrfs_root *quota_root = fs_info->quota_root; 2077 int ret = 0; 2078 int start_rescan_worker = 0; 2079 2080 if (!quota_root) 2081 goto out; 2082 2083 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && 2084 test_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2085 start_rescan_worker = 1; 2086 2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2089 if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2090 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2091 2092 spin_lock(&fs_info->qgroup_lock); 2093 while (!list_empty(&fs_info->dirty_qgroups)) { 2094 struct btrfs_qgroup *qgroup; 2095 qgroup = list_first_entry(&fs_info->dirty_qgroups, 2096 struct btrfs_qgroup, dirty); 2097 list_del_init(&qgroup->dirty); 2098 spin_unlock(&fs_info->qgroup_lock); 2099 ret = update_qgroup_info_item(trans, quota_root, qgroup); 2100 if (ret) 2101 fs_info->qgroup_flags |= 2102 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2103 ret = update_qgroup_limit_item(trans, quota_root, qgroup); 2104 if (ret) 2105 fs_info->qgroup_flags 
|= 2106 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2107 spin_lock(&fs_info->qgroup_lock); 2108 } 2109 if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2110 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON; 2111 else 2112 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; 2113 spin_unlock(&fs_info->qgroup_lock); 2114 2115 ret = update_qgroup_status_item(trans, fs_info, quota_root); 2116 if (ret) 2117 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; 2118 2119 if (!ret && start_rescan_worker) { 2120 ret = qgroup_rescan_init(fs_info, 0, 1); 2121 if (!ret) { 2122 qgroup_rescan_zero_tracking(fs_info); 2123 btrfs_queue_work(fs_info->qgroup_rescan_workers, 2124 &fs_info->qgroup_rescan_work); 2125 } 2126 ret = 0; 2127 } 2128 2129 out: 2130 2131 return ret; 2132 } 2133 2134 /* 2135 * Copy the accounting information between qgroups. This is necessary 2136 * when a snapshot or a subvolume is created. Throwing an error will 2137 * cause a transaction abort so we take extra care here to only error 2138 * when a readonly fs is a reasonable outcome. 
2139 */ 2140 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, 2141 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid, 2142 struct btrfs_qgroup_inherit *inherit) 2143 { 2144 int ret = 0; 2145 int i; 2146 u64 *i_qgroups; 2147 struct btrfs_root *quota_root = fs_info->quota_root; 2148 struct btrfs_qgroup *srcgroup; 2149 struct btrfs_qgroup *dstgroup; 2150 u32 level_size = 0; 2151 u64 nums; 2152 2153 mutex_lock(&fs_info->qgroup_ioctl_lock); 2154 if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 2155 goto out; 2156 2157 if (!quota_root) { 2158 ret = -EINVAL; 2159 goto out; 2160 } 2161 2162 if (inherit) { 2163 i_qgroups = (u64 *)(inherit + 1); 2164 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + 2165 2 * inherit->num_excl_copies; 2166 for (i = 0; i < nums; ++i) { 2167 srcgroup = find_qgroup_rb(fs_info, *i_qgroups); 2168 2169 /* 2170 * Zero out invalid groups so we can ignore 2171 * them later. 2172 */ 2173 if (!srcgroup || 2174 ((srcgroup->qgroupid >> 48) <= (objectid >> 48))) 2175 *i_qgroups = 0ULL; 2176 2177 ++i_qgroups; 2178 } 2179 } 2180 2181 /* 2182 * create a tracking group for the subvol itself 2183 */ 2184 ret = add_qgroup_item(trans, quota_root, objectid); 2185 if (ret) 2186 goto out; 2187 2188 if (srcid) { 2189 struct btrfs_root *srcroot; 2190 struct btrfs_key srckey; 2191 2192 srckey.objectid = srcid; 2193 srckey.type = BTRFS_ROOT_ITEM_KEY; 2194 srckey.offset = (u64)-1; 2195 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey); 2196 if (IS_ERR(srcroot)) { 2197 ret = PTR_ERR(srcroot); 2198 goto out; 2199 } 2200 2201 level_size = fs_info->nodesize; 2202 } 2203 2204 /* 2205 * add qgroup to all inherited groups 2206 */ 2207 if (inherit) { 2208 i_qgroups = (u64 *)(inherit + 1); 2209 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) { 2210 if (*i_qgroups == 0) 2211 continue; 2212 ret = add_qgroup_relation_item(trans, quota_root, 2213 objectid, *i_qgroups); 2214 if (ret && ret != -EEXIST) 2215 goto out; 2216 ret = 
add_qgroup_relation_item(trans, quota_root,
					       *i_qgroups, objectid);
			if (ret && ret != -EEXIST)
				goto out;
		}
		ret = 0;
	}


	spin_lock(&fs_info->qgroup_lock);

	/* create the in-memory qgroup for the new subvolume */
	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		dstgroup->lim_flags = inherit->lim.flags;
		dstgroup->max_rfer = inherit->lim.max_rfer;
		dstgroup->max_excl = inherit->lim.max_excl;
		dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
		dstgroup->rsv_excl = inherit->lim.rsv_excl;

		ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
		if (ret) {
			fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
			btrfs_info(fs_info,
				   "unable to update quota limit for %llu",
				   dstgroup->qgroupid);
			goto unlock;
		}
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;

		/*
		 * We call inherit after we clone the root in order to make sure
		 * our counts don't go crazy, so at this point the only
		 * difference between the two roots should be the root node.
		 */
		dstgroup->rfer = srcgroup->rfer;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
		dstgroup->excl = level_size;
		dstgroup->excl_cmpr = level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;

		/* inherit the limit info */
		dstgroup->lim_flags = srcgroup->lim_flags;
		dstgroup->max_rfer = srcgroup->max_rfer;
		dstgroup->max_excl = srcgroup->max_excl;
		dstgroup->rsv_rfer = srcgroup->rsv_rfer;
		dstgroup->rsv_excl = srcgroup->rsv_excl;

		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		if (*i_qgroups) {
			ret = add_relation_rb(fs_info, objectid, *i_qgroups);
			if (ret)
				goto unlock;
		}
		++i_qgroups;
	}

	/* i_qgroups now points at (src, dst) qgroupid pairs */
	for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
	}
	for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		if (!i_qgroups[0] || !i_qgroups[1])
			continue;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}

/*
 * Check whether reserving @num_bytes more would push @qg over any of its
 * configured limits.  Returns false if the referenced or exclusive limit
 * would be exceeded, true otherwise.
 */
static
bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
	    qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
		return false;

	if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
	    qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
		return false;

	return true;
}

/*
 * Reserve @num_bytes against the qgroup of @root and every qgroup reachable
 * through its ->groups lists (the groups it is a member of).  If @enforce is
 * true and a limit would be exceeded, flush delalloc and commit the current
 * transaction once (deletions may free up space) before failing with -EDQUOT.
 *
 * Returns 0 on success, -EDQUOT if a limit would be exceeded, or another
 * negative errno from flushing/committing.
 */
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	int retried = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	/* privileged override: account but never fail the reservation */
	if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
	    capable(CAP_SYS_RESOURCE))
		enforce = false;

retry:
	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		if (enforce && !qgroup_check_limits(qg, num_bytes)) {
			/*
			 * Commit the tree and retry, since we may have
			 * deletions which would free up space.
			 */
			if (!retried && qg->reserved > 0) {
				struct btrfs_trans_handle *trans;

				spin_unlock(&fs_info->qgroup_lock);
				ret = btrfs_start_delalloc_inodes(root, 0);
				if (ret)
					return ret;
				btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
				trans = btrfs_join_transaction(root);
				if (IS_ERR(trans))
					return PTR_ERR(trans);
				ret = btrfs_commit_transaction(trans);
				if (ret)
					return ret;
				retried++;
				goto retry;
			}
			ret = -EDQUOT;
			goto out;
		}

		/* queue the parent groups so they get checked too */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, num_bytes);
		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}

/*
 * Release @num_bytes of reserved space from the qgroup of @ref_root and
 * every qgroup it is (transitively) a member of.
 */
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while
((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = unode_aux_to_qgroup(unode);

		trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
		/* never let the reserved counter wrap below zero */
		if (qg->reserved < num_bytes)
			report_reserved_underflow(fs_info, qg, num_bytes);
		else
			qg->reserved -= num_bytes;

		/* also release from all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}

/*
 * returns < 0 on error, 0 when more leafs are to be scanned.
 * returns 1 when done.
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans)
{
	struct btrfs_key found;
	struct extent_buffer *scratch_leaf = NULL;
	struct ulist *roots = NULL;
	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
	u64 num_bytes;
	int slot;
	int ret;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	btrfs_debug(fs_info,
		"current progress key (%llu %u %llu), search_slot ret %d",
		fs_info->qgroup_rescan_progress.objectid,
		fs_info->qgroup_rescan_progress.type,
		fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* advance the progress key past everything in this leaf */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	/* work on a private copy of the leaf so the path can be dropped */
	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
	if (!scratch_leaf) {
		ret = -ENOMEM;
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		goto out;
	}
	extent_buffer_get(scratch_leaf);
	btrfs_tree_read_lock(scratch_leaf);
	btrfs_set_lock_blocking_rw(scratch_leaf, BTRFS_READ_LOCK);
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY &&
		    found.type != BTRFS_METADATA_ITEM_KEY)
			continue;
		/* metadata items key by level, not size; use nodesize */
		if (found.type == BTRFS_METADATA_ITEM_KEY)
			num_bytes = fs_info->nodesize;
		else
			num_bytes = found.offset;

		ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
					   &roots);
		if (ret < 0)
			goto out;
		/* For rescan, just pass old_roots as NULL */
		ret = btrfs_qgroup_account_extent(trans, fs_info,
				found.objectid, num_bytes, NULL, roots);
		if (ret < 0)
			goto out;
	}
out:
	if (scratch_leaf) {
		btrfs_tree_read_unlock_blocking(scratch_leaf);
		free_extent_buffer(scratch_leaf);
	}
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}

/*
 * Background worker driving a qgroup rescan: repeatedly calls
 * qgroup_rescan_leaf() in its own transaction until it reports completion
 * (> 0), an error (< 0), or the filesystem starts closing, then persists
 * the resulting qgroup status.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	int err = -ENOMEM;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

	err = 0;
	while (!err && !btrfs_fs_closing(fs_info)) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans);
		}
		/* err > 0 means the scan finished; commit its accounting */
		if (err > 0)
			btrfs_commit_transaction(trans);
		else
			btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	/* on unmount, keep the RESCAN flag so it resumes on next mount */
	if (!btrfs_fs_closing(fs_info))
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * only update status, since the previous part has already updated the
	 * qgroup info.
	 */
	trans = btrfs_start_transaction(fs_info->quota_root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		btrfs_err(fs_info,
			  "fail to start transaction for status update: %d",
			  err);
		goto done;
	}
	ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
	if (ret < 0) {
		err = ret;
		btrfs_err(fs_info, "fail to update qgroup status: %d", err);
	}
	btrfs_end_transaction(trans);

	if (btrfs_fs_closing(fs_info)) {
		btrfs_info(fs_info, "qgroup scan paused");
	} else if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err > 0 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

done:
	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_rescan_running = false;
	mutex_unlock(&fs_info->qgroup_rescan_lock);
	complete_all(&fs_info->qgroup_rescan_completion);
}

/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;
	init_completion(&fs_info->qgroup_rescan_completion);
	fs_info->qgroup_rescan_running = true;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	btrfs_init_work(&fs_info->qgroup_rescan_work,
			btrfs_qgroup_rescan_helper,
			btrfs_qgroup_rescan_worker, NULL, NULL);

	if (ret) {
err:
		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
		return ret;
	}

	return 0;
}

/* Reset the in-memory counters of every qgroup before a rescan starts. */
static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}

/* Start a fresh qgroup rescan from objectid 0 and queue the worker. */
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_work(fs_info->qgroup_rescan_workers,
			 &fs_info->qgroup_rescan_work);

	return 0;
}

/*
 * Wait for a running rescan worker to finish.  Returns 0 if none is
 * running, or the result of the (possibly interruptible) wait.
 */
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible)
{
	int running;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_rescan_running;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (!running)
		return 0;

	if (interruptible)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);
	else
		wait_for_completion(&fs_info->qgroup_rescan_completion);

	return ret;
}

/*
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_work(fs_info->qgroup_rescan_workers,
				 &fs_info->qgroup_rescan_work);
}

/*
 * Reserve qgroup space for range [start, start + len).
 *
 * This function will either reserve space from related qgroups or do
 * nothing if the range is already reserved.
 *
 * Return 0 for successful reserve
 * Return <0 for error (including -EDQUOT)
 *
 * NOTE: this function may sleep for memory allocation.
 * if btrfs_qgroup_reserve_data() is called multiple times with
 * same @reserved, caller must ensure when error happens it's OK
 * to free *ALL* reserved space.
 */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved_ret, u64 start,
			u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset *reserved;
	u64 orig_reserved;
	u64 to_reserve;
	int ret;

	/* quotas off or not a subvolume tree: nothing to reserve */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
	    !is_fstree(root->objectid) || len == 0)
		return 0;

	/* @reserved parameter is mandatory for qgroup */
	if (WARN_ON(!reserved_ret))
		return -EINVAL;
	if (!*reserved_ret) {
		*reserved_ret = extent_changeset_alloc();
		if (!*reserved_ret)
			return -ENOMEM;
	}
	reserved = *reserved_ret;
	/* Record already reserved space */
	orig_reserved = reserved->bytes_changed;
	ret = set_record_extent_bits(&BTRFS_I(inode)->io_tree, start,
			start + len -1, EXTENT_QGROUP_RESERVED, reserved);

	/* Newly reserved space */
	to_reserve = reserved->bytes_changed - orig_reserved;
	trace_btrfs_qgroup_reserve_data(inode, start, len,
					to_reserve, QGROUP_RESERVE);
	if (ret < 0)
		goto cleanup;
	ret = qgroup_reserve(root, to_reserve, true);
	if (ret < 0)
		goto cleanup;

	return ret;

cleanup:
	/* cleanup *ALL* already reserved ranges */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter)))
		clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
				 unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
				 GFP_NOFS);
	extent_changeset_release(reserved);
	return ret;
}

/* Free ranges specified by @reserved, normally in error path */
static int qgroup_free_reserved_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct extent_changeset changeset;
	int freed = 0;
	int ret;

	extent_changeset_init(&changeset);
	/* round the requested range out to sector boundaries */
	len = round_up(start + len, root->fs_info->sectorsize);
	start = round_down(start, root->fs_info->sectorsize);

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
		u64 range_start = unode->val;
		/* unode->aux is the inclusive end */
		u64 range_len = unode->aux - range_start + 1;
		u64 free_start;
		u64 free_len;

		extent_changeset_release(&changeset);

		/* Only free range in range [start, start + len) */
		if (range_start >= start + len ||
		    range_start + range_len <= start)
			continue;
		free_start = max(range_start, start);
		free_len = min(start + len, range_start + range_len) -
			   free_start;
		/*
		 * TODO: To also modify reserved->ranges_reserved to reflect
		 * the modification.
		 *
		 * However as long as we free qgroup reserved according to
		 * EXTENT_QGROUP_RESERVED, we won't double free.
		 * So no need to rush.
2933 */ 2934 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_failure_tree, 2935 free_start, free_start + free_len - 1, 2936 EXTENT_QGROUP_RESERVED, &changeset); 2937 if (ret < 0) 2938 goto out; 2939 freed += changeset.bytes_changed; 2940 } 2941 btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed); 2942 ret = freed; 2943 out: 2944 extent_changeset_release(&changeset); 2945 return ret; 2946 } 2947 2948 static int __btrfs_qgroup_release_data(struct inode *inode, 2949 struct extent_changeset *reserved, u64 start, u64 len, 2950 int free) 2951 { 2952 struct extent_changeset changeset; 2953 int trace_op = QGROUP_RELEASE; 2954 int ret; 2955 2956 /* In release case, we shouldn't have @reserved */ 2957 WARN_ON(!free && reserved); 2958 if (free && reserved) 2959 return qgroup_free_reserved_data(inode, reserved, start, len); 2960 extent_changeset_init(&changeset); 2961 ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, start, 2962 start + len -1, EXTENT_QGROUP_RESERVED, &changeset); 2963 if (ret < 0) 2964 goto out; 2965 2966 if (free) 2967 trace_op = QGROUP_FREE; 2968 trace_btrfs_qgroup_release_data(inode, start, len, 2969 changeset.bytes_changed, trace_op); 2970 if (free) 2971 btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info, 2972 BTRFS_I(inode)->root->objectid, 2973 changeset.bytes_changed); 2974 ret = changeset.bytes_changed; 2975 out: 2976 extent_changeset_release(&changeset); 2977 return ret; 2978 } 2979 2980 /* 2981 * Free a reserved space range from io_tree and related qgroups 2982 * 2983 * Should be called when a range of pages get invalidated before reaching disk. 2984 * Or for error cleanup case. 2985 * if @reserved is given, only reserved range in [@start, @start + @len) will 2986 * be freed. 2987 * 2988 * For data written to disk, use btrfs_qgroup_release_data(). 2989 * 2990 * NOTE: This function may sleep for memory allocation. 
 */
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
}

/*
 * Release a reserved space range from io_tree only.
 *
 * Should be called when a range of pages get written to disk and corresponding
 * FILE_EXTENT is inserted into corresponding root.
 *
 * Since new qgroup accounting framework will only update qgroup numbers at
 * commit_transaction() time, its reserved space shouldn't be freed from
 * related qgroups.
 *
 * But we should release the range from io_tree, to allow further write to be
 * COWed.
 *
 * NOTE: This function may sleep for memory allocation.
 */
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len)
{
	return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
}

/*
 * Reserve @num_bytes (nodesize-aligned) of metadata space for @root and
 * track it in root->qgroup_meta_rsv on success.
 */
int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
			      bool enforce)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid) || num_bytes == 0)
		return 0;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	trace_qgroup_meta_reserve(root, (s64)num_bytes);
	ret = qgroup_reserve(root, num_bytes, enforce);
	if (ret < 0)
		return ret;
	atomic64_add(num_bytes, &root->qgroup_meta_rsv);
	return ret;
}

/* Return all outstanding metadata reservation of @root to its qgroups. */
void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 reserved;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	/* atomically grab and zero the whole outstanding reservation */
	reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0);
	if (reserved == 0)
		return;
	trace_qgroup_meta_reserve(root, -(s64)reserved);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
}

/* Return @num_bytes of metadata reservation of @root to its qgroups. */
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
	    !is_fstree(root->objectid))
		return;

	BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
	WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
	atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
	trace_qgroup_meta_reserve(root, -(s64)num_bytes);
	btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
}

/*
 * Check qgroup reserved space leaking, normally at destroy inode
 * time
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode)
{
	struct extent_changeset changeset;
	struct ulist_node *unode;
	struct ulist_iterator iter;
	int ret;

	extent_changeset_init(&changeset);
	/* any EXTENT_QGROUP_RESERVED bits still set at this point leaked */
	ret = clear_record_extent_bits(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
			EXTENT_QGROUP_RESERVED, &changeset);

	WARN_ON(ret < 0);
	if (WARN_ON(changeset.bytes_changed)) {
		ULIST_ITER_INIT(&iter);
		while ((unode = ulist_next(&changeset.range_changed, &iter))) {
			btrfs_warn(BTRFS_I(inode)->root->fs_info,
				"leaking qgroup reserved space, ino: %lu, start: %llu, end: %llu",
				inode->i_ino, unode->val, unode->aux);
		}
		btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
				BTRFS_I(inode)->root->objectid,
				changeset.bytes_changed);

	}
	extent_changeset_release(&changeset);
}