// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

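/*
 * Check whether we have enough reserved space for the pending delayed refs.
 * Returns true if the delayed_refs_rsv needs at least as much space as is
 * currently reserved in it plus the global reserve, in which case the caller
 * should back off and let delayed refs be flushed.
 */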
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
        bool ret = false;
        u64 reserved;

        spin_lock(&global_rsv->lock);
        reserved = global_rsv->reserved;
        spin_unlock(&global_rsv->lock);

        /*
         * Since the global reserve is just kind of magic we don't really want
         * to rely on it to save our bacon, so if our size is more than the
         * delayed_refs_rsv and the global rsv then it's time to think about
         * bailing.
         */
        spin_lock(&delayed_refs_rsv->lock);
        reserved += delayed_refs_rsv->reserved;
        if (delayed_refs_rsv->size >= reserved)
                ret = true;
        spin_unlock(&delayed_refs_rsv->lock);
        return ret;
}

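/*
 * Estimate how long it would take to run the currently queued delayed refs,
 * based on the average delayed ref runtime tracked in fs_info.  Returns 1 if
 * there is roughly a second or more of work queued, 2 if there is at least
 * half a second, and otherwise falls back to the space check above.
 */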
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
{
        u64 num_entries =
                atomic_read(&trans->transaction->delayed_refs.num_entries);
        u64 avg_runtime;
        u64 val;

        smp_mb();
        avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
        val = num_entries * avg_runtime;
        if (val >= NSEC_PER_SEC)
                return 1;
        if (val >= NSEC_PER_SEC / 2)
                return 2;

        return btrfs_check_space_for_delayed_refs(trans->fs_info);
}

/**
 * btrfs_delayed_refs_rsv_release - release a ref head's reservation.
 * @fs_info - the fs_info for our fs.
 * @nr - the number of items to drop.
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
        u64 released = 0;

        released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
                                             NULL);
        if (released)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, released, 0);
}

/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans - the trans that may have generated delayed refs
 *
 * This is to be called any time we may have adjusted
 * trans->delayed_ref_updates; it will calculate the additional size and add
 * it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
        u64 num_bytes;

        if (!trans->delayed_ref_updates)
                return;

        num_bytes = btrfs_calc_trans_metadata_size(fs_info,
                                                   trans->delayed_ref_updates);
        spin_lock(&delayed_rsv->lock);
        delayed_rsv->size += num_bytes;
        delayed_rsv->full = 0;
        spin_unlock(&delayed_rsv->lock);
        trans->delayed_ref_updates = 0;
}

/**
 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
 * @fs_info - the fs info for our fs.
 * @src - the source block rsv to transfer from.
 * @num_bytes - the number of bytes to transfer.
 *
 * This transfers up to the num_bytes amount from the src rsv to the
 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
                                       struct btrfs_block_rsv *src,
                                       u64 num_bytes)
{
        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
        u64 to_free = 0;

        spin_lock(&src->lock);
        src->reserved -= num_bytes;
        src->size -= num_bytes;
        spin_unlock(&src->lock);

        spin_lock(&delayed_refs_rsv->lock);
        if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
                u64 delta = delayed_refs_rsv->size -
                        delayed_refs_rsv->reserved;
                if (num_bytes > delta) {
                        to_free = num_bytes - delta;
                        num_bytes = delta;
                }
        } else {
                to_free = num_bytes;
                num_bytes = 0;
        }

        if (num_bytes)
                delayed_refs_rsv->reserved += num_bytes;
        if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
                delayed_refs_rsv->full = 1;
        spin_unlock(&delayed_refs_rsv->lock);

        if (num_bytes)
                trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                              0, num_bytes, 1);
        if (to_free)
                btrfs_space_info_add_old_bytes(fs_info,
                                delayed_refs_rsv->space_info, to_free);
}

/**
 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
 * @fs_info - the fs_info for our fs.
 * @flush - control how we can flush for this reservation.
 *
 * This will refill the delayed block_rsv up to one item's worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
                                  enum btrfs_reserve_flush_enum flush)
{
        struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
        u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
        u64 num_bytes = 0;
        int ret = -ENOSPC;

        spin_lock(&block_rsv->lock);
        if (block_rsv->reserved < block_rsv->size) {
                num_bytes = block_rsv->size - block_rsv->reserved;
                num_bytes = min(num_bytes, limit);
        }
        spin_unlock(&block_rsv->lock);

        if (!num_bytes)
                return 0;

        ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
                                           num_bytes, flush);
        if (ret)
                return ret;
        btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
        trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
                                      0, num_bytes, 1);
        return 0;
}

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
                          struct btrfs_delayed_tree_ref *ref2)
{
        if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
                          struct btrfs_delayed_data_ref *ref2)
{
        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
                if (ref1->objectid < ref2->objectid)
                        return -1;
                if (ref1->objectid > ref2->objectid)
                        return 1;
                if (ref1->offset < ref2->offset)
                        return -1;
                if (ref1->offset > ref2->offset)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

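/*
 * Compare two delayed ref nodes that share a bytenr: order by ref type first,
 * then by the type specific fields, and finally (if check_seq is set) by the
 * sequence number so that older refs sort before newer ones.
 */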
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
                     struct btrfs_delayed_ref_node *ref2,
                     bool check_seq)
{
        int ret = 0;

        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
                ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
                                     btrfs_delayed_node_to_tree_ref(ref2));
        else
                ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
                                     btrfs_delayed_node_to_data_ref(ref2));
        if (ret)
                return ret;
        if (check_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        return 0;
}

/* insert a new ref into the head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
                                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_head *entry;
        struct btrfs_delayed_ref_head *ins;
        u64 bytenr;
        bool leftmost = true;

        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
        bytenr = ins->bytenr;
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
                                 href_node);

                if (bytenr < entry->bytenr) {
                        p = &(*p)->rb_left;
                } else if (bytenr > entry->bytenr) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        return entry;
                }
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color_cached(node, root, leftmost);
        return NULL;
}

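/*
 * Insert a delayed ref node into a head's ref tree, ordered by comp_refs().
 * If an equivalent ref (including the sequence number) already exists, return
 * it so the caller can merge the two instead of inserting a duplicate.
 */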
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
                struct btrfs_delayed_ref_node *ins)
{
        struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *node = &ins->ref_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        bool leftmost = true;

        while (*p) {
                int comp;

                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 ref_node);
                comp = comp_refs(ins, entry, true);
                if (comp < 0) {
                        p = &(*p)->rb_left;
                } else if (comp > 0) {
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        return entry;
                }
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color_cached(node, root, leftmost);
        return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
                struct btrfs_delayed_ref_root *dr)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = rb_first_cached(&dr->href_root);
        if (!n)
                return NULL;

        entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

        return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
                struct btrfs_delayed_ref_root *dr, u64 bytenr,
                bool return_bigger)
{
        struct rb_root *root = &dr->href_root.rb_root;
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;

        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

                if (bytenr < entry->bytenr)
                        n = n->rb_left;
                else if (bytenr > entry->bytenr)
                        n = n->rb_right;
                else
                        return entry;
        }
        if (entry && return_bigger) {
                if (bytenr > entry->bytenr) {
                        n = rb_next(&entry->href_node);
                        if (!n)
                                return NULL;
                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
                                         href_node);
                }
                return entry;
        }
        return NULL;
}

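/*
 * Take a delayed ref head's mutex while holding delayed_refs->lock.  If the
 * mutex is contended we drop the delayed_refs lock while sleeping on the
 * mutex, holding an extra reference on the head since it may be run and freed
 * in the meantime.  Returns 0 with the mutex held, or -EAGAIN if the head was
 * removed from the tree and the caller must retry.
 */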
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
                           struct btrfs_delayed_ref_head *head)
{
        lockdep_assert_held(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        refcount_inc(&head->refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
        if (RB_EMPTY_NODE(&head->href_node)) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref_head(head);
        return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_head *head,
                                    struct btrfs_delayed_ref_node *ref)
{
        lockdep_assert_held(&head->lock);
        rb_erase_cached(&ref->ref_node, &head->ref_tree);
        RB_CLEAR_NODE(&ref->ref_node);
        if (!list_empty(&ref->add_list))
                list_del(&ref->add_list);
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        atomic_dec(&delayed_refs->num_entries);
}

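/*
 * Try to merge @ref with the refs that follow it in the head's ref tree.
 * Refs with the same action add up; refs with opposite actions cancel out.
 * Merging stops at the first ref that compares differently or whose seq is
 * still visible to a tree mod log user.  Returns true if the node passed in
 * was itself removed from the tree, which tells the caller to restart its
 * scan.
 */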
static bool merge_ref(struct btrfs_trans_handle *trans,
                      struct btrfs_delayed_ref_root *delayed_refs,
                      struct btrfs_delayed_ref_head *head,
                      struct btrfs_delayed_ref_node *ref,
                      u64 seq)
{
        struct btrfs_delayed_ref_node *next;
        struct rb_node *node = rb_next(&ref->ref_node);
        bool done = false;

        while (!done && node) {
                int mod;

                next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                node = rb_next(node);
                if (seq && next->seq >= seq)
                        break;
                if (comp_refs(ref, next, false))
                        break;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                swap(ref, next);
                                done = true;
                        }
                        mod = -next->ref_mod;
                }

                drop_delayed_ref(trans, delayed_refs, head, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, head, ref);
                        done = true;
                } else {
                        /*
                         * Can't have multiples of the same ref on a tree block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }
        }

        return done;
}

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_ref_node *ref;
        struct rb_node *node;
        u64 seq = 0;

        lockdep_assert_held(&head->lock);

        if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
                return;

        /* We don't have too many refs to merge for data. */
        if (head->is_data)
                return;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

again:
        for (node = rb_first_cached(&head->ref_tree); node;
             node = rb_next(node)) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
                if (seq && ref->seq >= seq)
                        continue;
                if (merge_ref(trans, delayed_refs, head, ref, seq))
                        goto again;
        }
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        btrfs_debug(fs_info,
                                "holding back delayed_ref %#x.%x, lowest is %#x.%x",
                                (u32)(seq >> 32), (u32)seq,
                                (u32)(elem->seq >> 32), (u32)elem->seq);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

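/*
 * Pick the next delayed ref head to run.  Selection resumes from
 * run_delayed_start so heads are processed round-robin by bytenr, skipping
 * heads that are already being processed.  Returns the chosen head with its
 * processing flag set, or NULL if there is nothing left to do.
 */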
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
                struct btrfs_delayed_ref_root *delayed_refs)
{
        struct btrfs_delayed_ref_head *head;

again:
        head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
                             true);
        if (!head && delayed_refs->run_delayed_start != 0) {
                delayed_refs->run_delayed_start = 0;
                head = find_first_ref_head(delayed_refs);
        }
        if (!head)
                return NULL;

        while (head->processing) {
                struct rb_node *node;

                node = rb_next(&head->href_node);
                if (!node) {
                        if (delayed_refs->run_delayed_start == 0)
                                return NULL;
                        delayed_refs->run_delayed_start = 0;
                        goto again;
                }
                head = rb_entry(node, struct btrfs_delayed_ref_head,
                                href_node);
        }

        head->processing = 1;
        WARN_ON(delayed_refs->num_heads_ready == 0);
        delayed_refs->num_heads_ready--;
        delayed_refs->run_delayed_start = head->bytenr +
                head->num_bytes;
        return head;
}

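/*
 * Remove a delayed ref head from the href rbtree and update the counters that
 * track it.  Callers must hold both the delayed_refs lock and the head's lock.
 */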
void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
                           struct btrfs_delayed_ref_head *head)
{
        lockdep_assert_held(&delayed_refs->lock);
        lockdep_assert_held(&head->lock);

        rb_erase_cached(&head->href_node, &delayed_refs->href_root);
        RB_CLEAR_NODE(&head->href_node);
        atomic_dec(&delayed_refs->num_entries);
        delayed_refs->num_heads--;
        if (head->processing == 0)
                delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert the ref_node to the tail or merge with tail.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
                              struct btrfs_delayed_ref_root *root,
                              struct btrfs_delayed_ref_head *href,
                              struct btrfs_delayed_ref_node *ref)
{
        struct btrfs_delayed_ref_node *exist;
        int mod;
        int ret = 0;

        spin_lock(&href->lock);
        exist = tree_insert(&href->ref_tree, ref);
        if (!exist)
                goto inserted;

        /* Now we are sure we can merge */
        ret = 1;
        if (exist->action == ref->action) {
                mod = ref->ref_mod;
        } else {
                /* Need to change action */
                if (exist->ref_mod < ref->ref_mod) {
                        exist->action = ref->action;
                        mod = -exist->ref_mod;
                        exist->ref_mod = ref->ref_mod;
                        if (ref->action == BTRFS_ADD_DELAYED_REF)
                                list_add_tail(&exist->add_list,
                                              &href->ref_add_list);
                        else if (ref->action == BTRFS_DROP_DELAYED_REF) {
                                ASSERT(!list_empty(&exist->add_list));
                                list_del(&exist->add_list);
                        } else {
                                ASSERT(0);
                        }
                } else
                        mod = -ref->ref_mod;
        }
        exist->ref_mod += mod;

        /* remove existing tail if its ref_mod is zero */
        if (exist->ref_mod == 0)
                drop_delayed_ref(trans, root, href, exist);
        spin_unlock(&href->lock);
        return ret;
inserted:
        if (ref->action == BTRFS_ADD_DELAYED_REF)
                list_add_tail(&ref->add_list, &href->ref_add_list);
        atomic_inc(&root->num_entries);
        spin_unlock(&href->lock);
        return ret;
}

/*
 * helper function to update the accounting in the head ref;
 * existing and update must have the same bytenr
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_delayed_ref_head *existing,
                         struct btrfs_delayed_ref_head *update,
                         int *old_ref_mod_ret)
{
        struct btrfs_delayed_ref_root *delayed_refs =
                &trans->transaction->delayed_refs;
        struct btrfs_fs_info *fs_info = trans->fs_info;
        int old_ref_mod;

        BUG_ON(existing->is_data != update->is_data);

        spin_lock(&existing->lock);
        if (update->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing->must_insert_reserved = update->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;

        }

        if (update->extent_op) {
                if (!existing->extent_op) {
                        existing->extent_op = update->extent_op;
                } else {
                        if (update->extent_op->update_key) {
                                memcpy(&existing->extent_op->key,
                                       &update->extent_op->key,
                                       sizeof(update->extent_op->key));
                                existing->extent_op->update_key = true;
                        }
                        if (update->extent_op->update_flags) {
                                existing->extent_op->flags_to_set |=
                                        update->extent_op->flags_to_set;
                                existing->extent_op->update_flags = true;
                        }
                        btrfs_free_delayed_extent_op(update->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation,
         * only need the lock for this case because we could be processing it
         * currently, for refs we just added we know we're a-ok.
         */
        old_ref_mod = existing->total_ref_mod;
        if (old_ref_mod_ret)
                *old_ref_mod_ret = old_ref_mod;
        existing->ref_mod += update->ref_mod;
        existing->total_ref_mod += update->ref_mod;

        /*
         * If we are going from a positive ref mod to a negative or vice
         * versa we need to make sure to adjust pending_csums accordingly.
         */
        if (existing->is_data) {
                u64 csum_leaves =
                        btrfs_csum_bytes_to_leaves(fs_info,
                                                   existing->num_bytes);

                if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
                        delayed_refs->pending_csums -= existing->num_bytes;
                        btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
                }
                if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
                        delayed_refs->pending_csums += existing->num_bytes;
                        trans->delayed_ref_updates += csum_leaves;
                }
        }
        spin_unlock(&existing->lock);
}

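/*
 * Initialize a newly allocated delayed ref head (and, if provided, the qgroup
 * extent record that goes with it) before it is inserted into the href tree.
 * The head's ref_mod starts at +1, -1 or 0 depending on whether the action
 * adds, drops or merely updates the extent.
 */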
static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
                                  struct btrfs_qgroup_extent_record *qrecord,
                                  u64 bytenr, u64 num_bytes, u64 ref_root,
                                  u64 reserved, int action, bool is_data,
                                  bool is_system)
{
        int count_mod = 1;
        int must_insert_reserved = 0;

        /* If reserved is provided, it must be a data extent. */
        BUG_ON(!is_data && reserved);

        /*
         * The head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
         * accounting when the extent is finally added, or if a later
         * modification deletes the delayed ref without ever inserting the
         * extent into the extent allocation tree.  ref->must_insert_reserved
         * is the flag used to record that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        refcount_set(&head_ref->refs, 1);
        head_ref->bytenr = bytenr;
        head_ref->num_bytes = num_bytes;
        head_ref->ref_mod = count_mod;
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
        head_ref->is_system = is_system;
        head_ref->ref_tree = RB_ROOT_CACHED;
        INIT_LIST_HEAD(&head_ref->ref_add_list);
        RB_CLEAR_NODE(&head_ref->href_node);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);

        if (qrecord) {
                if (ref_root && reserved) {
                        qrecord->data_rsv = reserved;
                        qrecord->data_rsv_refroot = ref_root;
                }
                qrecord->bytenr = bytenr;
                qrecord->num_bytes = num_bytes;
                qrecord->old_roots = NULL;
        }
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_head *head_ref,
                     struct btrfs_qgroup_extent_record *qrecord,
                     int action, int *qrecord_inserted_ret,
                     int *old_ref_mod, int *new_ref_mod)
{
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_root *delayed_refs;
        int qrecord_inserted = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        /* Record qgroup extent info if provided */
        if (qrecord) {
                if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
                                        delayed_refs, qrecord))
                        kfree(qrecord);
                else
                        qrecord_inserted = 1;
        }

        trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
                update_existing_head_ref(trans, existing, head_ref,
                                         old_ref_mod);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
                if (old_ref_mod)
                        *old_ref_mod = 0;
                if (head_ref->is_data && head_ref->ref_mod < 0) {
                        delayed_refs->pending_csums += head_ref->num_bytes;
                        trans->delayed_ref_updates +=
                                btrfs_csum_bytes_to_leaves(trans->fs_info,
                                                           head_ref->num_bytes);
                }
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
                trans->delayed_ref_updates++;
        }
        if (qrecord_inserted_ret)
                *qrecord_inserted_ret = qrecord_inserted;
        if (new_ref_mod)
                *new_ref_mod = head_ref->total_ref_mod;

        return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *                           modification to an extent.
 *
 * @fs_info:    Internal to the mounted filesystem mount structure.
 *
 * @ref:        The structure which is going to be initialized.
 *
 * @bytenr:     The logical address of the extent for which a modification is
 *              going to be recorded.
 *
 * @num_bytes:  Size of the extent whose modification is being recorded.
 *
 * @ref_root:   The id of the root where this modification has originated, this
 *              can be either one of the well-known metadata trees or the
 *              subvolume id which references this extent.
 *
 * @action:     Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *              BTRFS_ADD_DELAYED_EXTENT
 *
 * @ref_type:   Holds the type of the extent which is being recorded, can be
 *              one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *              when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *              BTRFS_EXTENT_DATA_REF_KEY when recording data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
                                    struct btrfs_delayed_ref_node *ref,
                                    u64 bytenr, u64 num_bytes, u64 ref_root,
                                    int action, u8 ref_type)
{
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        if (is_fstree(ref_root))
                seq = atomic64_read(&fs_info->tree_mod_seq);

        refcount_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
        ref->seq = seq;
        ref->type = ref_type;
        RB_CLEAR_NODE(&ref->ref_node);
        INIT_LIST_HEAD(&ref->add_list);
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_ref *generic_ref,
                               struct btrfs_delayed_extent_op *extent_op,
                               int *old_ref_mod, int *new_ref_mod)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
        bool is_system;
        int action = generic_ref->action;
        int level = generic_ref->tree_ref.level;
        int ret;
        u64 bytenr = generic_ref->bytenr;
        u64 num_bytes = generic_ref->len;
        u64 parent = generic_ref->parent;
        u8 ref_type;

        is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);

        ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                return -ENOMEM;
        }

        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
            is_fstree(generic_ref->real_root) &&
            is_fstree(generic_ref->tree_ref.root) &&
            !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                        kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                        return -ENOMEM;
                }
        }

        if (parent)
                ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref_type = BTRFS_TREE_BLOCK_REF_KEY;

        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
                                generic_ref->tree_ref.root, action, ref_type);
        ref->root = generic_ref->tree_ref.root;
        ref->parent = parent;
        ref->level = level;

        init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
                              generic_ref->tree_ref.root, 0, action, false,
                              is_system);
        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(trans, head_ref, record,
                                        action, &qrecord_inserted,
                                        old_ref_mod, new_ref_mod);

        ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
        spin_unlock(&delayed_refs->lock);

        /*
         * Need to update the delayed_refs_rsv with any changes we may have
         * made.
         */
        btrfs_update_delayed_refs_rsv(trans);

        trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
                                   action == BTRFS_ADD_DELAYED_EXTENT ?
                                   BTRFS_ADD_DELAYED_REF : action);
        if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

        if (qrecord_inserted)
                btrfs_qgroup_trace_extent_post(fs_info, record);

        return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
                               struct btrfs_ref *generic_ref,
                               u64 reserved, int *old_ref_mod,
                               int *new_ref_mod)
{
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
        int qrecord_inserted;
        int action = generic_ref->action;
        int ret;
        u64 bytenr = generic_ref->bytenr;
        u64 num_bytes = generic_ref->len;
        u64 parent = generic_ref->parent;
        u64 ref_root = generic_ref->data_ref.ref_root;
        u64 owner = generic_ref->data_ref.ino;
        u64 offset = generic_ref->data_ref.offset;
        u8 ref_type;

        ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        if (parent)
                ref_type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref_type = BTRFS_EXTENT_DATA_REF_KEY;
        init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
                                ref_root, action, ref_type);
        ref->root = ref_root;
        ref->parent = parent;
        ref->objectid = owner;
        ref->offset = offset;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
            is_fstree(ref_root) &&
            is_fstree(generic_ref->real_root) &&
            !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                        kmem_cache_free(btrfs_delayed_ref_head_cachep,
                                        head_ref);
                        return -ENOMEM;
                }
        }

        init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
                              reserved, action, true, false);
        head_ref->extent_op = NULL;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        head_ref = add_delayed_ref_head(trans, head_ref, record,
                                        action, &qrecord_inserted,
                                        old_ref_mod, new_ref_mod);

        ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
        spin_unlock(&delayed_refs->lock);

        /*
         * Need to update the delayed_refs_rsv with any changes we may have
         * made.
         */
        btrfs_update_delayed_refs_rsv(trans);

        trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
                                   action == BTRFS_ADD_DELAYED_EXTENT ?
                                   BTRFS_ADD_DELAYED_REF : action);
        if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

        if (qrecord_inserted)
                return btrfs_qgroup_trace_extent_post(fs_info, record);
        return 0;
}

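/*
 * Record a delayed extent op (a flags and/or key update) for an extent by
 * queueing a ref head with no refs attached.  The op is carried out when the
 * delayed ref head is run.
 */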
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
                              BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
                              false);
        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
                             NULL, NULL, NULL);

        spin_unlock(&delayed_refs->lock);

        /*
         * Need to update the delayed_refs_rsv with any changes we may have
         * made.
         */
        btrfs_update_delayed_refs_rsv(trans);
        return 0;
}

/*
 * This does a simple search for the head node for a given extent.  Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
        lockdep_assert_held(&delayed_refs->lock);

        return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
        kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}