// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"
#include "qgroup.h"
#include "space-info.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	bool ret = false;
	u64 reserved;

	spin_lock(&global_rsv->lock);
	reserved = global_rsv->reserved;
	spin_unlock(&global_rsv->lock);

	/*
	 * Since the global reserve is just kind of magic we don't really want
	 * to rely on it to save our bacon, so if our size is more than the
	 * delayed_refs_rsv and the global rsv then it's time to think about
	 * bailing.
	 */
	spin_lock(&delayed_refs_rsv->lock);
	reserved += delayed_refs_rsv->reserved;
	if (delayed_refs_rsv->size >= reserved)
		ret = true;
	spin_unlock(&delayed_refs_rsv->lock);
	return ret;
}

int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
{
	u64 num_entries =
		atomic_read(&trans->transaction->delayed_refs.num_entries);
	u64 avg_runtime;
	u64 val;

	smp_mb();
	avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
	val = num_entries * avg_runtime;
	if (val >= NSEC_PER_SEC)
		return 1;
	if (val >= NSEC_PER_SEC / 2)
		return 2;

	return btrfs_check_space_for_delayed_refs(trans->fs_info);
}

/**
 * btrfs_delayed_refs_rsv_release - release a ref head's reservation
 * @fs_info: the fs_info for our fs
 * @nr: the number of items to drop
 *
 * This drops the delayed ref head's count from the delayed refs rsv and frees
 * any excess reservation we had.
 */
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
	u64 released = 0;

	released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
	if (released)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, released, 0);
}
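
/*
 * A note on the delayed_refs_rsv bookkeeping used above and below: ->size
 * tracks how much metadata space the currently queued delayed refs are
 * expected to need, while ->reserved is how much has actually been carved
 * out of the space_info.  Growing ->size (btrfs_update_delayed_refs_rsv) is
 * cheap and is done as refs are queued; making ->reserved catch up is the
 * expensive part and is done either by migrating bytes from another rsv
 * (btrfs_migrate_to_delayed_refs_rsv) or by reserving fresh space
 * (btrfs_delayed_refs_rsv_refill).
 */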

/*
 * btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
 * @trans: the trans that may have generated delayed refs
 *
 * This is to be called any time we may have adjusted
 * trans->delayed_ref_updates; it will calculate the additional size and add
 * it to the delayed_refs_rsv.
 */
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
	u64 num_bytes;

	if (!trans->delayed_ref_updates)
		return;

	num_bytes = btrfs_calc_insert_metadata_size(fs_info,
						    trans->delayed_ref_updates);
	spin_lock(&delayed_rsv->lock);
	delayed_rsv->size += num_bytes;
	delayed_rsv->full = 0;
	spin_unlock(&delayed_rsv->lock);
	trans->delayed_ref_updates = 0;
}

/**
 * btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv
 * @fs_info: the fs info for our fs
 * @src: the source block rsv to transfer from
 * @num_bytes: the number of bytes to transfer
 *
 * This transfers up to the num_bytes amount from the src rsv to the
 * delayed_refs_rsv.  Any extra bytes are returned to the space info.
 */
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *src,
				       u64 num_bytes)
{
	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
	u64 to_free = 0;

	spin_lock(&src->lock);
	src->reserved -= num_bytes;
	src->size -= num_bytes;
	spin_unlock(&src->lock);

	spin_lock(&delayed_refs_rsv->lock);
	if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
		u64 delta = delayed_refs_rsv->size -
			delayed_refs_rsv->reserved;
		if (num_bytes > delta) {
			to_free = num_bytes - delta;
			num_bytes = delta;
		}
	} else {
		to_free = num_bytes;
		num_bytes = 0;
	}

	if (num_bytes)
		delayed_refs_rsv->reserved += num_bytes;
	if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
		delayed_refs_rsv->full = 1;
	spin_unlock(&delayed_refs_rsv->lock);

	if (num_bytes)
		trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
					      0, num_bytes, 1);
	if (to_free)
		btrfs_space_info_free_bytes_may_use(fs_info,
				delayed_refs_rsv->space_info, to_free);
}

/**
 * btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage
 * @fs_info: the fs_info for our fs
 * @flush: control how we can flush for this reservation
 *
 * This will refill the delayed block_rsv up to 1 item's worth of space and
 * will return -ENOSPC if we can't make the reservation.
 */
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
				  enum btrfs_reserve_flush_enum flush)
{
	struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
	u64 limit = btrfs_calc_insert_metadata_size(fs_info, 1);
	u64 num_bytes = 0;
	int ret = -ENOSPC;

	spin_lock(&block_rsv->lock);
	if (block_rsv->reserved < block_rsv->size) {
		num_bytes = block_rsv->size - block_rsv->reserved;
		num_bytes = min(num_bytes, limit);
	}
	spin_unlock(&block_rsv->lock);

	if (!num_bytes)
		return 0;

	ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
					   num_bytes, flush);
	if (ret)
		return ret;
	btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
	trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
				      0, num_bytes, 1);
	return 0;
}

/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1,
			  struct btrfs_delayed_tree_ref *ref2)
{
	if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref1,
			  struct btrfs_delayed_data_ref *ref2)
{
	if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (ref1->root < ref2->root)
			return -1;
		if (ref1->root > ref2->root)
			return 1;
		if (ref1->objectid < ref2->objectid)
			return -1;
		if (ref1->objectid > ref2->objectid)
			return 1;
		if (ref1->offset < ref2->offset)
			return -1;
		if (ref1->offset > ref2->offset)
			return 1;
	} else {
		if (ref1->parent < ref2->parent)
			return -1;
		if (ref1->parent > ref2->parent)
			return 1;
	}
	return 0;
}

static int comp_refs(struct btrfs_delayed_ref_node *ref1,
		     struct btrfs_delayed_ref_node *ref2,
		     bool check_seq)
{
	int ret = 0;

	if (ref1->type < ref2->type)
		return -1;
	if (ref1->type > ref2->type)
		return 1;
	if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    ref1->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1),
				     btrfs_delayed_node_to_tree_ref(ref2));
	else
		ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1),
				     btrfs_delayed_node_to_data_ref(ref2));
	if (ret)
		return ret;
	if (check_seq) {
		if (ref1->seq < ref2->seq)
			return -1;
		if (ref1->seq > ref2->seq)
			return 1;
	}
	return 0;
}
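
/*
 * Within a single head the individual refs are kept in an rbtree sorted by
 * comp_refs(): first by type, then by root or parent (plus objectid and
 * offset for data refs), and finally by seq.  Two refs that compare equal
 * with check_seq == false describe the same backref and only differ in when
 * they were queued, which is what makes merging them possible.
 */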

/* insert a new ref head into the head ref rbtree */
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
						   struct rb_node *node)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_head *entry;
	struct btrfs_delayed_ref_head *ins;
	u64 bytenr;
	bool leftmost = true;

	ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
	bytenr = ins->bytenr;
	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
				 href_node);

		if (bytenr < entry->bytenr) {
			p = &(*p)->rb_left;
		} else if (bytenr > entry->bytenr) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_node *tree_insert(struct rb_root_cached *root,
		struct btrfs_delayed_ref_node *ins)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_node *node = &ins->ref_node;
	struct rb_node *parent_node = NULL;
	struct btrfs_delayed_ref_node *entry;
	bool leftmost = true;

	while (*p) {
		int comp;

		parent_node = *p;
		entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
				 ref_node);
		comp = comp_refs(ins, entry, true);
		if (comp < 0) {
			p = &(*p)->rb_left;
		} else if (comp > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			return entry;
		}
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color_cached(node, root, leftmost);
	return NULL;
}

static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = rb_first_cached(&dr->href_root);
	if (!n)
		return NULL;

	entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

	return entry;
}

/*
 * Find a head entry based on bytenr. This returns the delayed ref head if it
 * was able to find one, or NULL if nothing was in that spot.  If return_bigger
 * is given, the next bigger entry is returned if no exact match is found.
 */
static struct btrfs_delayed_ref_head *find_ref_head(
		struct btrfs_delayed_ref_root *dr, u64 bytenr,
		bool return_bigger)
{
	struct rb_root *root = &dr->href_root.rb_root;
	struct rb_node *n;
	struct btrfs_delayed_ref_head *entry;

	n = root->rb_node;
	entry = NULL;
	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return entry;
	}
	if (entry && return_bigger) {
		if (bytenr > entry->bytenr) {
			n = rb_next(&entry->href_node);
			if (!n)
				return NULL;
			entry = rb_entry(n, struct btrfs_delayed_ref_head,
					 href_node);
		}
		return entry;
	}
	return NULL;
}

int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	if (mutex_trylock(&head->mutex))
		return 0;

	refcount_inc(&head->refs);
	spin_unlock(&delayed_refs->lock);

	mutex_lock(&head->mutex);
	spin_lock(&delayed_refs->lock);
	if (RB_EMPTY_NODE(&head->href_node)) {
		mutex_unlock(&head->mutex);
		btrfs_put_delayed_ref_head(head);
		return -EAGAIN;
	}
	btrfs_put_delayed_ref_head(head);
	return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
				    struct btrfs_delayed_ref_root *delayed_refs,
				    struct btrfs_delayed_ref_head *head,
				    struct btrfs_delayed_ref_node *ref)
{
	lockdep_assert_held(&head->lock);
	rb_erase_cached(&ref->ref_node, &head->ref_tree);
	RB_CLEAR_NODE(&ref->ref_node);
	if (!list_empty(&ref->add_list))
		list_del(&ref->add_list);
	ref->in_tree = 0;
	btrfs_put_delayed_ref(ref);
	atomic_dec(&delayed_refs->num_entries);
}
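
/*
 * Illustration of the merging done by merge_ref()/btrfs_merge_delayed_refs()
 * below: refs for the same backref (comp_refs() == 0 ignoring seq) are
 * combined by summing their ref_mods, e.g. an ADD with ref_mod 1 followed by
 * a DROP with ref_mod 1 cancels out and both nodes are dropped from the
 * head's tree.  Refs newer than the oldest tree mod log sequence number must
 * be preserved and are never merged.
 */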

static bool merge_ref(struct btrfs_trans_handle *trans,
		      struct btrfs_delayed_ref_root *delayed_refs,
		      struct btrfs_delayed_ref_head *head,
		      struct btrfs_delayed_ref_node *ref,
		      u64 seq)
{
	struct btrfs_delayed_ref_node *next;
	struct rb_node *node = rb_next(&ref->ref_node);
	bool done = false;

	while (!done && node) {
		int mod;

		next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		node = rb_next(node);
		if (seq && next->seq >= seq)
			break;
		if (comp_refs(ref, next, false))
			break;

		if (ref->action == next->action) {
			mod = next->ref_mod;
		} else {
			if (ref->ref_mod < next->ref_mod) {
				swap(ref, next);
				done = true;
			}
			mod = -next->ref_mod;
		}

		drop_delayed_ref(trans, delayed_refs, head, next);
		ref->ref_mod += mod;
		if (ref->ref_mod == 0) {
			drop_delayed_ref(trans, delayed_refs, head, ref);
			done = true;
		} else {
			/*
			 * Can't have multiples of the same ref on a tree block.
			 */
			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
		}
	}

	return done;
}

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *delayed_refs,
			      struct btrfs_delayed_ref_head *head)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_ref_node *ref;
	struct rb_node *node;
	u64 seq = 0;

	lockdep_assert_held(&head->lock);

	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
		return;

	/* We don't have too many refs to merge for data. */
	if (head->is_data)
		return;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *elem;

		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		seq = elem->seq;
	}
	read_unlock(&fs_info->tree_mod_log_lock);

again:
	for (node = rb_first_cached(&head->ref_tree); node;
	     node = rb_next(node)) {
		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
		if (seq && ref->seq >= seq)
			continue;
		if (merge_ref(trans, delayed_refs, head, ref, seq))
			goto again;
	}
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
{
	struct seq_list *elem;
	int ret = 0;

	read_lock(&fs_info->tree_mod_log_lock);
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		elem = list_first_entry(&fs_info->tree_mod_seq_list,
					struct seq_list, list);
		if (seq >= elem->seq) {
			btrfs_debug(fs_info,
				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
				(u32)(seq >> 32), (u32)seq,
				(u32)(elem->seq >> 32), (u32)elem->seq);
			ret = 1;
		}
	}

	read_unlock(&fs_info->tree_mod_log_lock);
	return ret;
}
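
/*
 * Pick the next head to process.  Selection resumes from run_delayed_start
 * and skips heads that another worker is already processing, so concurrent
 * ref-running threads spread out over the bytenr space instead of contending
 * on the same head; once the end of the tree is reached the scan wraps back
 * to the start a single time.
 */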

struct btrfs_delayed_ref_head *btrfs_select_ref_head(
		struct btrfs_delayed_ref_root *delayed_refs)
{
	struct btrfs_delayed_ref_head *head;

again:
	head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
			     true);
	if (!head && delayed_refs->run_delayed_start != 0) {
		delayed_refs->run_delayed_start = 0;
		head = find_first_ref_head(delayed_refs);
	}
	if (!head)
		return NULL;

	while (head->processing) {
		struct rb_node *node;

		node = rb_next(&head->href_node);
		if (!node) {
			if (delayed_refs->run_delayed_start == 0)
				return NULL;
			delayed_refs->run_delayed_start = 0;
			goto again;
		}
		head = rb_entry(node, struct btrfs_delayed_ref_head,
				href_node);
	}

	head->processing = 1;
	WARN_ON(delayed_refs->num_heads_ready == 0);
	delayed_refs->num_heads_ready--;
	delayed_refs->run_delayed_start = head->bytenr +
		head->num_bytes;
	return head;
}

void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
			   struct btrfs_delayed_ref_head *head)
{
	lockdep_assert_held(&delayed_refs->lock);
	lockdep_assert_held(&head->lock);

	rb_erase_cached(&head->href_node, &delayed_refs->href_root);
	RB_CLEAR_NODE(&head->href_node);
	atomic_dec(&delayed_refs->num_entries);
	delayed_refs->num_heads--;
	if (head->processing == 0)
		delayed_refs->num_heads_ready--;
}

/*
 * Helper to insert the ref_node into the head's rbtree of refs, or merge it
 * with an existing node for the same backref.
 *
 * Return 0 for insert.
 * Return >0 for merge.
 */
static int insert_delayed_ref(struct btrfs_trans_handle *trans,
			      struct btrfs_delayed_ref_root *root,
			      struct btrfs_delayed_ref_head *href,
			      struct btrfs_delayed_ref_node *ref)
{
	struct btrfs_delayed_ref_node *exist;
	int mod;
	int ret = 0;

	spin_lock(&href->lock);
	exist = tree_insert(&href->ref_tree, ref);
	if (!exist)
		goto inserted;

	/* Now we are sure we can merge */
	ret = 1;
	if (exist->action == ref->action) {
		mod = ref->ref_mod;
	} else {
		/* Need to change action */
		if (exist->ref_mod < ref->ref_mod) {
			exist->action = ref->action;
			mod = -exist->ref_mod;
			exist->ref_mod = ref->ref_mod;
			if (ref->action == BTRFS_ADD_DELAYED_REF)
				list_add_tail(&exist->add_list,
					      &href->ref_add_list);
			else if (ref->action == BTRFS_DROP_DELAYED_REF) {
				ASSERT(!list_empty(&exist->add_list));
				list_del(&exist->add_list);
			} else {
				ASSERT(0);
			}
		} else
			mod = -ref->ref_mod;
	}
	exist->ref_mod += mod;

	/* remove the existing ref if its ref_mod drops to zero */
	if (exist->ref_mod == 0)
		drop_delayed_ref(trans, root, href, exist);
	spin_unlock(&href->lock);
	return ret;
inserted:
	if (ref->action == BTRFS_ADD_DELAYED_REF)
		list_add_tail(&ref->add_list, &href->ref_add_list);
	atomic_inc(&root->num_entries);
	spin_unlock(&href->lock);
	return ret;
}
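
/*
 * Worked example of the merge arithmetic above: if the tree already holds a
 * DROP ref with ref_mod 2 and an ADD ref with ref_mod 3 arrives for the same
 * backref, the existing node flips to ADD, takes ref_mod 3 and then has the
 * old 2 subtracted, leaving a single ADD with ref_mod 1.  The head updated
 * below keeps the same kind of running total in total_ref_mod.
 */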

/*
 * Helper function to update the accounting in the head ref.  The existing
 * head and the update must have the same bytenr.
 */
static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_delayed_ref_head *existing,
			 struct btrfs_delayed_ref_head *update,
			 int *old_ref_mod_ret)
{
	struct btrfs_delayed_ref_root *delayed_refs =
		&trans->transaction->delayed_refs;
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int old_ref_mod;

	BUG_ON(existing->is_data != update->is_data);

	spin_lock(&existing->lock);
	if (update->must_insert_reserved) {
		/*
		 * If the extent was freed and then reallocated before the
		 * delayed ref entries were processed, we can end up with an
		 * existing head ref without the must_insert_reserved flag
		 * set.  Set it again here.
		 */
		existing->must_insert_reserved = update->must_insert_reserved;

		/*
		 * update the num_bytes so we make sure the accounting
		 * is done correctly
		 */
		existing->num_bytes = update->num_bytes;

	}

	if (update->extent_op) {
		if (!existing->extent_op) {
			existing->extent_op = update->extent_op;
		} else {
			if (update->extent_op->update_key) {
				memcpy(&existing->extent_op->key,
				       &update->extent_op->key,
				       sizeof(update->extent_op->key));
				existing->extent_op->update_key = true;
			}
			if (update->extent_op->update_flags) {
				existing->extent_op->flags_to_set |=
					update->extent_op->flags_to_set;
				existing->extent_op->update_flags = true;
			}
			btrfs_free_delayed_extent_op(update->extent_op);
		}
	}
	/*
	 * Update the reference mod on the head to reflect this new operation.
	 * We only need the lock in this case because the head could already be
	 * getting processed; for refs we just added we know we're a-ok.
	 */
	old_ref_mod = existing->total_ref_mod;
	if (old_ref_mod_ret)
		*old_ref_mod_ret = old_ref_mod;
	existing->ref_mod += update->ref_mod;
	existing->total_ref_mod += update->ref_mod;

	/*
	 * If we are going from a positive ref mod to a negative or vice
	 * versa we need to make sure to adjust pending_csums accordingly.
	 */
	if (existing->is_data) {
		u64 csum_leaves =
			btrfs_csum_bytes_to_leaves(fs_info,
						   existing->num_bytes);

		if (existing->total_ref_mod >= 0 && old_ref_mod < 0) {
			delayed_refs->pending_csums -= existing->num_bytes;
			btrfs_delayed_refs_rsv_release(fs_info, csum_leaves);
		}
		if (existing->total_ref_mod < 0 && old_ref_mod >= 0) {
			delayed_refs->pending_csums += existing->num_bytes;
			trans->delayed_ref_updates += csum_leaves;
		}
	}
	spin_unlock(&existing->lock);
}
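
/*
 * The pending_csums adjustment above only happens when total_ref_mod crosses
 * zero: a data extent whose net effect is a drop will eventually need its
 * checksum items deleted, so its byte count is added to pending_csums and
 * leaves are reserved for that work; if later updates bring the net effect
 * back to neutral or positive, the bytes and the reservation are given back.
 */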

static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
				  struct btrfs_qgroup_extent_record *qrecord,
				  u64 bytenr, u64 num_bytes, u64 ref_root,
				  u64 reserved, int action, bool is_data,
				  bool is_system)
{
	int count_mod = 1;
	int must_insert_reserved = 0;

	/* If reserved is provided, it must be a data extent. */
	BUG_ON(!is_data && reserved);

	/*
	 * The head node stores the sum of all the mods, so dropping a ref
	 * should drop the sum in the head node by one.
	 */
	if (action == BTRFS_UPDATE_DELAYED_HEAD)
		count_mod = 0;
	else if (action == BTRFS_DROP_DELAYED_REF)
		count_mod = -1;

	/*
	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
	 * accounting when the extent is finally added, or if a later
	 * modification deletes the delayed ref without ever inserting the
	 * extent into the extent allocation tree.  ref->must_insert_reserved
	 * is the flag used to record that accounting mods are required.
	 *
	 * Once we record must_insert_reserved, switch the action to
	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
	 */
	if (action == BTRFS_ADD_DELAYED_EXTENT)
		must_insert_reserved = 1;
	else
		must_insert_reserved = 0;

	refcount_set(&head_ref->refs, 1);
	head_ref->bytenr = bytenr;
	head_ref->num_bytes = num_bytes;
	head_ref->ref_mod = count_mod;
	head_ref->must_insert_reserved = must_insert_reserved;
	head_ref->is_data = is_data;
	head_ref->is_system = is_system;
	head_ref->ref_tree = RB_ROOT_CACHED;
	INIT_LIST_HEAD(&head_ref->ref_add_list);
	RB_CLEAR_NODE(&head_ref->href_node);
	head_ref->processing = 0;
	head_ref->total_ref_mod = count_mod;
	spin_lock_init(&head_ref->lock);
	mutex_init(&head_ref->mutex);

	if (qrecord) {
		if (ref_root && reserved) {
			qrecord->data_rsv = reserved;
			qrecord->data_rsv_refroot = ref_root;
		}
		qrecord->bytenr = bytenr;
		qrecord->num_bytes = num_bytes;
		qrecord->old_roots = NULL;
	}
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
		     struct btrfs_delayed_ref_head *head_ref,
		     struct btrfs_qgroup_extent_record *qrecord,
		     int action, int *qrecord_inserted_ret,
		     int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_delayed_ref_head *existing;
	struct btrfs_delayed_ref_root *delayed_refs;
	int qrecord_inserted = 0;

	delayed_refs = &trans->transaction->delayed_refs;

	/* Record qgroup extent info if provided */
	if (qrecord) {
		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
					delayed_refs, qrecord))
			kfree(qrecord);
		else
			qrecord_inserted = 1;
	}

	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);

	existing = htree_insert(&delayed_refs->href_root,
				&head_ref->href_node);
	if (existing) {
		update_existing_head_ref(trans, existing, head_ref,
					 old_ref_mod);
		/*
		 * we've updated the existing ref, free the newly
		 * allocated ref
		 */
		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
		head_ref = existing;
	} else {
		if (old_ref_mod)
			*old_ref_mod = 0;
		if (head_ref->is_data && head_ref->ref_mod < 0) {
			delayed_refs->pending_csums += head_ref->num_bytes;
			trans->delayed_ref_updates +=
				btrfs_csum_bytes_to_leaves(trans->fs_info,
							   head_ref->num_bytes);
		}
		delayed_refs->num_heads++;
		delayed_refs->num_heads_ready++;
		atomic_inc(&delayed_refs->num_entries);
		trans->delayed_ref_updates++;
	}
	if (qrecord_inserted_ret)
		*qrecord_inserted_ret = qrecord_inserted;
	if (new_ref_mod)
		*new_ref_mod = head_ref->total_ref_mod;

	return head_ref;
}

/*
 * init_delayed_ref_common - Initialize the structure which represents a
 *                           modification to an extent.
 *
 * @fs_info:    Internal to the mounted filesystem mount structure.
 * @ref:        The structure which is going to be initialized.
 * @bytenr:     The logical address of the extent for which a modification is
 *              going to be recorded.
 * @num_bytes:  Size of the extent whose modification is being recorded.
 * @ref_root:   The id of the root where this modification has originated,
 *              this can be either one of the well-known metadata trees or
 *              the subvolume id which references this extent.
 * @action:     Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
 *              BTRFS_ADD_DELAYED_EXTENT
 * @ref_type:   Holds the type of the extent which is being recorded, can be
 *              one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
 *              when recording a metadata extent or BTRFS_SHARED_DATA_REF_KEY/
 *              BTRFS_EXTENT_DATA_REF_KEY when recording a data extent
 */
static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
				    struct btrfs_delayed_ref_node *ref,
				    u64 bytenr, u64 num_bytes, u64 ref_root,
				    int action, u8 ref_type)
{
	u64 seq = 0;

	if (action == BTRFS_ADD_DELAYED_EXTENT)
		action = BTRFS_ADD_DELAYED_REF;

	if (is_fstree(ref_root))
		seq = atomic64_read(&fs_info->tree_mod_seq);

	refcount_set(&ref->refs, 1);
	ref->bytenr = bytenr;
	ref->num_bytes = num_bytes;
	ref->ref_mod = 1;
	ref->action = action;
	ref->is_head = 0;
	ref->in_tree = 1;
	ref->seq = seq;
	ref->type = ref_type;
	RB_CLEAR_NODE(&ref->ref_node);
	INIT_LIST_HEAD(&ref->add_list);
}
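
/*
 * The two btrfs_add_delayed_*_ref() entry points below share the same shape:
 * allocate the ref, the head and (when qgroups are enabled) a qgroup record
 * up front, then take delayed_refs->lock and insert the head first and the
 * ref second so both land in the same critical section, and finally update
 * the delayed refs rsv outside the lock.  If the ref merged with an existing
 * node, the freshly allocated one is freed again.
 */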

/*
 * add a delayed tree ref.  This does all of the accounting required to make
 * sure the delayed ref is eventually processed before this transaction
 * commits.
 */
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       struct btrfs_delayed_extent_op *extent_op,
			       int *old_ref_mod, int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	bool is_system;
	int action = generic_ref->action;
	int level = generic_ref->tree_ref.level;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u8 ref_type;

	is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);

	ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
	BUG_ON(extent_op && extent_op->is_data);
	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(generic_ref->real_root) &&
	    is_fstree(generic_ref->tree_ref.root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
			return -ENOMEM;
		}
	}

	if (parent)
		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
	else
		ref_type = BTRFS_TREE_BLOCK_REF_KEY;

	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				generic_ref->tree_ref.root, action, ref_type);
	ref->root = generic_ref->tree_ref.root;
	ref->parent = parent;
	ref->level = level;

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
			      generic_ref->tree_ref.root, 0, action, false,
			      is_system);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);

	if (qrecord_inserted)
		btrfs_qgroup_trace_extent_post(fs_info, record);

	return 0;
}

/*
 * add a delayed data ref.  It's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_ref *generic_ref,
			       u64 reserved, int *old_ref_mod,
			       int *new_ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_qgroup_extent_record *record = NULL;
	int qrecord_inserted;
	int action = generic_ref->action;
	int ret;
	u64 bytenr = generic_ref->bytenr;
	u64 num_bytes = generic_ref->len;
	u64 parent = generic_ref->parent;
	u64 ref_root = generic_ref->data_ref.ref_root;
	u64 owner = generic_ref->data_ref.ino;
	u64 offset = generic_ref->data_ref.offset;
	u8 ref_type;

	ASSERT(generic_ref->type == BTRFS_REF_DATA && action);
	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref_type = BTRFS_SHARED_DATA_REF_KEY;
	else
		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	ref->root = ref_root;
	ref->parent = parent;
	ref->objectid = owner;
	ref->offset = offset;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref) {
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
		return -ENOMEM;
	}

	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
	    is_fstree(ref_root) &&
	    is_fstree(generic_ref->real_root) &&
	    !generic_ref->skip_qgroup) {
		record = kzalloc(sizeof(*record), GFP_NOFS);
		if (!record) {
			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
			kmem_cache_free(btrfs_delayed_ref_head_cachep,
					head_ref);
			return -ENOMEM;
		}
	}

	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);
	head_ref->extent_op = NULL;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	/*
	 * insert both the head node and the new ref without dropping
	 * the spin lock
	 */
	head_ref = add_delayed_ref_head(trans, head_ref, record,
					action, &qrecord_inserted,
					old_ref_mod, new_ref_mod);

	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);

	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
				   action == BTRFS_ADD_DELAYED_EXTENT ?
				   BTRFS_ADD_DELAYED_REF : action);
	if (ret > 0)
		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

	if (qrecord_inserted)
		return btrfs_qgroup_trace_extent_post(fs_info, record);
	return 0;
}

int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
				u64 bytenr, u64 num_bytes,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_delayed_ref_head *head_ref;
	struct btrfs_delayed_ref_root *delayed_refs;

	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
	if (!head_ref)
		return -ENOMEM;

	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
			      false);
	head_ref->extent_op = extent_op;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);

	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
			     NULL, NULL, NULL);

	spin_unlock(&delayed_refs->lock);

	/*
	 * Need to update the delayed_refs_rsv with any changes we may have
	 * made.
	 */
	btrfs_update_delayed_refs_rsv(trans);
	return 0;
}

/*
 * This does a simple search for the head node for a given extent.  Returns the
 * head node if found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
{
	lockdep_assert_held(&delayed_refs->lock);

	return find_ref_head(delayed_refs, bytenr, false);
}

void __cold btrfs_delayed_ref_exit(void)
{
	kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
	kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
	kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

int __init btrfs_delayed_ref_init(void)
{
	btrfs_delayed_ref_head_cachep = kmem_cache_create(
				"btrfs_delayed_ref_head",
				sizeof(struct btrfs_delayed_ref_head), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_ref_head_cachep)
		goto fail;

	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
				"btrfs_delayed_tree_ref",
				sizeof(struct btrfs_delayed_tree_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_tree_ref_cachep)
		goto fail;

	btrfs_delayed_data_ref_cachep = kmem_cache_create(
				"btrfs_delayed_data_ref",
				sizeof(struct btrfs_delayed_data_ref), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_data_ref_cachep)
		goto fail;

	btrfs_delayed_extent_op_cachep = kmem_cache_create(
				"btrfs_delayed_extent_op",
				sizeof(struct btrfs_delayed_extent_op), 0,
				SLAB_MEM_SPREAD, NULL);
	if (!btrfs_delayed_extent_op_cachep)
		goto fail;

	return 0;
fail:
	btrfs_delayed_ref_exit();
	return -ENOMEM;
}