/*
 * Copyright (C) 2011 Fujitsu.  All rights reserved.
 * Written by Miao Xie <miaox@cn.fujitsu.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/slab.h>
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
#include "ctree.h"

#define BTRFS_DELAYED_WRITEBACK		512
#define BTRFS_DELAYED_BACKGROUND	128
#define BTRFS_DELAYED_BATCH		16

static struct kmem_cache *delayed_node_cache;

int __init btrfs_delayed_inode_init(void)
{
	delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
					sizeof(struct btrfs_delayed_node),
					0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!delayed_node_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_delayed_inode_exit(void)
{
	if (delayed_node_cache)
		kmem_cache_destroy(delayed_node_cache);
}

static inline void btrfs_init_delayed_node(
				struct btrfs_delayed_node *delayed_node,
				struct btrfs_root *root, u64 inode_id)
{
	delayed_node->root = root;
	delayed_node->inode_id = inode_id;
	atomic_set(&delayed_node->refs, 0);
	delayed_node->count = 0;
	delayed_node->flags = 0;
	delayed_node->ins_root = RB_ROOT;
	delayed_node->del_root = RB_ROOT;
	mutex_init(&delayed_node->mutex);
	delayed_node->index_cnt = 0;
	INIT_LIST_HEAD(&delayed_node->n_list);
	INIT_LIST_HEAD(&delayed_node->p_list);
	delayed_node->bytes_reserved = 0;
	memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
}

static inline int btrfs_is_continuous_delayed_item(
					struct btrfs_delayed_item *item1,
					struct btrfs_delayed_item *item2)
{
	if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
	    item1->key.objectid == item2->key.objectid &&
	    item1->key.type == item2->key.type &&
	    item1->key.offset + 1 == item2->key.offset)
		return 1;
	return 0;
}

static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
						struct btrfs_root *root)
{
	return root->fs_info->delayed_root;
}

static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
{
	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
	struct btrfs_root *root = btrfs_inode->root;
	u64 ino = btrfs_ino(inode);
	struct btrfs_delayed_node *node;

	node = ACCESS_ONCE(btrfs_inode->delayed_node);
	if (node) {
		atomic_inc(&node->refs);
		return node;
	}

	spin_lock(&root->inode_lock);
	node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
	if (node) {
		if (btrfs_inode->delayed_node) {
			atomic_inc(&node->refs);	/* can be accessed */
			BUG_ON(btrfs_inode->delayed_node != node);
			spin_unlock(&root->inode_lock);
			return node;
		}
		btrfs_inode->delayed_node = node;
		/* can be accessed and cached in the inode */
		atomic_add(2, &node->refs);
		spin_unlock(&root->inode_lock);
		return node;
	}
	spin_unlock(&root->inode_lock);

	return NULL;
}
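
/*
 * Reference counting of delayed nodes: one reference is held by the inode
 * cache (BTRFS_I(inode)->delayed_node together with the radix tree entry)
 * and one by the node_list while the node is queued.  The lookup helpers
 * above and below return the node with an extra reference that the caller
 * must drop with btrfs_release_delayed_node() or
 * btrfs_release_prepared_delayed_node().
 */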

/* Will return either the node or PTR_ERR(-ENOMEM) */
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
							struct inode *inode)
{
	struct btrfs_delayed_node *node;
	struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
	struct btrfs_root *root = btrfs_inode->root;
	u64 ino = btrfs_ino(inode);
	int ret;

again:
	node = btrfs_get_delayed_node(inode);
	if (node)
		return node;

	node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
	if (!node)
		return ERR_PTR(-ENOMEM);
	btrfs_init_delayed_node(node, root, ino);

	/* cached in the btrfs inode and can be accessed */
	atomic_add(2, &node->refs);

	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
	if (ret) {
		kmem_cache_free(delayed_node_cache, node);
		return ERR_PTR(ret);
	}

	spin_lock(&root->inode_lock);
	ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
	if (ret == -EEXIST) {
		spin_unlock(&root->inode_lock);
		kmem_cache_free(delayed_node_cache, node);
		radix_tree_preload_end();
		goto again;
	}
	btrfs_inode->delayed_node = node;
	spin_unlock(&root->inode_lock);
	radix_tree_preload_end();

	return node;
}

/*
 * Call this while holding delayed_node->mutex.
 *
 * If mod = 1, add this node into the prepared list.
 */
static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
				     struct btrfs_delayed_node *node,
				     int mod)
{
	spin_lock(&root->lock);
	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		if (!list_empty(&node->p_list))
			list_move_tail(&node->p_list, &root->prepare_list);
		else if (mod)
			list_add_tail(&node->p_list, &root->prepare_list);
	} else {
		list_add_tail(&node->n_list, &root->node_list);
		list_add_tail(&node->p_list, &root->prepare_list);
		atomic_inc(&node->refs);	/* inserted into list */
		root->nodes++;
		set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
}

/* Call this while holding delayed_node->mutex. */
static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
				       struct btrfs_delayed_node *node)
{
	spin_lock(&root->lock);
	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		root->nodes--;
		atomic_dec(&node->refs);	/* not in the list */
		list_del_init(&node->n_list);
		if (!list_empty(&node->p_list))
			list_del_init(&node->p_list);
		clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
}

static struct btrfs_delayed_node *btrfs_first_delayed_node(
			struct btrfs_delayed_root *delayed_root)
{
	struct list_head *p;
	struct btrfs_delayed_node *node = NULL;

	spin_lock(&delayed_root->lock);
	if (list_empty(&delayed_root->node_list))
		goto out;

	p = delayed_root->node_list.next;
	node = list_entry(p, struct btrfs_delayed_node, n_list);
	atomic_inc(&node->refs);
out:
	spin_unlock(&delayed_root->lock);

	return node;
}
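
/*
 * Return the follower of @node on delayed_root->node_list with an extra
 * reference held, or NULL when the end of the list is reached.  If @node
 * has already been dequeued, iteration restarts from the list head.
 */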
static struct btrfs_delayed_node *btrfs_next_delayed_node(
						struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_root *delayed_root;
	struct list_head *p;
	struct btrfs_delayed_node *next = NULL;

	delayed_root = node->root->fs_info->delayed_root;
	spin_lock(&delayed_root->lock);
	if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		/* not in the list */
		if (list_empty(&delayed_root->node_list))
			goto out;
		p = delayed_root->node_list.next;
	} else if (list_is_last(&node->n_list, &delayed_root->node_list))
		goto out;
	else
		p = node->n_list.next;

	next = list_entry(p, struct btrfs_delayed_node, n_list);
	atomic_inc(&next->refs);
out:
	spin_unlock(&delayed_root->lock);

	return next;
}

static void __btrfs_release_delayed_node(
				struct btrfs_delayed_node *delayed_node,
				int mod)
{
	struct btrfs_delayed_root *delayed_root;

	if (!delayed_node)
		return;

	delayed_root = delayed_node->root->fs_info->delayed_root;

	mutex_lock(&delayed_node->mutex);
	if (delayed_node->count)
		btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
	else
		btrfs_dequeue_delayed_node(delayed_root, delayed_node);
	mutex_unlock(&delayed_node->mutex);

	if (atomic_dec_and_test(&delayed_node->refs)) {
		bool free = false;
		struct btrfs_root *root = delayed_node->root;
		spin_lock(&root->inode_lock);
		if (atomic_read(&delayed_node->refs) == 0) {
			radix_tree_delete(&root->delayed_nodes_tree,
					  delayed_node->inode_id);
			free = true;
		}
		spin_unlock(&root->inode_lock);
		if (free)
			kmem_cache_free(delayed_node_cache, delayed_node);
	}
}

static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 0);
}

static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
					struct btrfs_delayed_root *delayed_root)
{
	struct list_head *p;
	struct btrfs_delayed_node *node = NULL;

	spin_lock(&delayed_root->lock);
	if (list_empty(&delayed_root->prepare_list))
		goto out;

	p = delayed_root->prepare_list.next;
	list_del_init(p);
	node = list_entry(p, struct btrfs_delayed_node, p_list);
	atomic_inc(&node->refs);
out:
	spin_unlock(&delayed_root->lock);

	return node;
}

static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	__btrfs_release_delayed_node(node, 1);
}

static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
{
	struct btrfs_delayed_item *item;
	item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
	if (item) {
		item->data_len = data_len;
		item->ins_or_del = 0;
		item->bytes_reserved = 0;
		item->delayed_node = NULL;
		atomic_set(&item->refs, 1);
	}
	return item;
}

/*
 * __btrfs_lookup_delayed_item - look up the delayed item by key
 * @root: the rb-root of the delayed node to search in
 * @key: the key to look up
 * @prev: used to store the prev item if the right item isn't found
 * @next: used to store the next item if the right item isn't found
 *
 * Note: if we don't find the right item, we will return the prev item and
 * the next item.
 */
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
				struct rb_root *root,
				struct btrfs_key *key,
				struct btrfs_delayed_item **prev,
				struct btrfs_delayed_item **next)
{
	struct rb_node *node, *prev_node = NULL;
	struct btrfs_delayed_item *delayed_item = NULL;
	int ret = 0;

	node = root->rb_node;

	while (node) {
		delayed_item = rb_entry(node, struct btrfs_delayed_item,
					rb_node);
		prev_node = node;
		ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
		if (ret < 0)
			node = node->rb_right;
		else if (ret > 0)
			node = node->rb_left;
		else
			return delayed_item;
	}

	if (prev) {
		if (!prev_node)
			*prev = NULL;
		else if (ret < 0)
			*prev = delayed_item;
		else if ((node = rb_prev(prev_node)) != NULL) {
			*prev = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*prev = NULL;
	}

	if (next) {
		if (!prev_node)
			*next = NULL;
		else if (ret > 0)
			*next = delayed_item;
		else if ((node = rb_next(prev_node)) != NULL) {
			*next = rb_entry(node, struct btrfs_delayed_item,
					 rb_node);
		} else
			*next = NULL;
	}
	return NULL;
}

static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
					struct btrfs_delayed_node *delayed_node,
					struct btrfs_key *key)
{
	struct btrfs_delayed_item *item;

	item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
					   NULL, NULL);
	return item;
}

static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
				    struct btrfs_delayed_item *ins,
				    int action)
{
	struct rb_node **p, *node;
	struct rb_node *parent_node = NULL;
	struct rb_root *root;
	struct btrfs_delayed_item *item;
	int cmp;

	if (action == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_node->ins_root;
	else if (action == BTRFS_DELAYED_DELETION_ITEM)
		root = &delayed_node->del_root;
	else
		BUG();
	p = &root->rb_node;
	node = &ins->rb_node;

	while (*p) {
		parent_node = *p;
		item = rb_entry(parent_node, struct btrfs_delayed_item,
				rb_node);

		cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
		if (cmp < 0)
			p = &(*p)->rb_right;
		else if (cmp > 0)
			p = &(*p)->rb_left;
		else
			return -EEXIST;
	}

	rb_link_node(node, parent_node, p);
	rb_insert_color(node, root);
	ins->delayed_node = delayed_node;
	ins->ins_or_del = action;

	if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
	    action == BTRFS_DELAYED_INSERTION_ITEM &&
	    ins->key.offset >= delayed_node->index_cnt)
		delayed_node->index_cnt = ins->key.offset + 1;

	delayed_node->count++;
	atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
	return 0;
}

static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
					      struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_INSERTION_ITEM);
}

static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
					     struct btrfs_delayed_item *item)
{
	return __btrfs_add_delayed_item(node, item,
					BTRFS_DELAYED_DELETION_ITEM);
}
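
/*
 * Account one processed delayed item: bump items_seq, drop the global item
 * count and wake up waiters either when the count falls below
 * BTRFS_DELAYED_BACKGROUND or after every BTRFS_DELAYED_BATCH items, which
 * lets throttled callers in btrfs_balance_delayed_items() make progress in
 * batch-sized steps instead of waiting for the whole backlog to drain.
 */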
static void finish_one_item(struct btrfs_delayed_root *delayed_root)
{
	int seq = atomic_inc_return(&delayed_root->items_seq);

	/*
	 * atomic_dec_return implies a barrier for waitqueue_active
	 */
	if ((atomic_dec_return(&delayed_root->items) <
	    BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
	    waitqueue_active(&delayed_root->wait))
		wake_up(&delayed_root->wait);
}

static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
	struct rb_root *root;
	struct btrfs_delayed_root *delayed_root;

	delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;

	BUG_ON(!delayed_root);
	BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
	       delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);

	if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
		root = &delayed_item->delayed_node->ins_root;
	else
		root = &delayed_item->delayed_node->del_root;

	rb_erase(&delayed_item->rb_node, root);
	delayed_item->delayed_node->count--;

	finish_one_item(delayed_root);
}

static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
{
	if (item) {
		__btrfs_remove_delayed_item(item);
		if (atomic_dec_and_test(&item->refs))
			kfree(item);
	}
}

static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
					struct btrfs_delayed_node *delayed_node)
{
	struct rb_node *p;
	struct btrfs_delayed_item *item = NULL;

	p = rb_first(&delayed_node->ins_root);
	if (p)
		item = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return item;
}

static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
					struct btrfs_delayed_node *delayed_node)
{
	struct rb_node *p;
	struct btrfs_delayed_item *item = NULL;

	p = rb_first(&delayed_node->del_root);
	if (p)
		item = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return item;
}

static struct btrfs_delayed_item *__btrfs_next_delayed_item(
						struct btrfs_delayed_item *item)
{
	struct rb_node *p;
	struct btrfs_delayed_item *next = NULL;

	p = rb_next(&item->rb_node);
	if (p)
		next = rb_entry(p, struct btrfs_delayed_item, rb_node);

	return next;
}

static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
					       struct btrfs_root *root,
					       struct btrfs_delayed_item *item)
{
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;

	if (!trans->bytes_reserved)
		return 0;

	src_rsv = trans->block_rsv;
	dst_rsv = &root->fs_info->delayed_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);
	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
	if (!ret) {
		trace_btrfs_space_reservation(root->fs_info, "delayed_item",
					      item->key.objectid,
					      num_bytes, 1);
		item->bytes_reserved = num_bytes;
	}

	return ret;
}

static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
						struct btrfs_delayed_item *item)
{
	struct btrfs_block_rsv *rsv;

	if (!item->bytes_reserved)
		return;

	rsv = &root->fs_info->delayed_block_rsv;
	trace_btrfs_space_reservation(root->fs_info, "delayed_item",
				      item->key.objectid, item->bytes_reserved,
				      0);
	btrfs_block_rsv_release(root, rsv,
				item->bytes_reserved);
}
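
/*
 * Reserve metadata space for one pending inode update.  Depending on the
 * caller's context the space is migrated from the transaction's block_rsv,
 * taken over from a pre-reserved delalloc chunk
 * (BTRFS_INODE_DELALLOC_META_RESERVED), reserved fresh with NO_FLUSH, or,
 * as a last resort, stolen from the global block_rsv.  On success the
 * amount is recorded in node->bytes_reserved for later release.
 */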
static int btrfs_delayed_inode_reserve_metadata(
					struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct inode *inode,
					struct btrfs_delayed_node *node)
{
	struct btrfs_block_rsv *src_rsv;
	struct btrfs_block_rsv *dst_rsv;
	u64 num_bytes;
	int ret;
	bool release = false;

	src_rsv = trans->block_rsv;
	dst_rsv = &root->fs_info->delayed_block_rsv;

	num_bytes = btrfs_calc_trans_metadata_size(root, 1);

	/*
	 * btrfs_dirty_inode will update the inode under btrfs_join_transaction
	 * which doesn't reserve space for speed.  This is a problem since we
	 * still need to reserve space for this update, so try to reserve the
	 * space.
	 *
	 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
	 * we're accounted for.
	 */
	if (!src_rsv || (!trans->bytes_reserved &&
			 src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
					  BTRFS_RESERVE_NO_FLUSH);
		/*
		 * Since we're under a transaction reserve_metadata_bytes could
		 * try to commit the transaction which will make it return
		 * EAGAIN to make us stop the transaction we have, so return
		 * ENOSPC instead so that btrfs_dirty_inode knows what to do.
		 */
		if (ret == -EAGAIN)
			ret = -ENOSPC;
		if (!ret) {
			node->bytes_reserved = num_bytes;
			trace_btrfs_space_reservation(root->fs_info,
						      "delayed_inode",
						      btrfs_ino(inode),
						      num_bytes, 1);
		}
		return ret;
	} else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
		spin_lock(&BTRFS_I(inode)->lock);
		if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
				       &BTRFS_I(inode)->runtime_flags)) {
			spin_unlock(&BTRFS_I(inode)->lock);
			release = true;
			goto migrate;
		}
		spin_unlock(&BTRFS_I(inode)->lock);

		/*
		 * Ok we didn't have space pre-reserved.  This shouldn't happen
		 * too often but it can happen if we do delalloc to an existing
		 * inode which gets dirtied because of the time update, and
		 * then isn't touched again until after the transaction commits
		 * and then we try to write out the data.  First try to be nice
		 * and reserve something strictly for us.  If not, be a pain
		 * and try to steal from the delalloc block rsv.
		 */
		ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
					  BTRFS_RESERVE_NO_FLUSH);
		if (!ret)
			goto out;

		ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
		if (!WARN_ON(ret))
			goto out;

		/*
		 * Ok this is a problem, let's just steal from the global rsv
		 * since this really shouldn't happen that often.
		 */
		ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
					      dst_rsv, num_bytes);
		goto out;
	}

migrate:
	ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);

out:
	/*
	 * Migrate only takes a reservation, it doesn't touch the size of the
	 * block_rsv.  This is to simplify people who don't normally have
	 * things migrated from their block rsv.  If they go to release their
	 * reservation, that will decrease the size as well, so if migrate
	 * reduced size we'd end up with a negative size.  But for the
	 * delalloc_meta_reserved stuff we will only know to drop 1
	 * reservation, but we could in fact do this reserve/migrate dance
	 * several times between the time we did the original reservation and
	 * we'd clean it up.  So to take care of this, release the space for
	 * the meta reservation here.  I think it may be time for a
	 * documentation page on how block rsvs work.
	 */
	if (!ret) {
		trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
					      btrfs_ino(inode), num_bytes, 1);
		node->bytes_reserved = num_bytes;
	}

	if (release) {
		trace_btrfs_space_reservation(root->fs_info, "delalloc",
					      btrfs_ino(inode), num_bytes, 0);
		btrfs_block_rsv_release(root, src_rsv, num_bytes);
	}

	return ret;
}

static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
						 struct btrfs_delayed_node *node)
{
	struct btrfs_block_rsv *rsv;

	if (!node->bytes_reserved)
		return;

	rsv = &root->fs_info->delayed_block_rsv;
	trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
				      node->inode_id, node->bytes_reserved, 0);
	btrfs_block_rsv_release(root, rsv,
				node->bytes_reserved);
	node->bytes_reserved = 0;
}

/*
 * This helper will insert some continuous items into the same leaf according
 * to the free space of the leaf.
 */
static int btrfs_batch_insert_items(struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_delayed_item *curr, *next;
	int free_space;
	int total_data_size = 0, total_size = 0;
	struct extent_buffer *leaf;
	char *data_ptr;
	struct btrfs_key *keys;
	u32 *data_size;
	struct list_head head;
	int slot;
	int nitems;
	int i;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];
	free_space = btrfs_leaf_free_space(root, leaf);
	INIT_LIST_HEAD(&head);

	next = item;
	nitems = 0;

	/*
	 * count the number of continuous items that we can insert in batch
	 */
	while (total_size + next->data_len + sizeof(struct btrfs_item) <=
	       free_space) {
		total_data_size += next->data_len;
		total_size += next->data_len + sizeof(struct btrfs_item);
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;
	}

	if (!nitems) {
		ret = 0;
		goto out;
	}

	/*
	 * we need to allocate some memory, but it might cause the task to
	 * sleep, so we set all locked nodes in the path to blocking locks
	 * first.
	 */
	btrfs_set_path_blocking(path);

	keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
	if (!keys) {
		ret = -ENOMEM;
		goto out;
	}

	data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
	if (!data_size) {
		ret = -ENOMEM;
		goto error;
	}

	/* get keys of all the delayed items */
	i = 0;
	list_for_each_entry(next, &head, tree_list) {
		keys[i] = next->key;
		data_size[i] = next->data_len;
		i++;
	}

	/* reset all the locked nodes in the path to spinning locks */
	btrfs_clear_path_blocking(path, NULL, 0);

	/* insert the keys of the items */
	setup_items_for_insert(root, path, keys, data_size,
			       total_data_size, total_size, nitems);

	/* insert the dir index items */
	slot = path->slots[0];
	list_for_each_entry_safe(curr, next, &head, tree_list) {
		data_ptr = btrfs_item_ptr(leaf, slot, char);
		write_extent_buffer(leaf, &curr->data,
				    (unsigned long)data_ptr,
				    curr->data_len);
		slot++;

		btrfs_delayed_item_release_metadata(root, curr);

		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

error:
	kfree(data_size);
	kfree(keys);
out:
	return ret;
}

/*
 * This helper handles simple insertions that don't need to extend an
 * existing item for new data, such as directory index items and inode items.
 */
static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path,
				     struct btrfs_delayed_item *delayed_item)
{
	struct extent_buffer *leaf;
	char *ptr;
	int ret;

	ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
				      delayed_item->data_len);
	if (ret < 0 && ret != -EEXIST)
		return ret;

	leaf = path->nodes[0];

	ptr = btrfs_item_ptr(leaf, path->slots[0], char);

	write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
			    delayed_item->data_len);
	btrfs_mark_buffer_dirty(leaf);

	btrfs_delayed_item_release_metadata(root, delayed_item);
	return 0;
}

/*
 * we insert an item first, then if there are some continuous items, we try
 * to insert those items into the same leaf.
 */
static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_insertion_item(node);
	if (!curr)
		goto insert_end;

	ret = btrfs_insert_delayed_item(trans, root, path, curr);
	if (ret < 0) {
		btrfs_release_path(path);
		goto insert_end;
	}

	prev = curr;
	curr = __btrfs_next_delayed_item(prev);
	if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
		/* insert the continuous items into the same leaf */
		path->slots[0]++;
		btrfs_batch_insert_items(root, path, curr);
	}
	btrfs_release_delayed_item(prev);
	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

insert_end:
	mutex_unlock(&node->mutex);
	return ret;
}
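
/*
 * Delete a run of contiguous dir index items that all live in the current
 * leaf, starting at path->slots[0].  The matching delayed items are removed
 * from the deletion tree and their metadata reservations released.
 */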
static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct btrfs_delayed_item *item)
{
	struct btrfs_delayed_item *curr, *next;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct list_head head;
	int nitems, i, last_item;
	int ret = 0;

	BUG_ON(!path->nodes[0]);

	leaf = path->nodes[0];

	i = path->slots[0];
	last_item = btrfs_header_nritems(leaf) - 1;
	if (i > last_item)
		return -ENOENT;	/* FIXME: Is errno suitable? */

	next = item;
	INIT_LIST_HEAD(&head);
	btrfs_item_key_to_cpu(leaf, &key, i);
	nitems = 0;
	/*
	 * count the number of the dir index items that we can delete in batch
	 */
	while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
		list_add_tail(&next->tree_list, &head);
		nitems++;

		curr = next;
		next = __btrfs_next_delayed_item(curr);
		if (!next)
			break;

		if (!btrfs_is_continuous_delayed_item(curr, next))
			break;

		i++;
		if (i > last_item)
			break;
		btrfs_item_key_to_cpu(leaf, &key, i);
	}

	if (!nitems)
		return 0;

	ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
	if (ret)
		goto out;

	list_for_each_entry_safe(curr, next, &head, tree_list) {
		btrfs_delayed_item_release_metadata(root, curr);
		list_del(&curr->tree_list);
		btrfs_release_delayed_item(curr);
	}

out:
	return ret;
}

static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
				      struct btrfs_path *path,
				      struct btrfs_root *root,
				      struct btrfs_delayed_node *node)
{
	struct btrfs_delayed_item *curr, *prev;
	int ret = 0;

do_again:
	mutex_lock(&node->mutex);
	curr = __btrfs_first_delayed_deletion_item(node);
	if (!curr)
		goto delete_fail;

	ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
	if (ret < 0)
		goto delete_fail;
	else if (ret > 0) {
		/*
		 * can't find the item which the node points to, so this node
		 * is invalid, just drop it.
		 */
		prev = curr;
		curr = __btrfs_next_delayed_item(prev);
		btrfs_release_delayed_item(prev);
		ret = 0;
		btrfs_release_path(path);
		if (curr) {
			mutex_unlock(&node->mutex);
			goto do_again;
		} else
			goto delete_fail;
	}

	btrfs_batch_delete_items(trans, root, path, curr);
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	goto do_again;

delete_fail:
	btrfs_release_path(path);
	mutex_unlock(&node->mutex);
	return ret;
}

static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_delayed_root *delayed_root;

	if (delayed_node &&
	    test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		BUG_ON(!delayed_node->root);
		clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
		delayed_node->count--;

		delayed_root = delayed_node->root->fs_info->delayed_root;
		finish_one_item(delayed_root);
	}
}

static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_delayed_root *delayed_root;

	ASSERT(delayed_node->root);
	clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
	delayed_node->count--;

	delayed_root = delayed_node->root->fs_info->delayed_root;
	finish_one_item(delayed_root);
}
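
/*
 * Copy the inode_item cached in the delayed node into the INODE_ITEM found
 * in the fs tree.  If BTRFS_DELAYED_NODE_DEL_IREF is set, the inode's single
 * INODE_REF/INODE_EXTREF is deleted in the same pass.  Callers must hold
 * delayed_node->mutex.
 */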
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_delayed_node *node)
{
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int mod;
	int ret;

	key.objectid = node->inode_id;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
		mod = -1;
	else
		mod = 1;

	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
	if (ret > 0) {
		btrfs_release_path(path);
		return -ENOENT;
	} else if (ret < 0) {
		return ret;
	}

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
			    sizeof(struct btrfs_inode_item));
	btrfs_mark_buffer_dirty(leaf);

	if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
		goto no_iref;

	path->slots[0]++;
	if (path->slots[0] >= btrfs_header_nritems(leaf))
		goto search;
again:
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != node->inode_id)
		goto out;

	if (key.type != BTRFS_INODE_REF_KEY &&
	    key.type != BTRFS_INODE_EXTREF_KEY)
		goto out;

	/*
	 * Delayed iref deletion is for an inode that has only one link, so
	 * there is only one iref.  The case where several irefs are stored
	 * in the same item does not exist.
	 */
	btrfs_del_item(trans, root, path);
out:
	btrfs_release_delayed_iref(node);
no_iref:
	btrfs_release_path(path);
err_out:
	btrfs_delayed_inode_release_metadata(root, node);
	btrfs_release_delayed_inode(node);

	return ret;

search:
	btrfs_release_path(path);

	key.type = BTRFS_INODE_EXTREF_KEY;
	key.offset = -1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto err_out;
	ASSERT(ret);

	ret = 0;
	leaf = path->nodes[0];
	path->slots[0]--;
	goto again;
}

static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     struct btrfs_path *path,
					     struct btrfs_delayed_node *node)
{
	int ret;

	mutex_lock(&node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
		mutex_unlock(&node->mutex);
		return 0;
	}

	ret = __btrfs_update_delayed_inode(trans, root, path, node);
	mutex_unlock(&node->mutex);
	return ret;
}

static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				   struct btrfs_path *path,
				   struct btrfs_delayed_node *node)
{
	int ret;

	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
	if (ret)
		return ret;

	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
	if (ret)
		return ret;

	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
	return ret;
}

/*
 * Called when committing the transaction.
 * Returns 0 on success.
 * Returns < 0 on error; in that case the transaction is aborted and any
 * outstanding delayed items are cleaned up.
 */
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, int nr)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_delayed_node *curr_node, *prev_node;
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret = 0;
	bool count = (nr > 0);

	if (trans->aborted)
		return -EIO;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->delayed_block_rsv;

	delayed_root = btrfs_get_delayed_root(root);

	curr_node = btrfs_first_delayed_node(delayed_root);
	while (curr_node && (!count || (count && nr--))) {
		ret = __btrfs_commit_inode_delayed_items(trans, path,
							 curr_node);
		if (ret) {
			btrfs_release_delayed_node(curr_node);
			curr_node = NULL;
			btrfs_abort_transaction(trans, root, ret);
			break;
		}

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}

	if (curr_node)
		btrfs_release_delayed_node(curr_node);
	btrfs_free_path(path);
	trans->block_rsv = block_rsv;

	return ret;
}

int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	return __btrfs_run_delayed_items(trans, root, -1);
}

int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, int nr)
{
	return __btrfs_run_delayed_items(trans, root, nr);
}

int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
				     struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret;

	if (!delayed_node)
		return 0;

	mutex_lock(&delayed_node->mutex);
	if (!delayed_node->count) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
	}
	mutex_unlock(&delayed_node->mutex);

	path = btrfs_alloc_path();
	if (!path) {
		btrfs_release_delayed_node(delayed_node);
		return -ENOMEM;
	}
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;

	ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);

	btrfs_release_delayed_node(delayed_node);
	btrfs_free_path(path);
	trans->block_rsv = block_rsv;

	return ret;
}
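
/*
 * Flush only the delayed inode item (not the delayed dir index items) of
 * one inode.  A transaction is joined internally, so this can be called
 * without a transaction handle; it is a no-op if the inode item isn't
 * dirty.
 */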
int btrfs_commit_inode_delayed_inode(struct inode *inode)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
	struct btrfs_path *path;
	struct btrfs_block_rsv *block_rsv;
	int ret;

	if (!delayed_node)
		return 0;

	mutex_lock(&delayed_node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
	}
	mutex_unlock(&delayed_node->mutex);

	trans = btrfs_join_transaction(delayed_node->root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto trans_out;
	}
	path->leave_spinning = 1;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;

	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
		ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
						   path, delayed_node);
	else
		ret = 0;
	mutex_unlock(&delayed_node->mutex);

	btrfs_free_path(path);
	trans->block_rsv = block_rsv;
trans_out:
	btrfs_end_transaction(trans, delayed_node->root);
	btrfs_btree_balance_dirty(delayed_node->root);
out:
	btrfs_release_delayed_node(delayed_node);

	return ret;
}

void btrfs_remove_delayed_node(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
	if (!delayed_node)
		return;

	BTRFS_I(inode)->delayed_node = NULL;
	btrfs_release_delayed_node(delayed_node);
}

struct btrfs_async_delayed_work {
	struct btrfs_delayed_root *delayed_root;
	int nr;
	struct btrfs_work work;
};

static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
	struct btrfs_async_delayed_work *async_work;
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_delayed_node *delayed_node = NULL;
	struct btrfs_root *root;
	struct btrfs_block_rsv *block_rsv;
	int total_done = 0;

	async_work = container_of(work, struct btrfs_async_delayed_work, work);
	delayed_root = async_work->delayed_root;

	path = btrfs_alloc_path();
	if (!path)
		goto out;

again:
	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
		goto free_path;

	delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
	if (!delayed_node)
		goto free_path;

	path->leave_spinning = 1;
	root = delayed_node->root;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans))
		goto release_path;

	block_rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->delayed_block_rsv;

	__btrfs_commit_inode_delayed_items(trans, path, delayed_node);

	trans->block_rsv = block_rsv;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty_nodelay(root);

release_path:
	btrfs_release_path(path);
	total_done++;

	btrfs_release_prepared_delayed_node(delayed_node);
	if (async_work->nr == 0 || total_done < async_work->nr)
		goto again;

free_path:
	btrfs_free_path(path);
out:
	wake_up(&delayed_root->wait);
	kfree(async_work);
}

static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
				     struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_async_delayed_work *async_work;

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return 0;

	async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
	if (!async_work)
		return -ENOMEM;

	async_work->delayed_root = delayed_root;
	btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
			btrfs_async_run_delayed_root, NULL, NULL);
	async_work->nr = nr;

	btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
	return 0;
}

void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;

	delayed_root = btrfs_get_delayed_root(root);
	WARN_ON(btrfs_first_delayed_node(delayed_root));
}

static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
{
	int val = atomic_read(&delayed_root->items_seq);

	if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
		return 1;

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return 1;

	return 0;
}

void btrfs_balance_delayed_items(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_fs_info *fs_info = root->fs_info;

	delayed_root = btrfs_get_delayed_root(root);

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return;

	if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
		int seq;
		int ret;

		seq = atomic_read(&delayed_root->items_seq);

		ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
		if (ret)
			return;

		wait_event_interruptible(delayed_root->wait,
					 could_end_wait(delayed_root, seq));
		return;
	}

	btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, const char *name,
				   int name_len, struct inode *dir,
				   struct btrfs_disk_key *disk_key, u8 type,
				   u64 index)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *delayed_item;
	struct btrfs_dir_item *dir_item;
	int ret;

	delayed_node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
	if (!delayed_item) {
		ret = -ENOMEM;
		goto release_node;
	}

	delayed_item->key.objectid = btrfs_ino(dir);
	delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
	delayed_item->key.offset = index;

	dir_item = (struct btrfs_dir_item *)delayed_item->data;
	dir_item->location = *disk_key;
	btrfs_set_stack_dir_transid(dir_item, trans->transid);
	btrfs_set_stack_dir_data_len(dir_item, 0);
	btrfs_set_stack_dir_name_len(dir_item, name_len);
	btrfs_set_stack_dir_type(dir_item, type);
	memcpy((char *)(dir_item + 1), name, name_len);

	ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
	/*
	 * we have reserved enough space when we started the transaction, so
	 * reserving metadata for this item cannot fail here.
	 */
	BUG_ON(ret);

	mutex_lock(&delayed_node->mutex);
	ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
	if (unlikely(ret)) {
		btrfs_err(root->fs_info,
			  "error adding delayed dir index item (name: %.*s) into the insertion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
			  name_len, name, delayed_node->root->objectid,
			  delayed_node->inode_id, ret);
		BUG();
	}
	mutex_unlock(&delayed_node->mutex);

release_node:
	btrfs_release_delayed_node(delayed_node);
	return ret;
}
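
/*
 * If an insertion item for @key is still pending in the delayed node, the
 * directory entry was never written into the tree: cancel the insertion and
 * drop its reservation instead of queueing a deletion item.  Returns 0 if
 * such an item was found and released, 1 otherwise.
 */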
static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
					       struct btrfs_delayed_node *node,
					       struct btrfs_key *key)
{
	struct btrfs_delayed_item *item;

	mutex_lock(&node->mutex);
	item = __btrfs_lookup_delayed_insertion_item(node, key);
	if (!item) {
		mutex_unlock(&node->mutex);
		return 1;
	}

	btrfs_delayed_item_release_metadata(root, item);
	btrfs_release_delayed_item(item);
	mutex_unlock(&node->mutex);
	return 0;
}

int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, struct inode *dir,
				   u64 index)
{
	struct btrfs_delayed_node *node;
	struct btrfs_delayed_item *item;
	struct btrfs_key item_key;
	int ret;

	node = btrfs_get_or_create_delayed_node(dir);
	if (IS_ERR(node))
		return PTR_ERR(node);

	item_key.objectid = btrfs_ino(dir);
	item_key.type = BTRFS_DIR_INDEX_KEY;
	item_key.offset = index;

	ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
	if (!ret)
		goto end;

	item = btrfs_alloc_delayed_item(0);
	if (!item) {
		ret = -ENOMEM;
		goto end;
	}

	item->key = item_key;

	ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
	/*
	 * we have reserved enough space when we started the transaction, so
	 * reserving metadata for this item cannot fail here.
	 */
	BUG_ON(ret);

	mutex_lock(&node->mutex);
	ret = __btrfs_add_delayed_deletion_item(node, item);
	if (unlikely(ret)) {
		btrfs_err(root->fs_info,
			  "error adding delayed dir index item (index: %llu) into the deletion tree of the delayed node (root id: %llu, inode id: %llu, errno: %d)",
			  index, node->root->objectid, node->inode_id,
			  ret);
		BUG();
	}
	mutex_unlock(&node->mutex);
end:
	btrfs_release_delayed_node(node);
	return ret;
}

int btrfs_inode_delayed_dir_index_count(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);

	if (!delayed_node)
		return -ENOENT;

	/*
	 * Since we hold the i_mutex of this directory, no new directory index
	 * can be added to the delayed node and index_cnt cannot be updated
	 * right now, so we don't need to lock the delayed node.
	 */
	if (!delayed_node->index_cnt) {
		btrfs_release_delayed_node(delayed_node);
		return -EINVAL;
	}

	BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
	btrfs_release_delayed_node(delayed_node);
	return 0;
}
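
/*
 * Snapshot the pending insertion and deletion items of a directory for
 * readdir.  Each item is put on the caller's ins_list/del_list with an
 * extra reference; the lists must be released again with
 * btrfs_put_delayed_items().
 */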
void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
			     struct list_head *del_list)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_delayed_item *item;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;

	mutex_lock(&delayed_node->mutex);
	item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (item) {
		atomic_inc(&item->refs);
		list_add_tail(&item->readdir_list, ins_list);
		item = __btrfs_next_delayed_item(item);
	}

	item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (item) {
		atomic_inc(&item->refs);
		list_add_tail(&item->readdir_list, del_list);
		item = __btrfs_next_delayed_item(item);
	}
	mutex_unlock(&delayed_node->mutex);
	/*
	 * This delayed node is still cached in the btrfs inode, so refs
	 * must be > 1 now, and we needn't check whether it is going to be
	 * freed or not.
	 *
	 * Besides that, this function is used to read the directory, and we
	 * do not insert/delete delayed items during this period.  So we also
	 * needn't requeue or dequeue this delayed node.
	 */
	atomic_dec(&delayed_node->refs);
}

void btrfs_put_delayed_items(struct list_head *ins_list,
			     struct list_head *del_list)
{
	struct btrfs_delayed_item *curr, *next;

	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);
	}

	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
		list_del(&curr->readdir_list);
		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);
	}
}

int btrfs_should_delete_dir_index(struct list_head *del_list,
				  u64 index)
{
	struct btrfs_delayed_item *curr, *next;
	int ret;

	if (list_empty(del_list))
		return 0;

	list_for_each_entry_safe(curr, next, del_list, readdir_list) {
		if (curr->key.offset > index)
			break;

		list_del(&curr->readdir_list);
		ret = (curr->key.offset == index);

		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);

		if (ret)
			return 1;
		else
			continue;
	}
	return 0;
}

/*
 * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
 */
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
				    struct list_head *ins_list)
{
	struct btrfs_dir_item *di;
	struct btrfs_delayed_item *curr, *next;
	struct btrfs_key location;
	char *name;
	int name_len;
	int over = 0;
	unsigned char d_type;

	if (list_empty(ins_list))
		return 0;

	/*
	 * Changing the data of the delayed item is impossible, so we don't
	 * need to lock them.  And we have held the i_mutex of the directory,
	 * so nobody can delete any directory indexes now.
	 */
	list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
		list_del(&curr->readdir_list);

		if (curr->key.offset < ctx->pos) {
			if (atomic_dec_and_test(&curr->refs))
				kfree(curr);
			continue;
		}

		ctx->pos = curr->key.offset;

		di = (struct btrfs_dir_item *)curr->data;
		name = (char *)(di + 1);
		name_len = btrfs_stack_dir_name_len(di);

		d_type = btrfs_filetype_table[di->type];
		btrfs_disk_key_to_cpu(&location, &di->location);

		over = !dir_emit(ctx, name, name_len,
				 location.objectid, d_type);

		if (atomic_dec_and_test(&curr->refs))
			kfree(curr);

		if (over)
			return 1;
	}
	return 0;
}

static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
				  struct btrfs_inode_item *inode_item,
				  struct inode *inode)
{
	btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
	btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
	btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
	btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
	btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
	btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
	btrfs_set_stack_inode_generation(inode_item,
					 BTRFS_I(inode)->generation);
	btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
	btrfs_set_stack_inode_transid(inode_item, trans->transid);
	btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
	btrfs_set_stack_inode_block_group(inode_item, 0);

	btrfs_set_stack_timespec_sec(&inode_item->atime,
				     inode->i_atime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->atime,
				      inode->i_atime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->mtime,
				     inode->i_mtime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->mtime,
				      inode->i_mtime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->ctime,
				     inode->i_ctime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->ctime,
				      inode->i_ctime.tv_nsec);

	btrfs_set_stack_timespec_sec(&inode_item->otime,
				     BTRFS_I(inode)->i_otime.tv_sec);
	btrfs_set_stack_timespec_nsec(&inode_item->otime,
				      BTRFS_I(inode)->i_otime.tv_nsec);
}
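
/*
 * Fill the VFS inode from the inode_item cached in the delayed node.
 * Returns -ENOENT when there is no delayed node for this inode or its
 * inode item is not dirty.
 */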
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
	struct btrfs_delayed_node *delayed_node;
	struct btrfs_inode_item *inode_item;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return -ENOENT;

	mutex_lock(&delayed_node->mutex);
	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return -ENOENT;
	}

	inode_item = &delayed_node->inode_item;

	i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
	i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
	btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
	inode->i_mode = btrfs_stack_inode_mode(inode_item);
	set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
	inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
	BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
	BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);

	inode->i_version = btrfs_stack_inode_sequence(inode_item);
	inode->i_rdev = 0;
	*rdev = btrfs_stack_inode_rdev(inode_item);
	BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);

	inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_stack_timespec_sec(&inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_stack_timespec_nsec(&inode_item->otime);

	inode->i_generation = BTRFS_I(inode)->generation;
	BTRFS_I(inode)->index_cnt = (u64)-1;

	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;
	int ret = 0;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
		goto release_node;
	}

	ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
						   delayed_node);
	if (ret)
		goto release_node;

	fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
	set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&root->fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return ret;
}

int btrfs_delayed_delete_inode_ref(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	/*
	 * we don't do delayed inode updates during log recovery because it
	 * leads to enospc problems.  This means we also can't do
	 * delayed inode refs
	 */
	if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
		return -EAGAIN;

	delayed_node = btrfs_get_or_create_delayed_node(inode);
	if (IS_ERR(delayed_node))
		return PTR_ERR(delayed_node);

	/*
	 * We don't reserve space for inode ref deletion because:
	 * - We ONLY do async inode ref deletion for an inode that has only
	 *   one link (i_nlink == 1), which means there is only one inode ref.
	 *   In most cases, the inode ref and the inode item are in the
	 *   same leaf, and we will deal with them at the same time.
	 *   Since we are sure we will reserve the space for the inode item,
	 *   it is unnecessary to reserve space for the inode ref deletion.
	 * - If the inode ref and the inode item are not in the same leaf,
	 *   we also needn't worry about enospc, because we reserve much more
	 *   space for the inode update than it needs.
	 * - At the worst, we can steal some space from the global
	 *   reservation.  It is very rare.
	 */
	mutex_lock(&delayed_node->mutex);
	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
		goto release_node;

	set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
	delayed_node->count++;
	atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
release_node:
	mutex_unlock(&delayed_node->mutex);
	btrfs_release_delayed_node(delayed_node);
	return 0;
}

static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
{
	struct btrfs_root *root = delayed_node->root;
	struct btrfs_delayed_item *curr_item, *prev_item;

	mutex_lock(&delayed_node->mutex);
	curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
	while (curr_item) {
		btrfs_delayed_item_release_metadata(root, curr_item);
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
	while (curr_item) {
		btrfs_delayed_item_release_metadata(root, curr_item);
		prev_item = curr_item;
		curr_item = __btrfs_next_delayed_item(prev_item);
		btrfs_release_delayed_item(prev_item);
	}

	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
		btrfs_release_delayed_iref(delayed_node);

	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		btrfs_delayed_inode_release_metadata(root, delayed_node);
		btrfs_release_delayed_inode(delayed_node);
	}
	mutex_unlock(&delayed_node->mutex);
}

void btrfs_kill_delayed_inode_items(struct inode *inode)
{
	struct btrfs_delayed_node *delayed_node;

	delayed_node = btrfs_get_delayed_node(inode);
	if (!delayed_node)
		return;

	__btrfs_kill_delayed_node(delayed_node);
	btrfs_release_delayed_node(delayed_node);
}

void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
	u64 inode_id = 0;
	struct btrfs_delayed_node *delayed_nodes[8];
	int i, n;

	while (1) {
		spin_lock(&root->inode_lock);
		n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
					   (void **)delayed_nodes, inode_id,
					   ARRAY_SIZE(delayed_nodes));
		if (!n) {
			spin_unlock(&root->inode_lock);
			break;
		}

		inode_id = delayed_nodes[n - 1]->inode_id + 1;

		for (i = 0; i < n; i++)
			atomic_inc(&delayed_nodes[i]->refs);
		spin_unlock(&root->inode_lock);

		for (i = 0; i < n; i++) {
			__btrfs_kill_delayed_node(delayed_nodes[i]);
			btrfs_release_delayed_node(delayed_nodes[i]);
		}
	}
}

void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root;
	struct btrfs_delayed_node *curr_node, *prev_node;

	delayed_root = btrfs_get_delayed_root(root);

	curr_node = btrfs_first_delayed_node(delayed_root);
	while (curr_node) {
		__btrfs_kill_delayed_node(curr_node);

		prev_node = curr_node;
		curr_node = btrfs_next_delayed_node(curr_node);
		btrfs_release_delayed_node(prev_node);
	}
}