// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_info ei;

	get_read_extent_info(&ei, i_ext);

	if (!ei.len)
		return true;

	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
					DATA_GENERIC_ENHANCE)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei.blk, ei.fofs, ei.len);
		return false;
	}
	return true;
}

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}
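/*
 * __init_may_extent_tree() above checks only static policy: the mount
 * option for the given cache type and the inode mode.
 * __may_extent_tree() below additionally checks runtime state: whether
 * the shrinker is registered and per-inode flags such as FI_NO_EXTENT,
 * FI_COMPRESSED_FILE and the cold-file hint.
 */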
static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * For files recovered during mount, do not create extents
	 * if the shrinker is not yet registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
				!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}
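/*
 * Example of the merge rule above for EX_READ: back {fofs = 0, blk = 100,
 * len = 4} and front {fofs = 4, blk = 104, len = 2} are both logically and
 * physically contiguous, so they can be merged into {fofs = 0, blk = 100,
 * len = 6}.  For EX_BLOCK_AGE, physical contiguity is irrelevant; the two
 * extents only need adjacent file offsets and age/last_blocks values
 * within SAME_AGE_REGION of each other.
 */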
static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check a cached entry */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree; if hit, return the
 * entry, otherwise return NULL.
 * @prev_entry: extent before fofs
 * @next_entry: extent after fofs
 * @insert_p/@insert_parent: insert point for a new extent at fofs,
 *                           recorded to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}
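/*
 * Example: with extents {fofs = 0, len = 4} and {fofs = 8, len = 4} in the
 * tree, __lookup_extent_node_ret(..., fofs = 5, ...) misses and returns
 * NULL, but reports {0, 4} via @prev_entry, {8, 4} via @next_entry, and
 * the rb-tree slot where a new extent at offset 5 would be linked via
 * @insert_p/@insert_parent.
 */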
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
			struct extent_tree *et, unsigned int nr_shrink)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count;

	node = rb_first_cached(&et->root);

	for (count = 0; node && count < nr_shrink; count++) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count;
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		set_inode_flag(inode, FI_NO_EXTENT);
		return;
	}

	et = __grab_extent_tree(inode, EX_READ);

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt) || !ei.len)
		goto skip;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
skip:
	/* drop the largest extent if the checkpoint got corrupted */
	if (f2fs_cp_error(sbi)) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
	write_unlock(&et->lock);
}
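/*
 * After f2fs_init_read_extent_tree(), the on-disk i_ext has been mirrored
 * into the in-memory tree as both the initial extent node and et->largest.
 * From here on the in-memory cache is authoritative; the largest extent
 * is what eventually gets written back to i_ext when the inode is updated.
 */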
void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}
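/*
 * Note the double-merge case above: when the new extent bridges prev_ex
 * and next_ex, it is first folded into prev_ex, then prev_ex (now
 * enlarged) is folded into next_ex and released, leaving a single node
 * that spans all three ranges.
 */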
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int nr_shrink = type == EX_READ ?
				READ_EXTENT_CACHE_SHRINK_NUMBER :
				AGE_EXTENT_CACHE_SHRINK_NUMBER;
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	/* free in batches, releasing the write lock between batches */
	while (atomic_read(&et->node_cnt)) {
		write_lock(&et->lock);
		node_cnt += __free_extent_tree(sbi, et, nr_shrink);
		write_unlock(&et->lock);
	}

	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));

	return node_cnt;
}
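/*
 * __update_extent_tree_range() below invalidates everything in
 * [fofs, fofs + len) and handles four split cases for each overlapping
 * extent:
 *   - fully covered:         the node is released (parts == 0);
 *   - head survives:         the node is trimmed in place (parts == 1);
 *   - tail survives:         the node is re-keyed to start at @end;
 *   - head and tail survive: the head is trimmed in place and a new
 *                            node is inserted for the tail (parts == 2).
 * For EX_READ, a surviving piece shorter than F2FS_MIN_EXTENT_LEN is
 * dropped rather than kept.
 */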
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent is split into zero or two parts,
		 * the extent tree has been altered by deletion or insertion,
		 * so invalidate the cached insert position.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache if splits and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__destroy_extent_node(inode, EX_READ);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}
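/*
 * The compression-only variant below caches the mapping of a compressed
 * cluster: @llen is the logical cluster length and @c_len the number of
 * physical blocks actually used.  If an extent already covers @fofs, the
 * update is skipped.  Because ei.c_len stays nonzero, __is_extent_mergeable()
 * will not merge such an extent with neighbors unless its logical and
 * physical lengths match.
 */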
#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr,
				unsigned int llen, unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight) +
		div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}
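/*
 * The above computes the weighted average
 *	age = new * (100 - weight) / 100 + old * weight / 100
 * via div_u64_rem() so the multiplications cannot overflow u64.
 * Example with weight = 30: new = 250, old = 150 gives
 *	2 * 70 + 1 * 30 + (50 * 70) / 100 + (50 * 30) / 100
 *	= 140 + 30 + 35 + 15 = 220,
 * which matches (250 * 70 + 150 * 30) / 100 = 220.
 */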
/* This returns a new age and the allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, updates hit the last file
	 * block even for sequential writes, so don't record an age for a
	 * new last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		f2fs_bug_on(sbi, 1);
		return -EINVAL;
	}
out:
	/*
	 * Init block age with zero; this can happen when the block-age
	 * extent was reclaimed due to memory pressure or system reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}
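/*
 * Shrinking proceeds in two phases below: first whole zombie trees
 * (trees whose inodes have been evicted but which still hold nodes) are
 * freed, then individual nodes are reclaimed from the head of the global
 * LRU list.  Trylocks are used because the shrinker may run concurrently
 * with lookups/updates, which take et->lock before eti->extent_lock,
 * i.e. in the opposite order to phase two here.
 */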
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et,
					nr_shrink - node_cnt - tree_cnt);
			write_unlock(&et->lock);
		}

		if (atomic_read(&et->node_cnt))
			goto unlock_out;

		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}
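/*
 * Example for the helper above: with a cached read extent
 * {fofs = 100, blk = 5000, len = 8}, a lookup of index 103 hits and
 * yields *blkaddr = 5000 + (103 - 100) = 5003.
 */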
void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);

	__destroy_extent_node(inode, type);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete the extent tree entry in the radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}