1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * f2fs extent cache support 4 * 5 * Copyright (c) 2015 Motorola Mobility 6 * Copyright (c) 2015 Samsung Electronics 7 * Authors: Jaegeuk Kim <jaegeuk@kernel.org> 8 * Chao Yu <chao2.yu@samsung.com> 9 * 10 * block_age-based extent cache added by: 11 * Copyright (c) 2022 xiaomi Co., Ltd. 12 * http://www.xiaomi.com/ 13 */ 14 15 #include <linux/fs.h> 16 #include <linux/f2fs_fs.h> 17 18 #include "f2fs.h" 19 #include "node.h" 20 #include <trace/events/f2fs.h> 21 22 bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) 23 { 24 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 25 struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; 26 struct extent_info ei; 27 28 get_read_extent_info(&ei, i_ext); 29 30 if (!ei.len) 31 return true; 32 33 if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) || 34 !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1, 35 DATA_GENERIC_ENHANCE)) { 36 set_sbi_flag(sbi, SBI_NEED_FSCK); 37 f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", 38 __func__, inode->i_ino, 39 ei.blk, ei.fofs, ei.len); 40 return false; 41 } 42 return true; 43 } 44 45 static void __set_extent_info(struct extent_info *ei, 46 unsigned int fofs, unsigned int len, 47 block_t blk, bool keep_clen, 48 unsigned long age, unsigned long last_blocks, 49 enum extent_type type) 50 { 51 ei->fofs = fofs; 52 ei->len = len; 53 54 if (type == EX_READ) { 55 ei->blk = blk; 56 if (keep_clen) 57 return; 58 #ifdef CONFIG_F2FS_FS_COMPRESSION 59 ei->c_len = 0; 60 #endif 61 } else if (type == EX_BLOCK_AGE) { 62 ei->age = age; 63 ei->last_blocks = last_blocks; 64 } 65 } 66 67 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) 68 { 69 if (type == EX_READ) 70 return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) && 71 S_ISREG(inode->i_mode); 72 if (type == EX_BLOCK_AGE) 73 return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) && 74 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)); 75 return false; 76 } 77 78 static bool __may_extent_tree(struct inode *inode, enum extent_type type) 79 { 80 /* 81 * for recovered files during mount do not create extents 82 * if shrinker is not registered. 83 */ 84 if (list_empty(&F2FS_I_SB(inode)->s_list)) 85 return false; 86 87 if (!__init_may_extent_tree(inode, type)) 88 return false; 89 90 if (type == EX_READ) { 91 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 92 return false; 93 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) && 94 !f2fs_sb_has_readonly(F2FS_I_SB(inode))) 95 return false; 96 } else if (type == EX_BLOCK_AGE) { 97 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE)) 98 return false; 99 if (file_is_cold(inode)) 100 return false; 101 } 102 return true; 103 } 104 105 static void __try_update_largest_extent(struct extent_tree *et, 106 struct extent_node *en) 107 { 108 if (et->type != EX_READ) 109 return; 110 if (en->ei.len <= et->largest.len) 111 return; 112 113 et->largest = en->ei; 114 et->largest_updated = true; 115 } 116 117 static bool __is_extent_mergeable(struct extent_info *back, 118 struct extent_info *front, enum extent_type type) 119 { 120 if (type == EX_READ) { 121 #ifdef CONFIG_F2FS_FS_COMPRESSION 122 if (back->c_len && back->len != back->c_len) 123 return false; 124 if (front->c_len && front->len != front->c_len) 125 return false; 126 #endif 127 return (back->fofs + back->len == front->fofs && 128 back->blk + back->len == front->blk); 129 } else if (type == EX_BLOCK_AGE) { 130 return (back->fofs + back->len == front->fofs && 131 abs(back->age - front->age) <= SAME_AGE_REGION && 132 abs(back->last_blocks - front->last_blocks) <= 133 SAME_AGE_REGION); 134 } 135 return false; 136 } 137 138 static bool __is_back_mergeable(struct extent_info *cur, 139 struct extent_info *back, enum extent_type type) 140 { 141 return __is_extent_mergeable(back, cur, type); 142 } 143 144 static bool __is_front_mergeable(struct extent_info *cur, 145 struct extent_info *front, enum extent_type type) 146 { 147 return __is_extent_mergeable(cur, front, type); 148 } 149 150 static struct extent_node *__lookup_extent_node(struct rb_root_cached *root, 151 struct extent_node *cached_en, unsigned int fofs) 152 { 153 struct rb_node *node = root->rb_root.rb_node; 154 struct extent_node *en; 155 156 /* check a cached entry */ 157 if (cached_en && cached_en->ei.fofs <= fofs && 158 cached_en->ei.fofs + cached_en->ei.len > fofs) 159 return cached_en; 160 161 /* check rb_tree */ 162 while (node) { 163 en = rb_entry(node, struct extent_node, rb_node); 164 165 if (fofs < en->ei.fofs) 166 node = node->rb_left; 167 else if (fofs >= en->ei.fofs + en->ei.len) 168 node = node->rb_right; 169 else 170 return en; 171 } 172 return NULL; 173 } 174 175 /* 176 * lookup rb entry in position of @fofs in rb-tree, 177 * if hit, return the entry, otherwise, return NULL 178 * @prev_ex: extent before fofs 179 * @next_ex: extent after fofs 180 * @insert_p: insert point for new extent at fofs 181 * in order to simplify the insertion after. 182 * tree must stay unchanged between lookup and insertion. 183 */ 184 static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root, 185 struct extent_node *cached_en, 186 unsigned int fofs, 187 struct extent_node **prev_entry, 188 struct extent_node **next_entry, 189 struct rb_node ***insert_p, 190 struct rb_node **insert_parent, 191 bool *leftmost) 192 { 193 struct rb_node **pnode = &root->rb_root.rb_node; 194 struct rb_node *parent = NULL, *tmp_node; 195 struct extent_node *en = cached_en; 196 197 *insert_p = NULL; 198 *insert_parent = NULL; 199 *prev_entry = NULL; 200 *next_entry = NULL; 201 202 if (RB_EMPTY_ROOT(&root->rb_root)) 203 return NULL; 204 205 if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs) 206 goto lookup_neighbors; 207 208 *leftmost = true; 209 210 while (*pnode) { 211 parent = *pnode; 212 en = rb_entry(*pnode, struct extent_node, rb_node); 213 214 if (fofs < en->ei.fofs) { 215 pnode = &(*pnode)->rb_left; 216 } else if (fofs >= en->ei.fofs + en->ei.len) { 217 pnode = &(*pnode)->rb_right; 218 *leftmost = false; 219 } else { 220 goto lookup_neighbors; 221 } 222 } 223 224 *insert_p = pnode; 225 *insert_parent = parent; 226 227 en = rb_entry(parent, struct extent_node, rb_node); 228 tmp_node = parent; 229 if (parent && fofs > en->ei.fofs) 230 tmp_node = rb_next(parent); 231 *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); 232 233 tmp_node = parent; 234 if (parent && fofs < en->ei.fofs) 235 tmp_node = rb_prev(parent); 236 *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); 237 return NULL; 238 239 lookup_neighbors: 240 if (fofs == en->ei.fofs) { 241 /* lookup prev node for merging backward later */ 242 tmp_node = rb_prev(&en->rb_node); 243 *prev_entry = rb_entry_safe(tmp_node, 244 struct extent_node, rb_node); 245 } 246 if (fofs == en->ei.fofs + en->ei.len - 1) { 247 /* lookup next node for merging frontward later */ 248 tmp_node = rb_next(&en->rb_node); 249 *next_entry = rb_entry_safe(tmp_node, 250 struct extent_node, rb_node); 251 } 252 return en; 253 } 254 255 static struct kmem_cache *extent_tree_slab; 256 static struct kmem_cache *extent_node_slab; 257 258 static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, 259 struct extent_tree *et, struct extent_info *ei, 260 struct rb_node *parent, struct rb_node **p, 261 bool leftmost) 262 { 263 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 264 struct extent_node *en; 265 266 en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi); 267 if (!en) 268 return NULL; 269 270 en->ei = *ei; 271 INIT_LIST_HEAD(&en->list); 272 en->et = et; 273 274 rb_link_node(&en->rb_node, parent, p); 275 rb_insert_color_cached(&en->rb_node, &et->root, leftmost); 276 atomic_inc(&et->node_cnt); 277 atomic_inc(&eti->total_ext_node); 278 return en; 279 } 280 281 static void __detach_extent_node(struct f2fs_sb_info *sbi, 282 struct extent_tree *et, struct extent_node *en) 283 { 284 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 285 286 rb_erase_cached(&en->rb_node, &et->root); 287 atomic_dec(&et->node_cnt); 288 atomic_dec(&eti->total_ext_node); 289 290 if (et->cached_en == en) 291 et->cached_en = NULL; 292 kmem_cache_free(extent_node_slab, en); 293 } 294 295 /* 296 * Flow to release an extent_node: 297 * 1. list_del_init 298 * 2. __detach_extent_node 299 * 3. kmem_cache_free. 300 */ 301 static void __release_extent_node(struct f2fs_sb_info *sbi, 302 struct extent_tree *et, struct extent_node *en) 303 { 304 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 305 306 spin_lock(&eti->extent_lock); 307 f2fs_bug_on(sbi, list_empty(&en->list)); 308 list_del_init(&en->list); 309 spin_unlock(&eti->extent_lock); 310 311 __detach_extent_node(sbi, et, en); 312 } 313 314 static struct extent_tree *__grab_extent_tree(struct inode *inode, 315 enum extent_type type) 316 { 317 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 318 struct extent_tree_info *eti = &sbi->extent_tree[type]; 319 struct extent_tree *et; 320 nid_t ino = inode->i_ino; 321 322 mutex_lock(&eti->extent_tree_lock); 323 et = radix_tree_lookup(&eti->extent_tree_root, ino); 324 if (!et) { 325 et = f2fs_kmem_cache_alloc(extent_tree_slab, 326 GFP_NOFS, true, NULL); 327 f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et); 328 memset(et, 0, sizeof(struct extent_tree)); 329 et->ino = ino; 330 et->type = type; 331 et->root = RB_ROOT_CACHED; 332 et->cached_en = NULL; 333 rwlock_init(&et->lock); 334 INIT_LIST_HEAD(&et->list); 335 atomic_set(&et->node_cnt, 0); 336 atomic_inc(&eti->total_ext_tree); 337 } else { 338 atomic_dec(&eti->total_zombie_tree); 339 list_del_init(&et->list); 340 } 341 mutex_unlock(&eti->extent_tree_lock); 342 343 /* never died until evict_inode */ 344 F2FS_I(inode)->extent_tree[type] = et; 345 346 return et; 347 } 348 349 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, 350 struct extent_tree *et) 351 { 352 struct rb_node *node, *next; 353 struct extent_node *en; 354 unsigned int count = atomic_read(&et->node_cnt); 355 356 node = rb_first_cached(&et->root); 357 while (node) { 358 next = rb_next(node); 359 en = rb_entry(node, struct extent_node, rb_node); 360 __release_extent_node(sbi, et, en); 361 node = next; 362 } 363 364 return count - atomic_read(&et->node_cnt); 365 } 366 367 static void __drop_largest_extent(struct extent_tree *et, 368 pgoff_t fofs, unsigned int len) 369 { 370 if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && 371 fofs + len > et->largest.fofs) { 372 et->largest.len = 0; 373 et->largest_updated = true; 374 } 375 } 376 377 void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) 378 { 379 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 380 struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; 381 struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; 382 struct extent_tree *et; 383 struct extent_node *en; 384 struct extent_info ei; 385 386 if (!__may_extent_tree(inode, EX_READ)) { 387 /* drop largest read extent */ 388 if (i_ext->len) { 389 f2fs_wait_on_page_writeback(ipage, NODE, true, true); 390 i_ext->len = 0; 391 set_page_dirty(ipage); 392 } 393 set_inode_flag(inode, FI_NO_EXTENT); 394 return; 395 } 396 397 et = __grab_extent_tree(inode, EX_READ); 398 399 get_read_extent_info(&ei, i_ext); 400 401 write_lock(&et->lock); 402 if (atomic_read(&et->node_cnt) || !ei.len) 403 goto skip; 404 405 en = __attach_extent_node(sbi, et, &ei, NULL, 406 &et->root.rb_root.rb_node, true); 407 if (en) { 408 et->largest = en->ei; 409 et->cached_en = en; 410 411 spin_lock(&eti->extent_lock); 412 list_add_tail(&en->list, &eti->extent_list); 413 spin_unlock(&eti->extent_lock); 414 } 415 skip: 416 /* Let's drop, if checkpoint got corrupted. */ 417 if (f2fs_cp_error(sbi)) { 418 et->largest.len = 0; 419 et->largest_updated = true; 420 } 421 write_unlock(&et->lock); 422 } 423 424 void f2fs_init_age_extent_tree(struct inode *inode) 425 { 426 if (!__init_may_extent_tree(inode, EX_BLOCK_AGE)) 427 return; 428 __grab_extent_tree(inode, EX_BLOCK_AGE); 429 } 430 431 void f2fs_init_extent_tree(struct inode *inode) 432 { 433 /* initialize read cache */ 434 if (__init_may_extent_tree(inode, EX_READ)) 435 __grab_extent_tree(inode, EX_READ); 436 437 /* initialize block age cache */ 438 if (__init_may_extent_tree(inode, EX_BLOCK_AGE)) 439 __grab_extent_tree(inode, EX_BLOCK_AGE); 440 } 441 442 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, 443 struct extent_info *ei, enum extent_type type) 444 { 445 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 446 struct extent_tree_info *eti = &sbi->extent_tree[type]; 447 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 448 struct extent_node *en; 449 bool ret = false; 450 451 if (!et) 452 return false; 453 454 trace_f2fs_lookup_extent_tree_start(inode, pgofs, type); 455 456 read_lock(&et->lock); 457 458 if (type == EX_READ && 459 et->largest.fofs <= pgofs && 460 (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { 461 *ei = et->largest; 462 ret = true; 463 stat_inc_largest_node_hit(sbi); 464 goto out; 465 } 466 467 en = __lookup_extent_node(&et->root, et->cached_en, pgofs); 468 if (!en) 469 goto out; 470 471 if (en == et->cached_en) 472 stat_inc_cached_node_hit(sbi, type); 473 else 474 stat_inc_rbtree_node_hit(sbi, type); 475 476 *ei = en->ei; 477 spin_lock(&eti->extent_lock); 478 if (!list_empty(&en->list)) { 479 list_move_tail(&en->list, &eti->extent_list); 480 et->cached_en = en; 481 } 482 spin_unlock(&eti->extent_lock); 483 ret = true; 484 out: 485 stat_inc_total_hit(sbi, type); 486 read_unlock(&et->lock); 487 488 if (type == EX_READ) 489 trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei); 490 else if (type == EX_BLOCK_AGE) 491 trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei); 492 return ret; 493 } 494 495 static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, 496 struct extent_tree *et, struct extent_info *ei, 497 struct extent_node *prev_ex, 498 struct extent_node *next_ex) 499 { 500 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 501 struct extent_node *en = NULL; 502 503 if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) { 504 prev_ex->ei.len += ei->len; 505 ei = &prev_ex->ei; 506 en = prev_ex; 507 } 508 509 if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) { 510 next_ex->ei.fofs = ei->fofs; 511 next_ex->ei.len += ei->len; 512 if (et->type == EX_READ) 513 next_ex->ei.blk = ei->blk; 514 if (en) 515 __release_extent_node(sbi, et, prev_ex); 516 517 en = next_ex; 518 } 519 520 if (!en) 521 return NULL; 522 523 __try_update_largest_extent(et, en); 524 525 spin_lock(&eti->extent_lock); 526 if (!list_empty(&en->list)) { 527 list_move_tail(&en->list, &eti->extent_list); 528 et->cached_en = en; 529 } 530 spin_unlock(&eti->extent_lock); 531 return en; 532 } 533 534 static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, 535 struct extent_tree *et, struct extent_info *ei, 536 struct rb_node **insert_p, 537 struct rb_node *insert_parent, 538 bool leftmost) 539 { 540 struct extent_tree_info *eti = &sbi->extent_tree[et->type]; 541 struct rb_node **p = &et->root.rb_root.rb_node; 542 struct rb_node *parent = NULL; 543 struct extent_node *en = NULL; 544 545 if (insert_p && insert_parent) { 546 parent = insert_parent; 547 p = insert_p; 548 goto do_insert; 549 } 550 551 leftmost = true; 552 553 /* look up extent_node in the rb tree */ 554 while (*p) { 555 parent = *p; 556 en = rb_entry(parent, struct extent_node, rb_node); 557 558 if (ei->fofs < en->ei.fofs) { 559 p = &(*p)->rb_left; 560 } else if (ei->fofs >= en->ei.fofs + en->ei.len) { 561 p = &(*p)->rb_right; 562 leftmost = false; 563 } else { 564 f2fs_bug_on(sbi, 1); 565 } 566 } 567 568 do_insert: 569 en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); 570 if (!en) 571 return NULL; 572 573 __try_update_largest_extent(et, en); 574 575 /* update in global extent list */ 576 spin_lock(&eti->extent_lock); 577 list_add_tail(&en->list, &eti->extent_list); 578 et->cached_en = en; 579 spin_unlock(&eti->extent_lock); 580 return en; 581 } 582 583 static void __update_extent_tree_range(struct inode *inode, 584 struct extent_info *tei, enum extent_type type) 585 { 586 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 587 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 588 struct extent_node *en = NULL, *en1 = NULL; 589 struct extent_node *prev_en = NULL, *next_en = NULL; 590 struct extent_info ei, dei, prev; 591 struct rb_node **insert_p = NULL, *insert_parent = NULL; 592 unsigned int fofs = tei->fofs, len = tei->len; 593 unsigned int end = fofs + len; 594 bool updated = false; 595 bool leftmost = false; 596 597 if (!et) 598 return; 599 600 if (type == EX_READ) 601 trace_f2fs_update_read_extent_tree_range(inode, fofs, len, 602 tei->blk, 0); 603 else if (type == EX_BLOCK_AGE) 604 trace_f2fs_update_age_extent_tree_range(inode, fofs, len, 605 tei->age, tei->last_blocks); 606 607 write_lock(&et->lock); 608 609 if (type == EX_READ) { 610 if (is_inode_flag_set(inode, FI_NO_EXTENT)) { 611 write_unlock(&et->lock); 612 return; 613 } 614 615 prev = et->largest; 616 dei.len = 0; 617 618 /* 619 * drop largest extent before lookup, in case it's already 620 * been shrunk from extent tree 621 */ 622 __drop_largest_extent(et, fofs, len); 623 } 624 625 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ 626 en = __lookup_extent_node_ret(&et->root, 627 et->cached_en, fofs, 628 &prev_en, &next_en, 629 &insert_p, &insert_parent, 630 &leftmost); 631 if (!en) 632 en = next_en; 633 634 /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */ 635 while (en && en->ei.fofs < end) { 636 unsigned int org_end; 637 int parts = 0; /* # of parts current extent split into */ 638 639 next_en = en1 = NULL; 640 641 dei = en->ei; 642 org_end = dei.fofs + dei.len; 643 f2fs_bug_on(sbi, fofs >= org_end); 644 645 if (fofs > dei.fofs && (type != EX_READ || 646 fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) { 647 en->ei.len = fofs - en->ei.fofs; 648 prev_en = en; 649 parts = 1; 650 } 651 652 if (end < org_end && (type != EX_READ || 653 org_end - end >= F2FS_MIN_EXTENT_LEN)) { 654 if (parts) { 655 __set_extent_info(&ei, 656 end, org_end - end, 657 end - dei.fofs + dei.blk, false, 658 dei.age, dei.last_blocks, 659 type); 660 en1 = __insert_extent_tree(sbi, et, &ei, 661 NULL, NULL, true); 662 next_en = en1; 663 } else { 664 __set_extent_info(&en->ei, 665 end, en->ei.len - (end - dei.fofs), 666 en->ei.blk + (end - dei.fofs), true, 667 dei.age, dei.last_blocks, 668 type); 669 next_en = en; 670 } 671 parts++; 672 } 673 674 if (!next_en) { 675 struct rb_node *node = rb_next(&en->rb_node); 676 677 next_en = rb_entry_safe(node, struct extent_node, 678 rb_node); 679 } 680 681 if (parts) 682 __try_update_largest_extent(et, en); 683 else 684 __release_extent_node(sbi, et, en); 685 686 /* 687 * if original extent is split into zero or two parts, extent 688 * tree has been altered by deletion or insertion, therefore 689 * invalidate pointers regard to tree. 690 */ 691 if (parts != 1) { 692 insert_p = NULL; 693 insert_parent = NULL; 694 } 695 en = next_en; 696 } 697 698 if (type == EX_BLOCK_AGE) 699 goto update_age_extent_cache; 700 701 /* 3. update extent in read extent cache */ 702 BUG_ON(type != EX_READ); 703 704 if (tei->blk) { 705 __set_extent_info(&ei, fofs, len, tei->blk, false, 706 0, 0, EX_READ); 707 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 708 __insert_extent_tree(sbi, et, &ei, 709 insert_p, insert_parent, leftmost); 710 711 /* give up extent_cache, if split and small updates happen */ 712 if (dei.len >= 1 && 713 prev.len < F2FS_MIN_EXTENT_LEN && 714 et->largest.len < F2FS_MIN_EXTENT_LEN) { 715 et->largest.len = 0; 716 et->largest_updated = true; 717 set_inode_flag(inode, FI_NO_EXTENT); 718 } 719 } 720 721 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 722 __free_extent_tree(sbi, et); 723 724 if (et->largest_updated) { 725 et->largest_updated = false; 726 updated = true; 727 } 728 goto out_read_extent_cache; 729 update_age_extent_cache: 730 if (!tei->last_blocks) 731 goto out_read_extent_cache; 732 733 __set_extent_info(&ei, fofs, len, 0, false, 734 tei->age, tei->last_blocks, EX_BLOCK_AGE); 735 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 736 __insert_extent_tree(sbi, et, &ei, 737 insert_p, insert_parent, leftmost); 738 out_read_extent_cache: 739 write_unlock(&et->lock); 740 741 if (updated) 742 f2fs_mark_inode_dirty_sync(inode, true); 743 } 744 745 #ifdef CONFIG_F2FS_FS_COMPRESSION 746 void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, 747 pgoff_t fofs, block_t blkaddr, unsigned int llen, 748 unsigned int c_len) 749 { 750 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 751 struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; 752 struct extent_node *en = NULL; 753 struct extent_node *prev_en = NULL, *next_en = NULL; 754 struct extent_info ei; 755 struct rb_node **insert_p = NULL, *insert_parent = NULL; 756 bool leftmost = false; 757 758 trace_f2fs_update_read_extent_tree_range(inode, fofs, llen, 759 blkaddr, c_len); 760 761 /* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */ 762 if (is_inode_flag_set(inode, FI_NO_EXTENT)) 763 return; 764 765 write_lock(&et->lock); 766 767 en = __lookup_extent_node_ret(&et->root, 768 et->cached_en, fofs, 769 &prev_en, &next_en, 770 &insert_p, &insert_parent, 771 &leftmost); 772 if (en) 773 goto unlock_out; 774 775 __set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ); 776 ei.c_len = c_len; 777 778 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) 779 __insert_extent_tree(sbi, et, &ei, 780 insert_p, insert_parent, leftmost); 781 unlock_out: 782 write_unlock(&et->lock); 783 } 784 #endif 785 786 static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi, 787 unsigned long long new, 788 unsigned long long old) 789 { 790 unsigned int rem_old, rem_new; 791 unsigned long long res; 792 unsigned int weight = sbi->last_age_weight; 793 794 res = div_u64_rem(new, 100, &rem_new) * (100 - weight) 795 + div_u64_rem(old, 100, &rem_old) * weight; 796 797 if (rem_new) 798 res += rem_new * (100 - weight) / 100; 799 if (rem_old) 800 res += rem_old * weight / 100; 801 802 return res; 803 } 804 805 /* This returns a new age and allocated blocks in ei */ 806 static int __get_new_block_age(struct inode *inode, struct extent_info *ei, 807 block_t blkaddr) 808 { 809 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 810 loff_t f_size = i_size_read(inode); 811 unsigned long long cur_blocks = 812 atomic64_read(&sbi->allocated_data_blocks); 813 struct extent_info tei = *ei; /* only fofs and len are valid */ 814 815 /* 816 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last 817 * file block even in seq write. So don't record age for newly last file 818 * block here. 819 */ 820 if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) && 821 blkaddr == NEW_ADDR) 822 return -EINVAL; 823 824 if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) { 825 unsigned long long cur_age; 826 827 if (cur_blocks >= tei.last_blocks) 828 cur_age = cur_blocks - tei.last_blocks; 829 else 830 /* allocated_data_blocks overflow */ 831 cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks; 832 833 if (tei.age) 834 ei->age = __calculate_block_age(sbi, cur_age, tei.age); 835 else 836 ei->age = cur_age; 837 ei->last_blocks = cur_blocks; 838 WARN_ON(ei->age > cur_blocks); 839 return 0; 840 } 841 842 f2fs_bug_on(sbi, blkaddr == NULL_ADDR); 843 844 /* the data block was allocated for the first time */ 845 if (blkaddr == NEW_ADDR) 846 goto out; 847 848 if (__is_valid_data_blkaddr(blkaddr) && 849 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { 850 f2fs_bug_on(sbi, 1); 851 return -EINVAL; 852 } 853 out: 854 /* 855 * init block age with zero, this can happen when the block age extent 856 * was reclaimed due to memory constraint or system reboot 857 */ 858 ei->age = 0; 859 ei->last_blocks = cur_blocks; 860 return 0; 861 } 862 863 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type) 864 { 865 struct extent_info ei = {}; 866 867 if (!__may_extent_tree(dn->inode, type)) 868 return; 869 870 ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + 871 dn->ofs_in_node; 872 ei.len = 1; 873 874 if (type == EX_READ) { 875 if (dn->data_blkaddr == NEW_ADDR) 876 ei.blk = NULL_ADDR; 877 else 878 ei.blk = dn->data_blkaddr; 879 } else if (type == EX_BLOCK_AGE) { 880 if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr)) 881 return; 882 } 883 __update_extent_tree_range(dn->inode, &ei, type); 884 } 885 886 static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink, 887 enum extent_type type) 888 { 889 struct extent_tree_info *eti = &sbi->extent_tree[type]; 890 struct extent_tree *et, *next; 891 struct extent_node *en; 892 unsigned int node_cnt = 0, tree_cnt = 0; 893 int remained; 894 895 if (!atomic_read(&eti->total_zombie_tree)) 896 goto free_node; 897 898 if (!mutex_trylock(&eti->extent_tree_lock)) 899 goto out; 900 901 /* 1. remove unreferenced extent tree */ 902 list_for_each_entry_safe(et, next, &eti->zombie_list, list) { 903 if (atomic_read(&et->node_cnt)) { 904 write_lock(&et->lock); 905 node_cnt += __free_extent_tree(sbi, et); 906 write_unlock(&et->lock); 907 } 908 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 909 list_del_init(&et->list); 910 radix_tree_delete(&eti->extent_tree_root, et->ino); 911 kmem_cache_free(extent_tree_slab, et); 912 atomic_dec(&eti->total_ext_tree); 913 atomic_dec(&eti->total_zombie_tree); 914 tree_cnt++; 915 916 if (node_cnt + tree_cnt >= nr_shrink) 917 goto unlock_out; 918 cond_resched(); 919 } 920 mutex_unlock(&eti->extent_tree_lock); 921 922 free_node: 923 /* 2. remove LRU extent entries */ 924 if (!mutex_trylock(&eti->extent_tree_lock)) 925 goto out; 926 927 remained = nr_shrink - (node_cnt + tree_cnt); 928 929 spin_lock(&eti->extent_lock); 930 for (; remained > 0; remained--) { 931 if (list_empty(&eti->extent_list)) 932 break; 933 en = list_first_entry(&eti->extent_list, 934 struct extent_node, list); 935 et = en->et; 936 if (!write_trylock(&et->lock)) { 937 /* refresh this extent node's position in extent list */ 938 list_move_tail(&en->list, &eti->extent_list); 939 continue; 940 } 941 942 list_del_init(&en->list); 943 spin_unlock(&eti->extent_lock); 944 945 __detach_extent_node(sbi, et, en); 946 947 write_unlock(&et->lock); 948 node_cnt++; 949 spin_lock(&eti->extent_lock); 950 } 951 spin_unlock(&eti->extent_lock); 952 953 unlock_out: 954 mutex_unlock(&eti->extent_tree_lock); 955 out: 956 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type); 957 958 return node_cnt + tree_cnt; 959 } 960 961 /* read extent cache operations */ 962 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, 963 struct extent_info *ei) 964 { 965 if (!__may_extent_tree(inode, EX_READ)) 966 return false; 967 968 return __lookup_extent_tree(inode, pgofs, ei, EX_READ); 969 } 970 971 bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, 972 block_t *blkaddr) 973 { 974 struct extent_info ei = {}; 975 976 if (!f2fs_lookup_read_extent_cache(inode, index, &ei)) 977 return false; 978 *blkaddr = ei.blk + index - ei.fofs; 979 return true; 980 } 981 982 void f2fs_update_read_extent_cache(struct dnode_of_data *dn) 983 { 984 return __update_extent_cache(dn, EX_READ); 985 } 986 987 void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, 988 pgoff_t fofs, block_t blkaddr, unsigned int len) 989 { 990 struct extent_info ei = { 991 .fofs = fofs, 992 .len = len, 993 .blk = blkaddr, 994 }; 995 996 if (!__may_extent_tree(dn->inode, EX_READ)) 997 return; 998 999 __update_extent_tree_range(dn->inode, &ei, EX_READ); 1000 } 1001 1002 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 1003 { 1004 if (!test_opt(sbi, READ_EXTENT_CACHE)) 1005 return 0; 1006 1007 return __shrink_extent_tree(sbi, nr_shrink, EX_READ); 1008 } 1009 1010 /* block age extent cache operations */ 1011 bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs, 1012 struct extent_info *ei) 1013 { 1014 if (!__may_extent_tree(inode, EX_BLOCK_AGE)) 1015 return false; 1016 1017 return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE); 1018 } 1019 1020 void f2fs_update_age_extent_cache(struct dnode_of_data *dn) 1021 { 1022 return __update_extent_cache(dn, EX_BLOCK_AGE); 1023 } 1024 1025 void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn, 1026 pgoff_t fofs, unsigned int len) 1027 { 1028 struct extent_info ei = { 1029 .fofs = fofs, 1030 .len = len, 1031 }; 1032 1033 if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE)) 1034 return; 1035 1036 __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE); 1037 } 1038 1039 unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) 1040 { 1041 if (!test_opt(sbi, AGE_EXTENT_CACHE)) 1042 return 0; 1043 1044 return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); 1045 } 1046 1047 static unsigned int __destroy_extent_node(struct inode *inode, 1048 enum extent_type type) 1049 { 1050 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1051 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1052 unsigned int node_cnt = 0; 1053 1054 if (!et || !atomic_read(&et->node_cnt)) 1055 return 0; 1056 1057 write_lock(&et->lock); 1058 node_cnt = __free_extent_tree(sbi, et); 1059 write_unlock(&et->lock); 1060 1061 return node_cnt; 1062 } 1063 1064 void f2fs_destroy_extent_node(struct inode *inode) 1065 { 1066 __destroy_extent_node(inode, EX_READ); 1067 __destroy_extent_node(inode, EX_BLOCK_AGE); 1068 } 1069 1070 static void __drop_extent_tree(struct inode *inode, enum extent_type type) 1071 { 1072 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1073 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1074 bool updated = false; 1075 1076 if (!__may_extent_tree(inode, type)) 1077 return; 1078 1079 write_lock(&et->lock); 1080 __free_extent_tree(sbi, et); 1081 if (type == EX_READ) { 1082 set_inode_flag(inode, FI_NO_EXTENT); 1083 if (et->largest.len) { 1084 et->largest.len = 0; 1085 updated = true; 1086 } 1087 } 1088 write_unlock(&et->lock); 1089 if (updated) 1090 f2fs_mark_inode_dirty_sync(inode, true); 1091 } 1092 1093 void f2fs_drop_extent_tree(struct inode *inode) 1094 { 1095 __drop_extent_tree(inode, EX_READ); 1096 __drop_extent_tree(inode, EX_BLOCK_AGE); 1097 } 1098 1099 static void __destroy_extent_tree(struct inode *inode, enum extent_type type) 1100 { 1101 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1102 struct extent_tree_info *eti = &sbi->extent_tree[type]; 1103 struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; 1104 unsigned int node_cnt = 0; 1105 1106 if (!et) 1107 return; 1108 1109 if (inode->i_nlink && !is_bad_inode(inode) && 1110 atomic_read(&et->node_cnt)) { 1111 mutex_lock(&eti->extent_tree_lock); 1112 list_add_tail(&et->list, &eti->zombie_list); 1113 atomic_inc(&eti->total_zombie_tree); 1114 mutex_unlock(&eti->extent_tree_lock); 1115 return; 1116 } 1117 1118 /* free all extent info belong to this extent tree */ 1119 node_cnt = __destroy_extent_node(inode, type); 1120 1121 /* delete extent tree entry in radix tree */ 1122 mutex_lock(&eti->extent_tree_lock); 1123 f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); 1124 radix_tree_delete(&eti->extent_tree_root, inode->i_ino); 1125 kmem_cache_free(extent_tree_slab, et); 1126 atomic_dec(&eti->total_ext_tree); 1127 mutex_unlock(&eti->extent_tree_lock); 1128 1129 F2FS_I(inode)->extent_tree[type] = NULL; 1130 1131 trace_f2fs_destroy_extent_tree(inode, node_cnt, type); 1132 } 1133 1134 void f2fs_destroy_extent_tree(struct inode *inode) 1135 { 1136 __destroy_extent_tree(inode, EX_READ); 1137 __destroy_extent_tree(inode, EX_BLOCK_AGE); 1138 } 1139 1140 static void __init_extent_tree_info(struct extent_tree_info *eti) 1141 { 1142 INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO); 1143 mutex_init(&eti->extent_tree_lock); 1144 INIT_LIST_HEAD(&eti->extent_list); 1145 spin_lock_init(&eti->extent_lock); 1146 atomic_set(&eti->total_ext_tree, 0); 1147 INIT_LIST_HEAD(&eti->zombie_list); 1148 atomic_set(&eti->total_zombie_tree, 0); 1149 atomic_set(&eti->total_ext_node, 0); 1150 } 1151 1152 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) 1153 { 1154 __init_extent_tree_info(&sbi->extent_tree[EX_READ]); 1155 __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]); 1156 1157 /* initialize for block age extents */ 1158 atomic64_set(&sbi->allocated_data_blocks, 0); 1159 sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; 1160 sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; 1161 sbi->last_age_weight = LAST_AGE_WEIGHT; 1162 } 1163 1164 int __init f2fs_create_extent_cache(void) 1165 { 1166 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", 1167 sizeof(struct extent_tree)); 1168 if (!extent_tree_slab) 1169 return -ENOMEM; 1170 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", 1171 sizeof(struct extent_node)); 1172 if (!extent_node_slab) { 1173 kmem_cache_destroy(extent_tree_slab); 1174 return -ENOMEM; 1175 } 1176 return 0; 1177 } 1178 1179 void f2fs_destroy_extent_cache(void) 1180 { 1181 kmem_cache_destroy(extent_node_slab); 1182 kmem_cache_destroy(extent_tree_slab); 1183 } 1184