// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}

static bool __may_read_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return false;
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return false;
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
			!f2fs_sb_has_readonly(sbi))
		return false;
	return S_ISREG(inode->i_mode);
}

static bool __may_age_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return false;
	/* don't cache block age info for cold file */
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
		return false;
	if (file_is_cold(inode))
		return false;

	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return __may_read_extent_tree(inode);
	else if (type == EX_BLOCK_AGE)
		return __may_age_extent_tree(inode);
	return false;
}
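
/*
 * Runtime variant of the policy checks above: in addition to the per-type
 * rules, refuse to create extents while the superblock is not yet on the
 * shrinker's s_list, e.g. for files recovered during mount.
 */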

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * For files recovered during mount, do not create extents
	 * if the shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	return __init_may_extent_tree(inode, type);
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}

static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
							unsigned int ofs)
{
	if (cached_re) {
		if (cached_re->ofs <= ofs &&
				cached_re->ofs + cached_re->len > ofs) {
			return cached_re;
		}
	}
	return NULL;
}

static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
							unsigned int ofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct rb_entry *re;

	while (node) {
		re = rb_entry(node, struct rb_entry, rb_node);

		if (ofs < re->ofs)
			node = node->rb_left;
		else if (ofs >= re->ofs + re->len)
			node = node->rb_right;
		else
			return re;
	}
	return NULL;
}

struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
				struct rb_entry *cached_re, unsigned int ofs)
{
	struct rb_entry *re;

	re = __lookup_rb_tree_fast(cached_re, ofs);
	if (!re)
		return __lookup_rb_tree_slow(root, ofs);

	return re;
}

struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
					struct rb_root_cached *root,
					struct rb_node **parent,
					unsigned long long key, bool *leftmost)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_entry *re;

	while (*p) {
		*parent = *p;
		re = rb_entry(*parent, struct rb_entry, rb_node);

		if (key < re->key) {
			p = &(*p)->rb_left;
		} else {
			p = &(*p)->rb_right;
			*leftmost = false;
		}
	}

	return p;
}

struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
				struct rb_root_cached *root,
				struct rb_node **parent,
				unsigned int ofs, bool *leftmost)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_entry *re;

	while (*p) {
		*parent = *p;
		re = rb_entry(*parent, struct rb_entry, rb_node);

		if (ofs < re->ofs) {
			p = &(*p)->rb_left;
		} else if (ofs >= re->ofs + re->len) {
			p = &(*p)->rb_right;
			*leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

	return p;
}
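
/*
 * The rb-tree helpers above implement a generic interval tree keyed either
 * by file offset ([ofs, ofs + len)) or by an opaque 64-bit key.  A typical
 * lookup first tries the per-tree cached entry and only then walks the
 * tree, e.g. (sketch, not verbatim caller code):
 *
 *	re = f2fs_lookup_rb_tree(&et->root,
 *			(struct rb_entry *)et->cached_en, pgofs);
 *	if (re)
 *		pgofs falls in [re->ofs, re->ofs + re->len)
 */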

/*
 * Look up the rb entry covering @ofs in the rb-tree;
 * if hit, return the entry, otherwise, return NULL.
 * @prev_entry: extent before ofs
 * @next_entry: extent after ofs
 * @insert_p: insert point for a new extent at ofs,
 * recorded in order to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
				struct rb_entry *cached_re,
				unsigned int ofs,
				struct rb_entry **prev_entry,
				struct rb_entry **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool force, bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct rb_entry *re = cached_re;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (re) {
		if (re->ofs <= ofs && re->ofs + re->len > ofs)
			goto lookup_neighbors;
	}

	if (leftmost)
		*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		re = rb_entry(*pnode, struct rb_entry, rb_node);

		if (ofs < re->ofs) {
			pnode = &(*pnode)->rb_left;
		} else if (ofs >= re->ofs + re->len) {
			pnode = &(*pnode)->rb_right;
			if (leftmost)
				*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	re = rb_entry(parent, struct rb_entry, rb_node);
	tmp_node = parent;
	if (parent && ofs > re->ofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);

	tmp_node = parent;
	if (parent && ofs < re->ofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	return NULL;

lookup_neighbors:
	if (ofs == re->ofs || force) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&re->rb_node);
		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	}
	if (ofs == re->ofs + re->len - 1 || force) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&re->rb_node);
		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	}
	return re;
}

bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
				struct rb_root_cached *root, bool check_key)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct rb_node *cur = rb_first_cached(root), *next;
	struct rb_entry *cur_re, *next_re;

	if (!cur)
		return true;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_re = rb_entry(cur, struct rb_entry, rb_node);
		next_re = rb_entry(next, struct rb_entry, rb_node);

		if (check_key) {
			if (cur_re->key > next_re->key) {
				f2fs_info(sbi, "inconsistent rbtree, "
					"cur(%llu) next(%llu)",
					cur_re->key, next_re->key);
				return false;
			}
			goto next;
		}

		if (cur_re->ofs + cur_re->len > next_re->ofs) {
			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
				cur_re->ofs, cur_re->len,
				next_re->ofs, next_re->len);
			return false;
		}
next:
		cur = next;
	}
#endif
	return true;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
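
/*
 * Allocation note: extent_tree objects come from extent_tree_slab and are
 * indexed by inode number in eti->extent_tree_root; extent_node objects
 * come from extent_node_slab and hang off both the per-tree rb-tree and
 * the global LRU extent_list that the shrinker walks.
 */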

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}
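
/*
 * On-disk note: the largest read extent is the only extent persisted in
 * the inode (f2fs_inode.i_ext); everything else is an in-memory cache
 * rebuilt on demand, so initialization below only needs to seed the tree
 * from i_ext.
 */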

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext && i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		goto out;
	}

	et = __grab_extent_tree(inode, EX_READ);

	if (!i_ext || !i_ext->len)
		goto out;

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto unlock_out;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
unlock_out:
	write_unlock(&et->lock);
out:
	if (!F2FS_I(inode)->extent_tree[EX_READ])
		set_inode_flag(inode, FI_NO_EXTENT);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
				(struct rb_entry *)et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}
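
/*
 * When caching a new extent, the helpers below first try to merge it with
 * its rb-tree neighbours: a back-merge extends the previous node, a
 * front-merge grows the next node downwards, and if both apply the
 * previous node is released so the runs collapse into one; only when no
 * merge is possible is a fresh node inserted.
 */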

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
						ei->fofs, &leftmost);
do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}
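
/*
 * __update_extent_tree_range() works in three steps: (1) find the first
 * extent node overlapping [fofs, fofs + len - 1], (2) shrink, split or
 * release every overlapping node (for EX_READ, split remainders shorter
 * than F2FS_MIN_EXTENT_LEN are dropped), and (3) merge or insert the new
 * extent info, giving up the read cache entirely once it degrades into
 * fragments smaller than F2FS_MIN_EXTENT_LEN.
 */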

static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
					(struct rb_entry *)et->cached_en, fofs,
					(struct rb_entry **)&prev_en,
					(struct rb_entry **)&next_en,
					&insert_p, &insert_parent, false,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent is split into zero or two parts,
		 * the extent tree has been altered by deletion or insertion,
		 * therefore invalidate the pointers into the tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr,
				unsigned int llen, unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
					(struct rb_entry *)et->cached_en, fofs,
					(struct rb_entry **)&prev_en,
					(struct rb_entry **)&next_en,
					&insert_p, &insert_parent, false,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif
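
/*
 * Block age bookkeeping: "age" is measured in allocated data blocks
 * (sbi->allocated_data_blocks), not wall-clock time, and last_blocks
 * snapshots that counter at the time an extent was last updated, so the
 * distance between the two approximates how long the data has stayed
 * unmodified.
 */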

static unsigned long long __calculate_block_age(unsigned long long new,
						unsigned long long old)
{
	unsigned long long diff;

	diff = (new >= old) ? new - (new - old) : new + (old - new);

	return div_u64(diff * LAST_AGE_WEIGHT, 100);
}

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, updates hit the last file
	 * block repeatedly even for sequential writes, so don't record an
	 * age for the new last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		f2fs_bug_on(sbi, 1);
		return -EINVAL;
	}
out:
	/*
	 * Init block age with zero; this can happen when the block age
	 * extent was reclaimed due to memory constraints or a system reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}
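
/*
 * Shrinking runs in two phases: first, zombie trees (trees of evicted
 * inodes) are freed wholesale; then individual nodes are reclaimed from
 * the head of the global LRU list until nr_shrink entries have been
 * dropped.  Both phases back off instead of blocking when a lock is
 * contended, since this runs from the memory shrinker path.
 */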

static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}
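
/*
 * Typical read-path usage (sketch, assuming the f2fs_map_blocks() caller
 * in data.c):
 *
 *	struct extent_info ei;
 *
 *	if (f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) {
 *		map->m_pblk = ei.blk + pgofs - ei.fofs;
 *		... the dnode lookup can be skipped entirely ...
 *	}
 */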

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete the extent tree entry in the radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}
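
/*
 * Lifecycle summary: trees are grabbed at inode init time, nodes are freed
 * by f2fs_destroy_extent_node() or the shrinker, and the tree itself is
 * torn down above at eviction; a tree that still holds nodes for a linked,
 * healthy inode is parked on the zombie list instead and reclaimed later
 * by the shrinker.
 */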

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}
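
/*
 * Scope note: f2fs_create_extent_cache() and f2fs_destroy_extent_cache()
 * manage the module-wide slab caches, while f2fs_init_extent_cache_info()
 * initializes the per-superblock bookkeeping at mount time.
 */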