// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}
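
/*
 * Summary of the two cache types handled here (descriptive note):
 *
 * - EX_READ caches the logical-to-physical mapping of a file range:
 *   {fofs, len, blk}, plus c_len for compressed clusters when
 *   CONFIG_F2FS_FS_COMPRESSION is enabled.
 * - EX_BLOCK_AGE caches {fofs, len, age, last_blocks}, where "age" reflects
 *   how many data blocks the filesystem allocated between successive
 *   updates of the range (see __get_new_block_age() below) and last_blocks
 *   snapshots sbi->allocated_data_blocks at update time.
 */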

static bool __may_read_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return false;
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return false;
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
			!f2fs_sb_has_readonly(sbi))
		return false;
	return S_ISREG(inode->i_mode);
}

static bool __may_age_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return false;
	/* don't cache block age info for cold file */
	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
		return false;
	if (file_is_cold(inode))
		return false;

	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return __may_read_extent_tree(inode);
	else if (type == EX_BLOCK_AGE)
		return __may_age_extent_tree(inode);
	return false;
}

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * Don't create extents for files recovered during mount
	 * if the shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	return __init_may_extent_tree(inode, type);
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}

static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
							unsigned int ofs)
{
	if (cached_re) {
		if (cached_re->ofs <= ofs &&
				cached_re->ofs + cached_re->len > ofs) {
			return cached_re;
		}
	}
	return NULL;
}

static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root,
							unsigned int ofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct rb_entry *re;

	while (node) {
		re = rb_entry(node, struct rb_entry, rb_node);

		if (ofs < re->ofs)
			node = node->rb_left;
		else if (ofs >= re->ofs + re->len)
			node = node->rb_right;
		else
			return re;
	}
	return NULL;
}

struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
				struct rb_entry *cached_re, unsigned int ofs)
{
	struct rb_entry *re;

	re = __lookup_rb_tree_fast(cached_re, ofs);
	if (!re)
		return __lookup_rb_tree_slow(root, ofs);

	return re;
}

struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
					struct rb_root_cached *root,
					struct rb_node **parent,
					unsigned long long key, bool *leftmost)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_entry *re;

	while (*p) {
		*parent = *p;
		re = rb_entry(*parent, struct rb_entry, rb_node);

		if (key < re->key) {
			p = &(*p)->rb_left;
		} else {
			p = &(*p)->rb_right;
			*leftmost = false;
		}
	}

	return p;
}

struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
				struct rb_root_cached *root,
				struct rb_node **parent,
				unsigned int ofs, bool *leftmost)
{
	struct rb_node **p = &root->rb_root.rb_node;
	struct rb_entry *re;

	while (*p) {
		*parent = *p;
		re = rb_entry(*parent, struct rb_entry, rb_node);

		if (ofs < re->ofs) {
			p = &(*p)->rb_left;
		} else if (ofs >= re->ofs + re->len) {
			p = &(*p)->rb_right;
			*leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

	return p;
}
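
/*
 * Illustrative note on the rb-tree keying used above and below: entries are
 * ordered by ->ofs and each covers the half-open range [ofs, ofs + len).
 * For example, an entry with ofs = 10 and len = 4 covers offsets 10..13, so
 * a lookup of 13 hits it while a lookup of 14 descends to its right
 * subtree.  f2fs_lookup_rb_tree_ext() keys on ->key instead and tolerates
 * duplicates (equal keys go right).
 */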

/*
 * lookup rb entry in position of @ofs in rb-tree,
 * if hit, return the entry, otherwise, return NULL
 * @prev_entry: extent before ofs
 * @next_entry: extent after ofs
 * @insert_p: insert point for new extent at ofs
 * in order to simplify the insertion after.
 * tree must stay unchanged between lookup and insertion.
 */
struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
				struct rb_entry *cached_re,
				unsigned int ofs,
				struct rb_entry **prev_entry,
				struct rb_entry **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool force, bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct rb_entry *re = cached_re;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (re) {
		if (re->ofs <= ofs && re->ofs + re->len > ofs)
			goto lookup_neighbors;
	}

	if (leftmost)
		*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		re = rb_entry(*pnode, struct rb_entry, rb_node);

		if (ofs < re->ofs) {
			pnode = &(*pnode)->rb_left;
		} else if (ofs >= re->ofs + re->len) {
			pnode = &(*pnode)->rb_right;
			if (leftmost)
				*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	re = rb_entry(parent, struct rb_entry, rb_node);
	tmp_node = parent;
	if (parent && ofs > re->ofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);

	tmp_node = parent;
	if (parent && ofs < re->ofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	return NULL;

lookup_neighbors:
	if (ofs == re->ofs || force) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&re->rb_node);
		*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	}
	if (ofs == re->ofs + re->len - 1 || force) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&re->rb_node);
		*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
	}
	return re;
}

bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
				struct rb_root_cached *root, bool check_key)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct rb_node *cur = rb_first_cached(root), *next;
	struct rb_entry *cur_re, *next_re;

	if (!cur)
		return true;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_re = rb_entry(cur, struct rb_entry, rb_node);
		next_re = rb_entry(next, struct rb_entry, rb_node);

		if (check_key) {
			if (cur_re->key > next_re->key) {
				f2fs_info(sbi, "inconsistent rbtree, "
					"cur(%llu) next(%llu)",
					cur_re->key, next_re->key);
				return false;
			}
			goto next;
		}

		if (cur_re->ofs + cur_re->len > next_re->ofs) {
			f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
				  cur_re->ofs, cur_re->len,
				  next_re->ofs, next_re->len);
			return false;
		}
next:
		cur = next;
	}
#endif
	return true;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}
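
/*
 * Note on the "largest" read extent (illustrative): the largest extent is
 * what gets mirrored into the on-disk inode's i_ext, and it is checked
 * first in __lookup_extent_tree() before the rb-tree.  For example, with
 * largest = {fofs = 0, len = 1024, blk = 5000}, a lookup of pgofs 100 hits
 * it and the caller can derive the physical block as
 * ei.blk + (pgofs - ei.fofs) = 5100 without touching any node page.
 */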

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext && i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		goto out;
	}

	et = __grab_extent_tree(inode, EX_READ);

	if (!i_ext || !i_ext->len)
		goto out;

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt))
		goto unlock_out;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
unlock_out:
	write_unlock(&et->lock);
out:
	if (!F2FS_I(inode)->extent_tree[EX_READ])
		set_inode_flag(inode, FI_NO_EXTENT);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	f2fs_bug_on(sbi, !et);

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root,
				(struct rb_entry *)et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}
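
/*
 * Worked example of merging (illustrative): for EX_READ, two extents are
 * mergeable only when they are contiguous in both file offset and block
 * address, e.g. back = {fofs = 0, len = 4, blk = 100} and
 * front = {fofs = 4, len = 2, blk = 104} merge into
 * {fofs = 0, len = 6, blk = 100}.  Partially-mapped compressed extents
 * (c_len set and len != c_len) are never merged.  For EX_BLOCK_AGE, the
 * offsets must be contiguous and both age and last_blocks must lie within
 * SAME_AGE_REGION of each other.
 */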

static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent,
						ei->fofs, &leftmost);
do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}
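
/*
 * Worked example for the range update below (illustrative): suppose an
 * EX_READ node holds {fofs = 0, len = 256, blk = 1000} and the range
 * [100, 104) is rewritten.  Step 2 trims the node to {0, 100, 1000} and
 * inserts the tail {104, 152, 1104}; both pieces survive only because each
 * is at least F2FS_MIN_EXTENT_LEN blocks long (EX_BLOCK_AGE splits have no
 * such minimum).  Step 3 then merges or inserts the new mapping for
 * [100, 104) with its new block address.
 */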

static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop largest extent before lookup, in case it's already
		 * been shrunk from extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
					(struct rb_entry *)et->cached_en, fofs,
					(struct rb_entry **)&prev_en,
					(struct rb_entry **)&next_en,
					&insert_p, &insert_parent, false,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * if original extent is split into zero or two parts, extent
		 * tree has been altered by deletion or insertion, therefore
		 * invalidate pointers regarding the tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr,
				unsigned int llen, unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
				(struct rb_entry *)et->cached_en, fofs,
				(struct rb_entry **)&prev_en,
				(struct rb_entry **)&next_en,
				&insert_p, &insert_parent, false,
				&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif
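
/*
 * Descriptive note on block age (illustrative figures): "age" measures how
 * many data blocks the whole filesystem allocated between two consecutive
 * updates of a file range.  If last_blocks was snapshotted at 10,000
 * allocated blocks and allocated_data_blocks has since grown to 15,000, the
 * new interval is 5,000; __get_new_block_age() feeds that interval together
 * with any previously recorded age into __calculate_block_age(), which
 * applies the LAST_AGE_WEIGHT percentage.
 */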

static unsigned long long __calculate_block_age(unsigned long long new,
						unsigned long long old)
{
	unsigned long long diff;

	diff = (new >= old) ? new - (new - old) : new + (old - new);

	return div_u64(diff * LAST_AGE_WEIGHT, 100);
}

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);

	/*
	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the
	 * last file block even in seq write. So don't record age for the
	 * newly written last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			ei->blk == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= ei->last_blocks)
			cur_age = cur_blocks - ei->last_blocks;
		else
			/* allocated_data_blocks overflow */
			cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks;

		if (ei->age)
			ei->age = __calculate_block_age(cur_age, ei->age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, ei->blk == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (ei->blk == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(ei->blk) &&
	    !f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) {
		f2fs_bug_on(sbi, 1);
		return -EINVAL;
	}
out:
	/*
	 * init block age with zero, this can happen when the block age extent
	 * was reclaimed due to memory constraint or system reboot
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei;

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		ei.blk = dn->data_blkaddr;
		if (__get_new_block_age(dn->inode, &ei))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}
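
/*
 * Shrinker note (descriptive): reclaim proceeds in two phases.  First,
 * zombie trees (trees whose inodes have already been evicted) are torn down
 * entirely; then extent nodes are reclaimed from the cold end of the global
 * LRU extent_list until nr_shrink entries have been freed.  Both phases back
 * off on lock contention rather than block, since shrinking is best effort.
 */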

static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
				enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent tree */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}
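
/*
 * Usage sketch for the read extent cache wrappers above (illustrative):
 * a block-mapping path typically calls f2fs_lookup_read_extent_cache()
 * first; on a hit it derives the physical address as
 * ei.blk + (pgofs - ei.fofs) and skips the dnode lookup entirely.  On a
 * miss, once the dnode has been resolved, f2fs_update_read_extent_cache()
 * or the _range() variant records the new mapping for later lookups.
 */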

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete extent tree entry in radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}
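
/*
 * Teardown note (descriptive): f2fs_destroy_extent_node() only empties a
 * tree's nodes, while f2fs_destroy_extent_tree() runs at inode eviction.
 * A still-populated tree of a healthy, linked inode is parked on the
 * zombie_list so the shrinker can reclaim it lazily; otherwise the tree is
 * freed immediately and removed from the radix tree.
 */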

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}