--- extents_status.c	(eeca9fad52fc4bfdf42c38bfcf383e932eb3e9d6)
+++ extents_status.c	(1ab6c4997e04a00c50c6d786c2f046adc0d1f5de)
 /*
  * fs/ext4/extents_status.c
  *
  * Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
  * Modified by
  *	Allison Henderson <achender@linux.vnet.ibm.com>
  *	Hugh Dickins <hughd@google.com>
  *	Zheng Liu <wenqing.lz@taobao.com>
  *
  * Ext4 extents status tree core functions.
  */
 #include <linux/rbtree.h>
 #include <linux/list_sort.h>
 #include "ext4.h"
 #include "extents_status.h"
-#include "ext4_extents.h"
 
 #include <trace/events/ext4.h>
 
 /*
  * According to previous discussion in Ext4 Developer Workshop, we
  * will introduce a new structure called io tree to track all extent
  * status in order to solve some problems that we have met
  * (e.g. Reservation space warning), and provide extent-level locking.
--- 233 unchanged lines hidden ---
 	read_lock(&EXT4_I(inode)->i_es_lock);
 	tree = &EXT4_I(inode)->i_es_tree;
 
 	/* find extent in cache firstly */
 	es->es_lblk = es->es_len = es->es_pblk = 0;
 	if (tree->cache_es) {
 		es1 = tree->cache_es;
 		if (in_range(lblk, es1->es_lblk, es1->es_len)) {
-			es_debug("%u cached by [%u/%u) %llu %llx\n",
+			es_debug("%u cached by [%u/%u) %llu %x\n",
 				 lblk, es1->es_lblk, es1->es_len,
 				 ext4_es_pblock(es1), ext4_es_status(es1));
 			goto out;
 		}
 	}
 
 	es1 = __es_tree_search(&tree->root, lblk);
 
--- 129 unchanged lines hidden ---
 		rb_erase(node, &tree->root);
 		ext4_es_free_extent(inode, es1);
 	}
 
 	return es;
 }
 
 #ifdef ES_AGGRESSIVE_TEST
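
The es_debug() hunk above tightens the format string to %x because the status value is carried as an unsigned int rather than an unsigned long long from this series on (see the ext4_es_insert_extent() signature change below). The fast path it sits in short-circuits the rbtree walk whenever the requested block falls inside the most recently used extent. A minimal userspace sketch of that coverage test, with illustrative names standing in for the kernel's in_range() macro:

    /* Sketch only: mirrors in_range(lblk, es1->es_lblk, es1->es_len). */
    #include <assert.h>

    typedef unsigned int ext4_lblk_t;

    /* An extent [es_lblk, es_lblk + es_len) covers lblk when: */
    static int extent_covers(ext4_lblk_t lblk, ext4_lblk_t es_lblk,
                             ext4_lblk_t es_len)
    {
            return lblk >= es_lblk && lblk - es_lblk < es_len;
    }

    int main(void)
    {
            assert(extent_covers(105, 100, 8));   /* inside [100, 108) */
            assert(!extent_covers(108, 100, 8));  /* first block past it */
            return 0;
    }
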
+#include "ext4_extents.h"	/* Needed when ES_AGGRESSIVE_TEST is defined */
+
 static void ext4_es_insert_extent_ext_check(struct inode *inode,
 					    struct extent_status *es)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block;
 	ext4_fsblk_t ee_start;
 	unsigned short ee_len;
 	int depth, ee_status, es_status;
 
-	path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
 		return;
 
 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
 
 	if (ex) {
 
--- 205 unchanged lines hidden ---
 /*
  * ext4_es_insert_extent() adds information to an inode's extent
  * status tree.
  *
  * Return 0 on success, error code on failure.
  */
 int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 			  ext4_lblk_t len, ext4_fsblk_t pblk,
-			  unsigned long long status)
+			  unsigned int status)
 {
 	struct extent_status newes;
 	ext4_lblk_t end = lblk + len - 1;
 	int err = 0;
 
-	es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
+	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
 		 lblk, len, pblk, status, inode->i_ino);
 
 	if (!len)
 		return 0;
 
 	BUG_ON(end < lblk);
 
 	newes.es_lblk = lblk;
--- 20 unchanged lines hidden ---
 	write_unlock(&EXT4_I(inode)->i_es_lock);
 
 	ext4_es_print_tree(inode);
 
 	return err;
 }
 
 /*
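
ext4_es_insert_extent() is the general entry point: it records the state of the range [lblk, lblk+len) in the inode's status tree and returns 0 or an error. A hedged caller sketch, using only names already visible in this file (the wrapper itself is illustrative, not part of the patch):

    /* Illustrative wrapper: record a freshly mapped range as written
     * in the inode's extent status tree. */
    static int record_written_range(struct inode *inode, ext4_lblk_t lblk,
                                    ext4_lblk_t len, ext4_fsblk_t pblk)
    {
            unsigned int status = EXTENT_STATUS_WRITTEN;

            /* logical [lblk, lblk+len) now maps to physical pblk onward */
            return ext4_es_insert_extent(inode, lblk, len, pblk, status);
    }
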
+ * ext4_es_cache_extent() inserts information into the extent status
+ * tree if and only if there isn't information about the range in
+ * question already.
+ */
+void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
+			  ext4_lblk_t len, ext4_fsblk_t pblk,
+			  unsigned int status)
+{
+	struct extent_status *es;
+	struct extent_status newes;
+	ext4_lblk_t end = lblk + len - 1;
+
+	newes.es_lblk = lblk;
+	newes.es_len = len;
+	ext4_es_store_pblock(&newes, pblk);
+	ext4_es_store_status(&newes, status);
+	trace_ext4_es_cache_extent(inode, &newes);
+
+	if (!len)
+		return;
+
+	BUG_ON(end < lblk);
+
+	write_lock(&EXT4_I(inode)->i_es_lock);
+
+	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
+	if (!es || es->es_lblk > end)
+		__es_insert_extent(inode, &newes);
+	write_unlock(&EXT4_I(inode)->i_es_lock);
+}
+
+/*
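
The new ext4_es_cache_extent() is the insert-if-absent variant used for extent precaching: __es_tree_search() yields the first extent that does not lie wholly below lblk, so the test `!es || es->es_lblk > end` means nothing in the tree overlaps [lblk, end]. It also returns void, since a failed insertion into what is only a cache is harmless. A hedged sketch of a precache-style caller (the wrapper is illustrative; real callers walk the on-disk extent tree leaf by leaf):

    /* Illustrative only: seed the status tree from one on-disk extent
     * without clobbering whatever is already cached for that range. */
    static void precache_one_extent(struct inode *inode,
                                    struct ext4_extent *ex)
    {
            ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
            ext4_lblk_t len = ext4_ext_get_actual_len(ex);
            ext4_fsblk_t pblk = ext4_ext_pblock(ex);

            /* a real caller would also map unwritten extents to the
             * matching status bit; written-only keeps the sketch short */
            ext4_es_cache_extent(inode, lblk, len, pblk,
                                 EXTENT_STATUS_WRITTEN);
    }
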
  * ext4_es_lookup_extent() looks up an extent in extent status tree.
  *
  * ext4_es_lookup_extent is called by ext4_map_blocks/ext4_da_map_blocks.
  *
  * Return: 1 on found, 0 on not
  */
 int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 			  struct extent_status *es)
--- 171 unchanged lines hidden ---
 
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	write_unlock(&EXT4_I(inode)->i_es_lock);
 	ext4_es_print_tree(inode);
 	return err;
 }
 
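
ext4_es_lookup_extent() is the read side of the cache: on a hit it fills *es and returns 1, letting callers such as ext4_map_blocks() skip the on-disk extent tree entirely. The calling pattern, as a hedged sketch (the wrapper is illustrative):

    /* Illustrative caller: consult the status tree before going to disk. */
    static int demo_check_block(struct inode *inode, ext4_lblk_t lblk)
    {
            struct extent_status es;

            if (!ext4_es_lookup_extent(inode, lblk, &es))
                    return 0;       /* miss: fall back to the extent tree */

            /* hit: es.es_lblk <= lblk < es.es_lblk + es.es_len */
            return 1;
    }
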
-int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
-{
-	ext4_lblk_t ee_block;
-	ext4_fsblk_t ee_pblock;
-	unsigned int ee_len;
-
-	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
-	ee_pblock = ext4_ext_pblock(ex);
-
-	if (ee_len == 0)
-		return 0;
-
-	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
-				     EXTENT_STATUS_WRITTEN);
-}
-
 static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
 				     struct list_head *b)
 {
 	struct ext4_inode_info *eia, *eib;
 	eia = list_entry(a, struct ext4_inode_info, i_es_lru);
 	eib = list_entry(b, struct ext4_inode_info, i_es_lru);
 
+	if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+	    !ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+		return 1;
+	if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
+	    ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
+		return -1;
 	if (eia->i_touch_when == eib->i_touch_when)
 		return 0;
 	if (time_after(eia->i_touch_when, eib->i_touch_when))
 		return 1;
 	else
 		return -1;
 }
 
 static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 			    struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
 	struct list_head *cur, *tmp;
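
With the added clauses, the comparator orders on two keys: any precached inode sorts after any non-precached one, and i_touch_when breaks ties, so list_sort() leaves the cheapest reclaim victims at the head of the LRU. One caveat the sketch below glosses over: the kernel compares jiffies with time_after(), which is wraparound-safe, whereas a plain `>` is not. A self-contained userspace rendition of the same two-key ordering (types and the flag are stand-ins for the kernel's):

    #include <stdio.h>
    #include <stdlib.h>

    struct fake_inode {
            int precached;            /* stands in for EXT4_STATE_EXT_PRECACHED */
            unsigned long touch_when; /* stands in for i_touch_when (jiffies) */
    };

    static int touch_time_cmp(const void *a, const void *b)
    {
            const struct fake_inode *ia = a, *ib = b;

            if (ia->precached != ib->precached)
                    return ia->precached ? 1 : -1;  /* precached sorts last */
            if (ia->touch_when == ib->touch_when)
                    return 0;
            return ia->touch_when > ib->touch_when ? 1 : -1;
    }

    int main(void)
    {
            struct fake_inode v[] = { { 1, 10 }, { 0, 30 }, { 0, 20 }, { 1, 5 } };
            int i;

            qsort(v, 4, sizeof(v[0]), touch_time_cmp);
            for (i = 0; i < 4; i++)   /* (0,20) (0,30) (1,5) (1,10) */
                    printf("precached=%d touch=%lu\n",
                           v[i].precached, v[i].touch_when);
            return 0;
    }
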
-	LIST_HEAD(skiped);
-	int ret, nr_shrunk = 0;
+	LIST_HEAD(skipped);
+	int nr_shrunk = 0;
+	int retried = 0, skip_precached = 1, nr_skipped = 0;
 
 	spin_lock(&sbi->s_es_lru_lock);
 
-	/*
-	 * If the inode that is at the head of LRU list is newer than
-	 * last_sorted time, that means that we need to sort this list.
-	 */
-	ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
-	if (sbi->s_es_last_sorted < ei->i_touch_when) {
-		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		sbi->s_es_last_sorted = jiffies;
-	}
-
+retry:
 	list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+		int shrunk;
+
 		/*
 		 * If we have already reclaimed all extents from extent
 		 * status tree, just stop the loop immediately.
 		 */
 		if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
 			break;
 
 		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
 
-		/* Skip the inode that is newer than the last_sorted time */
-		if (sbi->s_es_last_sorted < ei->i_touch_when) {
-			list_move_tail(cur, &skiped);
+		/*
+		 * Skip the inode that is newer than the last_sorted
+		 * time.  Normally we try hard to avoid shrinking
+		 * precached inodes, but we will as a last resort.
+		 */
+		if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
+						EXT4_STATE_EXT_PRECACHED))) {
+			nr_skipped++;
+			list_move_tail(cur, &skipped);
 			continue;
 		}
 
 		if (ei->i_es_lru_nr == 0 || ei == locked_ei)
 			continue;
 
 		write_lock(&ei->i_es_lock);
-		ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+		shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
 		if (ei->i_es_lru_nr == 0)
 			list_del_init(&ei->i_es_lru);
 		write_unlock(&ei->i_es_lock);
 
-		nr_shrunk += ret;
-		nr_to_scan -= ret;
+		nr_shrunk += shrunk;
+		nr_to_scan -= shrunk;
 		if (nr_to_scan == 0)
 			break;
 	}
 
 	/* Move the newer inodes into the tail of the LRU list. */
-	list_splice_tail(&skiped, &sbi->s_es_lru);
+	list_splice_tail(&skipped, &sbi->s_es_lru);
+	INIT_LIST_HEAD(&skipped);
+
+	/*
+	 * If we skipped any inodes, and we weren't able to make any
+	 * forward progress, sort the list and try again.
+	 */
+	if ((nr_shrunk == 0) && nr_skipped && !retried) {
+		retried++;
+		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
+		sbi->s_es_last_sorted = jiffies;
+		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
+				      i_es_lru);
+		/*
+		 * If there are no non-precached inodes left on the
+		 * list, start releasing precached extents.
+		 */
+		if (ext4_test_inode_state(&ei->vfs_inode,
+					  EXT4_STATE_EXT_PRECACHED))
+			skip_precached = 0;
+		goto retry;
+	}
+
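
The added block turns the scan into an at-most-two-pass loop: the first pass skips both too-new and precached inodes; if that frees nothing while skipping something, the list is sorted once and, when only precached inodes remain at the head, the skip_precached filter is dropped before the single retry. The shape of that fallback, reduced to a self-contained userspace sketch:

    #include <stdio.h>

    struct item { int expensive; int pages; };

    static int shrink_items(struct item *v, int n, int target)
    {
            int freed = 0, skipped = 0, skip_expensive = 1, retried = 0;

    retry:
            for (int i = 0; i < n && freed < target; i++) {
                    if (skip_expensive && v[i].expensive) {
                            skipped++;      /* hold the costly items back */
                            continue;
                    }
                    freed += v[i].pages;
                    v[i].pages = 0;
            }
            /* no forward progress, but items were held back: relax, retry */
            if (freed == 0 && skipped && !retried) {
                    retried = 1;
                    skip_expensive = 0;
                    goto retry;
            }
            return freed;
    }

    int main(void)
    {
            struct item v[] = { { 1, 8 }, { 1, 4 } };

            printf("freed %d\n", shrink_items(v, 2, 6));    /* freed 8 */
            return 0;
    }
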
 	spin_unlock(&sbi->s_es_lru_lock);
 
 	if (locked_ei && nr_shrunk == 0)
-		nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
+		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
 	return nr_shrunk;
 }
 
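
Note also the one-line bug fix above: the fallback reclaim for the locked inode now passes locked_ei instead of ei, which could point at whatever inode the loop last visited. The remaining hunks migrate the file to the split shrinker API introduced around Linux 3.12: .count_objects reports how many objects could be freed (here the s_extent_cache_cnt percpu counter) and .scan_objects does the freeing and returns how many objects actually went away, which is why ext4_es_scan now returns nr_shrunk. A hedged skeleton of a shrinker on the new API (the demo_* helpers are hypothetical):

    #include <linux/shrinker.h>

    static unsigned long demo_count(struct shrinker *shrink,
                                    struct shrink_control *sc)
    {
            /* cheap, approximate, and must not fail */
            return demo_cached_objects();           /* hypothetical helper */
    }

    static unsigned long demo_scan(struct shrinker *shrink,
                                   struct shrink_control *sc)
    {
            /* free up to sc->nr_to_scan objects, report how many freed */
            return demo_free_objects(sc->nr_to_scan);   /* hypothetical */
    }

    static struct shrinker demo_shrinker = {
            .count_objects  = demo_count,
            .scan_objects   = demo_scan,
            .seeks          = DEFAULT_SEEKS,
    };

    /* register_shrinker(&demo_shrinker) at init time,
     * unregister_shrinker(&demo_shrinker) on teardown. */
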
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long ext4_es_count(struct shrinker *shrink,
+				   struct shrink_control *sc)
 {
+	unsigned long nr;
+	struct ext4_sb_info *sbi;
+
+	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
+	nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+	trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+	return nr;
+}
+
+static unsigned long ext4_es_scan(struct shrinker *shrink,
+				  struct shrink_control *sc)
+{
 	struct ext4_sb_info *sbi = container_of(shrink,
 				struct ext4_sb_info, s_es_shrinker);
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
 	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
 	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
 	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
 
-	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
 	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
-	return ret;
+	return nr_shrunk;
 }
 
 void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
 {
 	INIT_LIST_HEAD(&sbi->s_es_lru);
 	spin_lock_init(&sbi->s_es_lru_lock);
 	sbi->s_es_last_sorted = 0;
-	sbi->s_es_shrinker.shrink = ext4_es_shrink;
+	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
+	sbi->s_es_shrinker.count_objects = ext4_es_count;
 	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
 	register_shrinker(&sbi->s_es_shrinker);
 }
 
 void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
--- 27 unchanged lines hidden ---
 
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
 				       int nr_to_scan)
 {
 	struct inode *inode = &ei->vfs_inode;
 	struct ext4_es_tree *tree = &ei->i_es_tree;
 	struct rb_node *node;
 	struct extent_status *es;
-	int nr_shrunk = 0;
+	unsigned long nr_shrunk = 0;
+	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
 
 	if (ei->i_es_lru_nr == 0)
 		return 0;
 
+	if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
+	    __ratelimit(&_rs))
+		ext4_warning(inode->i_sb, "forced shrink of precached extents");
+
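
The warning added above fires when reclaim is forced to throw away precached extents, and DEFINE_RATELIMIT_STATE keeps it from flooding the log: with the defaults, at most 10 messages per 5-second window. The idiom in miniature (the message and function are illustrative):

    #include <linux/ratelimit.h>
    #include <linux/printk.h>

    static void demo_noisy_event(void)
    {
            /* DEFAULT_RATELIMIT_INTERVAL is 5*HZ; DEFAULT_RATELIMIT_BURST
             * allows 10 messages per interval before suppression kicks in */
            static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
                                          DEFAULT_RATELIMIT_BURST);

            if (__ratelimit(&rs))   /* nonzero: OK to emit this one */
                    pr_warn("demo: event happened (maybe many more)\n");
    }
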
 	node = rb_first(&tree->root);
 	while (node != NULL) {
 		es = rb_entry(node, struct extent_status, rb_node);
 		node = rb_next(&es->rb_node);
 		/*
 		 * We can't reclaim delayed extent from status tree because
 		 * fiemap, bigallic, and seek_data/hole need to use it.
 		 */
 		if (!ext4_es_is_delayed(es)) {
 			rb_erase(&es->rb_node, &tree->root);
 			ext4_es_free_extent(inode, es);
 			nr_shrunk++;
 			if (--nr_to_scan == 0)
 				break;
 		}
 	}
 	tree->cache_es = NULL;
 	return nr_shrunk;
 }