// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

/* reuse a page from the caller's pool if one is available, otherwise allocate */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the refcount paired with erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the RCU read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this visible to
	 * others in the XArray in order to avoid a potential UAF,
	 * since lookups are not serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	/* only the reference held by its managed pslot remains */
	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is on, the refcount of workgroups
	 * themselves could be < 0 (frozen). In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the XArray. Otherwise, some cached
	 * pages could still be attached to the orphaned old workgroup
	 * when the new one becomes available in the tree.
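	 *
	 * If not all cached pages can be detached here (e.g. some are
	 * still in use), unfreeze the workgroup and leave it in place
	 * instead of reclaiming it.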
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible for this to fail after the workgroup is
	 * frozen; however, in order to detect unexpected race
	 * conditions, add a DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* the last refcount is held by its managed pslot, which was just erased */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	/* skip run_no == 0: it is the initial value of sbi->shrinker_run_no */
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
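		 * Rotating already-shrunk superblocks to the tail means
		 * that the next scan does not always start reclaiming
		 * from the same superblock.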
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif /* !CONFIG_EROFS_FS_ZIP */
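
/*
 * A minimal usage sketch of the two hooks above, assuming a module
 * init/exit path (as in super.c) registers the global shrinker; the
 * function names and surrounding steps below are illustrative
 * assumptions, not a copy of the actual code:
 *
 *	static int __init erofs_module_init(void)
 *	{
 *		int err = erofs_init_shrinker();
 *
 *		if (err)
 *			return err;
 *		// ... register caches and the filesystem type ...
 *		return 0;
 *	}
 *
 *	static void __exit erofs_module_exit(void)
 *	{
 *		// ... unregister the filesystem type first ...
 *		erofs_exit_shrinker();
 *	}
 */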