// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_page(gfp);
        }
        return page;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (o <= 0)
                return -1;

        if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
                goto repeat;

        /* decrease the global shrink count paired with erofs_workgroup_put() */
        if (o == 1)
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = xa_load(&sbi->managed_pslots, index);
        if (grp) {
                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax the RCU read side before retrying */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
                                               struct erofs_workgroup *grp)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct erofs_workgroup *pre;

        /*
         * Bump up the reference count before making this workgroup
         * visible to others in the XArray, so that lookups which are
         * not serialized by xa_lock cannot cause a use-after-free.
         */
        atomic_inc(&grp->refcount);

repeat:
        xa_lock(&sbi->managed_pslots);
        pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
                           NULL, grp, GFP_NOFS);
        if (pre) {
                if (xa_is_err(pre)) {
                        pre = ERR_PTR(xa_err(pre));
                } else if (erofs_workgroup_get(pre)) {
                        /* try to legitimize the current in-tree one */
                        xa_unlock(&sbi->managed_pslots);
                        cond_resched();
                        goto repeat;
                }
                atomic_dec(&grp->refcount);
                grp = pre;
        }
        xa_unlock(&sbi->managed_pslots);
        return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}
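
/*
 * Summary of the workgroup reference counting used in this file (the
 * freeze helpers themselves are provided by "internal.h"):
 *
 *  - erofs_insert_workgroup() takes an extra reference on behalf of the
 *    managed_pslots slot before the workgroup becomes visible to others;
 *  - erofs_workgroup_put() dropping the refcount to 1 means only the
 *    XArray reference remains, so the workgroup is counted in
 *    erofs_global_shrink_cnt and becomes reclaimable;
 *  - erofs_workgroup_get() raising the refcount from 1 takes the
 *    workgroup back out of the reclaimable count;
 *  - erofs_try_to_release_workgroup() below freezes a workgroup at a
 *    refcount of 1, erases its slot and frees it via RCU.
 */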

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp)
{
        /*
         * If the managed cache is enabled, workgroup refcounts may be
         * negative (frozen); in other words, there is no guarantee
         * that all refcounts are positive.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be detached before the
         * workgroup is deleted from the XArray. Otherwise some cached
         * pages could still be attached to the orphaned old workgroup
         * while a new one for the same index is already in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * Failure is impossible once the workgroup is frozen; however,
         * to catch unexpected race conditions early, add a DBG_BUGON
         * to observe this in advance.
         */
        DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

        /* the last refcount was held by the now-erased managed pslot */
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
        return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink)
{
        struct erofs_workgroup *grp;
        unsigned int freed = 0;
        unsigned long index;

        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;

                ++freed;
                if (!--nr_shrink)
                        break;
        }
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        /* clean up all remaining workgroups in memory */
        erofs_shrink_workstation(sbi, ~0UL);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we
                 * stop when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr - freed);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}
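
/*
 * Glue for the generic memory shrinker: ->count_objects reports how many
 * workgroups are currently reclaimable across all mounted EROFS instances,
 * while ->scan_objects walks erofs_sb_list round-robin, skipping any
 * instance whose umount_mutex is contended, until enough workgroups have
 * been freed.
 */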

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif /* !CONFIG_EROFS_FS_ZIP */