1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2018 HUAWEI, Inc. 4 * https://www.huawei.com/ 5 */ 6 #include "internal.h" 7 #include <linux/pagevec.h> 8 9 struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) 10 { 11 struct page *page = *pagepool; 12 13 if (page) { 14 DBG_BUGON(page_ref_count(page) != 1); 15 *pagepool = (struct page *)page_private(page); 16 } else { 17 page = alloc_page(gfp); 18 } 19 return page; 20 } 21 22 void erofs_release_pages(struct page **pagepool) 23 { 24 while (*pagepool) { 25 struct page *page = *pagepool; 26 27 *pagepool = (struct page *)page_private(page); 28 put_page(page); 29 } 30 } 31 32 #ifdef CONFIG_EROFS_FS_ZIP 33 /* global shrink count (for all mounted EROFS instances) */ 34 static atomic_long_t erofs_global_shrink_cnt; 35 36 static int erofs_workgroup_get(struct erofs_workgroup *grp) 37 { 38 int o; 39 40 repeat: 41 o = erofs_wait_on_workgroup_freezed(grp); 42 if (o <= 0) 43 return -1; 44 45 if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o) 46 goto repeat; 47 48 /* decrease refcount paired by erofs_workgroup_put */ 49 if (o == 1) 50 atomic_long_dec(&erofs_global_shrink_cnt); 51 return 0; 52 } 53 54 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, 55 pgoff_t index) 56 { 57 struct erofs_sb_info *sbi = EROFS_SB(sb); 58 struct erofs_workgroup *grp; 59 60 repeat: 61 rcu_read_lock(); 62 grp = xa_load(&sbi->managed_pslots, index); 63 if (grp) { 64 if (erofs_workgroup_get(grp)) { 65 /* prefer to relax rcu read side */ 66 rcu_read_unlock(); 67 goto repeat; 68 } 69 70 DBG_BUGON(index != grp->index); 71 } 72 rcu_read_unlock(); 73 return grp; 74 } 75 76 struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, 77 struct erofs_workgroup *grp) 78 { 79 struct erofs_sb_info *const sbi = EROFS_SB(sb); 80 struct erofs_workgroup *pre; 81 82 /* 83 * Bump up a reference count before making this visible 84 * to others for the XArray in order to avoid potential 85 * UAF without serialized by xa_lock. 86 */ 87 atomic_inc(&grp->refcount); 88 89 repeat: 90 xa_lock(&sbi->managed_pslots); 91 pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, 92 NULL, grp, GFP_NOFS); 93 if (pre) { 94 if (xa_is_err(pre)) { 95 pre = ERR_PTR(xa_err(pre)); 96 } else if (erofs_workgroup_get(pre)) { 97 /* try to legitimize the current in-tree one */ 98 xa_unlock(&sbi->managed_pslots); 99 cond_resched(); 100 goto repeat; 101 } 102 atomic_dec(&grp->refcount); 103 grp = pre; 104 } 105 xa_unlock(&sbi->managed_pslots); 106 return grp; 107 } 108 109 static void __erofs_workgroup_free(struct erofs_workgroup *grp) 110 { 111 atomic_long_dec(&erofs_global_shrink_cnt); 112 erofs_workgroup_free_rcu(grp); 113 } 114 115 int erofs_workgroup_put(struct erofs_workgroup *grp) 116 { 117 int count = atomic_dec_return(&grp->refcount); 118 119 if (count == 1) 120 atomic_long_inc(&erofs_global_shrink_cnt); 121 else if (!count) 122 __erofs_workgroup_free(grp); 123 return count; 124 } 125 126 static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, 127 struct erofs_workgroup *grp) 128 { 129 /* 130 * If managed cache is on, refcount of workgroups 131 * themselves could be < 0 (freezed). In other words, 132 * there is no guarantee that all refcounts > 0. 133 */ 134 if (!erofs_workgroup_try_to_freeze(grp, 1)) 135 return false; 136 137 /* 138 * Note that all cached pages should be unattached 139 * before deleted from the XArray. Otherwise some 140 * cached pages could be still attached to the orphan 141 * old workgroup when the new one is available in the tree. 142 */ 143 if (erofs_try_to_free_all_cached_pages(sbi, grp)) { 144 erofs_workgroup_unfreeze(grp, 1); 145 return false; 146 } 147 148 /* 149 * It's impossible to fail after the workgroup is freezed, 150 * however in order to avoid some race conditions, add a 151 * DBG_BUGON to observe this in advance. 152 */ 153 DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp); 154 155 /* last refcount should be connected with its managed pslot. */ 156 erofs_workgroup_unfreeze(grp, 0); 157 __erofs_workgroup_free(grp); 158 return true; 159 } 160 161 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, 162 unsigned long nr_shrink) 163 { 164 struct erofs_workgroup *grp; 165 unsigned int freed = 0; 166 unsigned long index; 167 168 xa_for_each(&sbi->managed_pslots, index, grp) { 169 /* try to shrink each valid workgroup */ 170 if (!erofs_try_to_release_workgroup(sbi, grp)) 171 continue; 172 173 ++freed; 174 if (!--nr_shrink) 175 break; 176 } 177 return freed; 178 } 179 180 /* protected by 'erofs_sb_list_lock' */ 181 static unsigned int shrinker_run_no; 182 183 /* protects the mounted 'erofs_sb_list' */ 184 static DEFINE_SPINLOCK(erofs_sb_list_lock); 185 static LIST_HEAD(erofs_sb_list); 186 187 void erofs_shrinker_register(struct super_block *sb) 188 { 189 struct erofs_sb_info *sbi = EROFS_SB(sb); 190 191 mutex_init(&sbi->umount_mutex); 192 193 spin_lock(&erofs_sb_list_lock); 194 list_add(&sbi->list, &erofs_sb_list); 195 spin_unlock(&erofs_sb_list_lock); 196 } 197 198 void erofs_shrinker_unregister(struct super_block *sb) 199 { 200 struct erofs_sb_info *const sbi = EROFS_SB(sb); 201 202 mutex_lock(&sbi->umount_mutex); 203 /* clean up all remaining workgroups in memory */ 204 erofs_shrink_workstation(sbi, ~0UL); 205 206 spin_lock(&erofs_sb_list_lock); 207 list_del(&sbi->list); 208 spin_unlock(&erofs_sb_list_lock); 209 mutex_unlock(&sbi->umount_mutex); 210 } 211 212 static unsigned long erofs_shrink_count(struct shrinker *shrink, 213 struct shrink_control *sc) 214 { 215 return atomic_long_read(&erofs_global_shrink_cnt); 216 } 217 218 static unsigned long erofs_shrink_scan(struct shrinker *shrink, 219 struct shrink_control *sc) 220 { 221 struct erofs_sb_info *sbi; 222 struct list_head *p; 223 224 unsigned long nr = sc->nr_to_scan; 225 unsigned int run_no; 226 unsigned long freed = 0; 227 228 spin_lock(&erofs_sb_list_lock); 229 do { 230 run_no = ++shrinker_run_no; 231 } while (run_no == 0); 232 233 /* Iterate over all mounted superblocks and try to shrink them */ 234 p = erofs_sb_list.next; 235 while (p != &erofs_sb_list) { 236 sbi = list_entry(p, struct erofs_sb_info, list); 237 238 /* 239 * We move the ones we do to the end of the list, so we stop 240 * when we see one we have already done. 241 */ 242 if (sbi->shrinker_run_no == run_no) 243 break; 244 245 if (!mutex_trylock(&sbi->umount_mutex)) { 246 p = p->next; 247 continue; 248 } 249 250 spin_unlock(&erofs_sb_list_lock); 251 sbi->shrinker_run_no = run_no; 252 253 freed += erofs_shrink_workstation(sbi, nr - freed); 254 255 spin_lock(&erofs_sb_list_lock); 256 /* Get the next list element before we move this one */ 257 p = p->next; 258 259 /* 260 * Move this one to the end of the list to provide some 261 * fairness. 262 */ 263 list_move_tail(&sbi->list, &erofs_sb_list); 264 mutex_unlock(&sbi->umount_mutex); 265 266 if (freed >= nr) 267 break; 268 } 269 spin_unlock(&erofs_sb_list_lock); 270 return freed; 271 } 272 273 static struct shrinker erofs_shrinker_info = { 274 .scan_objects = erofs_shrink_scan, 275 .count_objects = erofs_shrink_count, 276 .seeks = DEFAULT_SEEKS, 277 }; 278 279 int __init erofs_init_shrinker(void) 280 { 281 return register_shrinker(&erofs_shrinker_info); 282 } 283 284 void erofs_exit_shrinker(void) 285 { 286 unregister_shrinker(&erofs_shrinker_info); 287 } 288 #endif /* !CONFIG_EROFS_FS_ZIP */ 289