// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

/* reuse a page from the given pool if available, otherwise allocate a new one */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

/*
 * Return the pagenr'th page of the current CPU's static buffer;
 * preemption stays disabled until the caller releases the buffer.
 */
void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

/* grab a reference unless the workgroup is frozen or already being freed */
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count, paired with erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

/* look up a workgroup under RCU and retry until a reference can be taken */
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (atomic_read(&grp->refcount) != 1) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	xa_lock(&sbi->workstn_tree);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF
	 * without being serialized by workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
	if (err)
		/*
		 * it's safe to decrease since the workgroup isn't visible
		 * and refcount >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	xa_unlock(&sbi->workstn_tree);
	radix_tree_preload_end();
	return err;
}
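
/*
 * A workgroup with refcount == 1 is only tracked by the workstation
 * radix tree and is accounted in erofs_global_shrink_cnt as reclaimable;
 * once its refcount drops to zero, it's freed after an RCU grace period.
 */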
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, the refcount of workgroups
	 * themselves could be < 0 (frozen). In other words,
	 * there is no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the radix tree. Otherwise some
	 * cached pages could still be attached to the orphaned old
	 * workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen,
	 * however in order to avoid some race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);

	/*
	 * If managed cache is on, the last refcount should indicate
	 * the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}
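
/*
 * Walk the workstation radix tree in PAGEVEC_SIZE-sized batches and try
 * to release each workgroup found, stopping once nr_shrink workgroups
 * have been freed or the whole tree has been scanned.  Returns the
 * number of workgroups actually freed.
 */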
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	xa_lock(&sbi->workstn_tree);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = batch[i];

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	xa_unlock(&sbi->workstn_tree);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
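
/*
 * The memory shrinker interface: ->count_objects reports the number of
 * reclaimable workgroups across all mounted instances, and ->scan_objects
 * walks erofs_sb_list and reclaims workgroups from each instance in turn.
 */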
static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */