// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	/* prefer reusing a page from the caller-provided pool */
	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	/* preemption stays disabled until the caller releases the buffer */
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the shrink count; pairs with the increase in erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (atomic_read(&grp->refcount) != 1) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	xa_lock(&sbi->workstn_tree);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid a potential UAF
	 * without being serialized by workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
	if (err)
		/*
		 * it's safe to decrease since the workgroup isn't visible
		 * and refcount >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	xa_unlock(&sbi->workstn_tree);
	radix_tree_preload_end();
	return err;
}
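
/*
 * A minimal sketch (not built) of how a caller is expected to combine the
 * helpers above: look the workgroup up first and register a freshly
 * initialized one only on a miss.  The function and variable names below
 * are hypothetical; the real users are the compressed-data frontend.
 */
#if 0
static struct erofs_workgroup *example_lookup_or_register(struct super_block *sb,
							   pgoff_t index)
{
	struct erofs_workgroup *grp;
	bool tag;

repeat:
	/* fast path: on a hit, the workgroup is returned with a reference held */
	grp = erofs_find_workgroup(sb, index, &tag);
	if (grp)
		return grp;

	/* slow path: set up a new workgroup and publish it */
	grp = kzalloc(sizeof(*grp), GFP_NOFS);
	if (!grp)
		return ERR_PTR(-ENOMEM);

	grp->index = index;
	atomic_set(&grp->refcount, 1);	/* erofs_register_workgroup() expects 1 */

	if (erofs_register_workgroup(sb, grp, false)) {
		/* lost the race against a concurrent inserter, retry the lookup */
		kfree(grp);
		goto repeat;
	}
	/* drop the reference with erofs_workgroup_put() when done */
	return grp;
}
#endif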

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	/* the workstation now holds the only remaining reference */
	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, the refcount of workgroups
	 * themselves could be < 0 (frozen). In other words,
	 * there is no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the radix tree. Otherwise some
	 * cached pages could still be attached to the orphaned
	 * old workgroup when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen;
	 * however, to catch possible race conditions early, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * If managed cache is on, the last refcount should indicate
	 * the related workstation.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}
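
/*
 * Summary of the refcount convention used by the helpers above:
 *   < 0  : the workgroup is frozen (e.g. while it is being reclaimed);
 *   == 1 : only the workstation radix tree references it, so it is counted
 *          in erofs_global_shrink_cnt and is a candidate for shrinking;
 *   >= 2 : it is additionally referenced by at least one active user.
 */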

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;

	int i, found;
repeat:
	xa_lock(&sbi->workstn_tree);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	xa_unlock(&sbi->workstn_tree);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */
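
/*
 * Illustrative only (not built): how the shrinker hooks above are expected
 * to be wired up when the module is loaded and unloaded.  The function
 * names below are hypothetical; the real call sites live in the EROFS
 * module init/exit path.
 */
#if 0
static int __init example_module_init(void)
{
	int err = erofs_init_shrinker();

	if (err)
		return err;
	/* ... register caches and the filesystem type here ... */
	return 0;
}

static void __exit example_module_exit(void)
{
	/* ... unregister the filesystem type first ... */
	erofs_exit_shrinker();
}
#endif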