// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/*
	 * Decrease the global shrink count: this pairs with the
	 * increment in erofs_workgroup_put().
	 */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index, bool *tag)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = radix_tree_lookup(&sbi->workstn_tree, index);
	if (grp) {
		*tag = xa_pointer_tag(grp);
		grp = xa_untag_pointer(grp);

		if (erofs_workgroup_get(grp)) {
			/* prefer to relax the RCU read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

int erofs_register_workgroup(struct super_block *sb,
			     struct erofs_workgroup *grp,
			     bool tag)
{
	struct erofs_sb_info *sbi;
	int err;

	/* grp shouldn't be broken or used before */
	if (atomic_read(&grp->refcount) != 1) {
		DBG_BUGON(1);
		return -EINVAL;
	}

	err = radix_tree_preload(GFP_NOFS);
	if (err)
		return err;

	sbi = EROFS_SB(sb);
	xa_lock(&sbi->workstn_tree);

	grp = xa_tag_pointer(grp, tag);

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to other users in order to avoid potential UAF:
	 * lookups are not serialized by workstn_lock.
	 */
	__erofs_workgroup_get(grp);

	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
	if (err)
		/*
		 * It's safe to decrease here since the workgroup isn't
		 * visible yet and its refcount is >= 2 (cannot be frozen).
		 */
		__erofs_workgroup_put(grp);

	xa_unlock(&sbi->workstn_tree);
	radix_tree_preload_end();
	return err;
}
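
/*
 * Illustrative usage sketch (an assumption added for documentation, not
 * an API defined in this file): a caller typically pairs the two helpers
 * above by looking up an existing workgroup first and only registering a
 * freshly allocated one on a miss, retrying on -EEXIST races:
 *
 *	grp = erofs_find_workgroup(sb, index, &tag);
 *	if (!grp) {
 *		grp = <allocate a new workgroup with refcount == 1>;
 *		if (erofs_register_workgroup(sb, grp, tag) == -EEXIST) {
 *			<free the still-unregistered grp>;
 *			<retry the lookup>;
 *		}
 *	}
 */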

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp,
					   bool cleanup)
{
	/*
	 * If managed cache is on, the refcount of a workgroup itself
	 * can be negative (frozen). In other words, there is no
	 * guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the radix tree. Otherwise, some
	 * cached pages could still be attached to the orphaned old
	 * workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * Deleting from the radix tree cannot fail after the workgroup
	 * is frozen; add a DBG_BUGON here to catch unexpected races
	 * early.
	 */
	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
						     grp->index)) != grp);

	/*
	 * If managed cache is on, the last reference is held by
	 * the workstation itself.
	 */
	erofs_workgroup_unfreeze_final(grp);
	return true;
}
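
/*
 * Summary of workgroup refcount states, as implied by the helpers above
 * (added for clarity):
 *
 *	refcount > 1	in active use and not reclaimable;
 *	refcount == 1	only tracked by the workstation, counted in
 *			erofs_global_shrink_cnt and thus reclaimable;
 *	refcount < 0	frozen by a reclaimer via
 *			erofs_workgroup_try_to_freeze().
 */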

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink,
					      bool cleanup)
{
	pgoff_t first_index = 0;
	void *batch[PAGEVEC_SIZE];
	unsigned int freed = 0;
	int i, found;

repeat:
	xa_lock(&sbi->workstn_tree);

	found = radix_tree_gang_lookup(&sbi->workstn_tree,
				       batch, first_index, PAGEVEC_SIZE);

	for (i = 0; i < found; ++i) {
		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

		first_index = grp->index + 1;

		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	xa_unlock(&sbi->workstn_tree);

	if (i && nr_shrink)
		goto repeat;
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	erofs_shrink_workstation(sbi, ~0UL, true);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the superblocks we visit to the end of the
		 * list, so we stop when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr, false);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}
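
/*
 * Background on the shrinker contract (generic mm behavior, noted here
 * for readers of the callbacks above): the shrinker core first calls
 * .count_objects to estimate how many objects are freeable, then calls
 * .scan_objects with sc->nr_to_scan as the reclaim target.
 * erofs_shrink_count() reports the global shrink count maintained
 * above, and erofs_shrink_scan() walks the mounted superblocks
 * round-robin, stamping each with shrinker_run_no so that a single
 * pass visits every superblock at most once.
 */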

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */
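
/*
 * Wiring sketch (illustrative; the actual call sites live in the EROFS
 * superblock code): the hooks above are expected to be driven roughly
 * as follows.
 *
 *	module init:	erofs_init_shrinker()
 *	mount:		erofs_shrinker_register(sb)
 *	unmount:	erofs_shrinker_unregister(sb)
 *	module exit:	erofs_exit_shrinker()
 */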