// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
	preempt_disable();
	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif
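
/*
 * Usage sketch for the per-CPU buffer above (illustrative only): the
 * returned pointer stays valid only while preemption remains disabled,
 * so callers are expected to release it with a matching helper that
 * re-enables preemption (assumed here to be erofs_put_pcpubuf() from
 * internal.h):
 *
 *	void *dst = erofs_get_pcpubuf(0);
 *
 *	memcpy(dst, src, PAGE_SIZE);
 *	erofs_put_pcpubuf(dst);
 */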

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count, pairing with erofs_workgroup_put */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray, so that concurrent accesses
	 * which are not serialized by xa_lock cannot trigger a
	 * potential use-after-free.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}
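
/*
 * Lookup-or-insert usage sketch (illustrative only; the real callers
 * live in the compressed-data frontend). erofs_insert_workgroup() may
 * return an error pointer, or an already-inserted workgroup for the
 * same index, in which case the caller should drop its own allocation
 * ('newgrp' below stands for a freshly allocated, refcount-initialized
 * workgroup) and continue with the returned one. The reference taken
 * here is finally released with erofs_workgroup_put():
 *
 *	grp = erofs_find_workgroup(sb, index);
 *	if (!grp) {
 *		grp = erofs_insert_workgroup(sb, newgrp);
 *		if (IS_ERR(grp))
 *			return PTR_ERR(grp);
 *	}
 *	... use grp ...
 *	erofs_workgroup_put(grp);
 */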

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If managed cache is on, the refcount of workgroups
	 * themselves could be < 0 (frozen). In other words,
	 * there is no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before
	 * the workgroup is deleted from the XArray. Otherwise some
	 * cached pages could still be attached to the orphaned
	 * old workgroup when the new one is available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen,
	 * but in order to catch unexpected race conditions, add a
	 * DBG_BUGON to observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* last refcount should be connected with its managed pslot. */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * We move the ones we do to the end of the list, so we stop
		 * when we see one we have already done.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */