// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_page(gfp);
        }
        return page;
}

#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
        u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
        preempt_disable();
        return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (o <= 0)
                return -1;

        if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
                goto repeat;

        /* decrease the global shrink count, paired with erofs_workgroup_put() */
        if (o == 1)
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = xa_load(&sbi->managed_pslots, index);
        if (grp) {
                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax the RCU read side */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
                                               struct erofs_workgroup *grp)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
        struct erofs_workgroup *pre;

        /*
         * Bump up the reference count before making this workgroup
         * visible to others in the XArray, in order to avoid potential
         * UAF without serialization by xa_lock.
         */
        atomic_inc(&grp->refcount);

repeat:
        xa_lock(&sbi->managed_pslots);
        pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
                           NULL, grp, GFP_NOFS);
        if (pre) {
                if (xa_is_err(pre)) {
                        pre = ERR_PTR(xa_err(pre));
                } else if (erofs_workgroup_get(pre)) {
                        /* try to legitimize the current in-tree one */
                        xa_unlock(&sbi->managed_pslots);
                        cond_resched();
                        goto repeat;
                }
                atomic_dec(&grp->refcount);
                grp = pre;
        }
        xa_unlock(&sbi->managed_pslots);
        return grp;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp)
{
        /*
         * If the managed cache is enabled, the refcount of workgroups
         * themselves could be < 0 (frozen). In other words, there is
         * no guarantee that all refcounts are > 0.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be detached before the
         * workgroup is deleted from the XArray. Otherwise some cached
         * pages could still be attached to the orphaned old workgroup
         * when the new one becomes available in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * It's impossible to fail after the workgroup is frozen;
         * however, in order to catch unexpected race conditions,
         * add a DBG_BUGON to observe this in advance.
         */
        DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

        /*
         * If the managed cache is enabled, the last refcount should
         * indicate the related workstation.
         */
        erofs_workgroup_unfreeze_final(grp);
        return true;
}

static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink)
{
        struct erofs_workgroup *grp;
        unsigned int freed = 0;
        unsigned long index;

        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;

                ++freed;
                if (!--nr_shrink)
                        break;
        }
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        /* clean up all remaining workgroups in memory */
        erofs_shrink_workstation(sbi, ~0UL);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;

        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we
                 * stop when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr - freed);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif  /* !CONFIG_EROFS_FS_ZIP */
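
/*
 * Illustrative usage sketch (not part of the original file and kept out of
 * the build with "#if 0"): a hypothetical caller that looks up a workgroup
 * and balances the reference taken on its behalf. erofs_find_workgroup()
 * returns the workgroup with its refcount already bumped (or NULL if none
 * is cached), so each successful lookup must be paired with
 * erofs_workgroup_put(). The function name below is made up for
 * illustration only.
 */
#if 0
static int erofs_example_lookup(struct super_block *sb, pgoff_t index)
{
        struct erofs_workgroup *grp;

        grp = erofs_find_workgroup(sb, index);
        if (!grp)
                return -ENOENT;         /* nothing cached at this index */

        /* ... use the workgroup while holding the reference ... */

        /* drop the reference taken by erofs_find_workgroup() */
        erofs_workgroup_put(grp);
        return 0;
}
#endif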