147e4937aSGao Xiang // SPDX-License-Identifier: GPL-2.0-only 247e4937aSGao Xiang /* 347e4937aSGao Xiang * Copyright (C) 2018 HUAWEI, Inc. 447e4937aSGao Xiang * http://www.huawei.com/ 547e4937aSGao Xiang * Created by Gao Xiang <gaoxiang25@huawei.com> 647e4937aSGao Xiang */ 747e4937aSGao Xiang #include "internal.h" 847e4937aSGao Xiang #include <linux/pagevec.h> 947e4937aSGao Xiang 105ddcee1fSGao Xiang struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) 1147e4937aSGao Xiang { 1247e4937aSGao Xiang struct page *page; 1347e4937aSGao Xiang 1447e4937aSGao Xiang if (!list_empty(pool)) { 1547e4937aSGao Xiang page = lru_to_page(pool); 1647e4937aSGao Xiang DBG_BUGON(page_ref_count(page) != 1); 1747e4937aSGao Xiang list_del(&page->lru); 1847e4937aSGao Xiang } else { 195ddcee1fSGao Xiang page = alloc_page(gfp); 2047e4937aSGao Xiang } 2147e4937aSGao Xiang return page; 2247e4937aSGao Xiang } 2347e4937aSGao Xiang 2447e4937aSGao Xiang #if (EROFS_PCPUBUF_NR_PAGES > 0) 2547e4937aSGao Xiang static struct { 2647e4937aSGao Xiang u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES]; 2747e4937aSGao Xiang } ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS]; 2847e4937aSGao Xiang 2947e4937aSGao Xiang void *erofs_get_pcpubuf(unsigned int pagenr) 3047e4937aSGao Xiang { 3147e4937aSGao Xiang preempt_disable(); 3247e4937aSGao Xiang return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE]; 3347e4937aSGao Xiang } 3447e4937aSGao Xiang #endif 3547e4937aSGao Xiang 3647e4937aSGao Xiang #ifdef CONFIG_EROFS_FS_ZIP 3747e4937aSGao Xiang /* global shrink count (for all mounted EROFS instances) */ 3847e4937aSGao Xiang static atomic_long_t erofs_global_shrink_cnt; 3947e4937aSGao Xiang 4047e4937aSGao Xiang static int erofs_workgroup_get(struct erofs_workgroup *grp) 4147e4937aSGao Xiang { 4247e4937aSGao Xiang int o; 4347e4937aSGao Xiang 4447e4937aSGao Xiang repeat: 4547e4937aSGao Xiang o = erofs_wait_on_workgroup_freezed(grp); 468d8a09b0SGao Xiang if (o <= 0) 4747e4937aSGao Xiang return -1; 4847e4937aSGao Xiang 498d8a09b0SGao Xiang if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o) 5047e4937aSGao Xiang goto repeat; 5147e4937aSGao Xiang 5247e4937aSGao Xiang /* decrease refcount paired by erofs_workgroup_put */ 538d8a09b0SGao Xiang if (o == 1) 5447e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 5547e4937aSGao Xiang return 0; 5647e4937aSGao Xiang } 5747e4937aSGao Xiang 5847e4937aSGao Xiang struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, 59997626d8SVladimir Zapolskiy pgoff_t index) 6047e4937aSGao Xiang { 6147e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 6247e4937aSGao Xiang struct erofs_workgroup *grp; 6347e4937aSGao Xiang 6447e4937aSGao Xiang repeat: 6547e4937aSGao Xiang rcu_read_lock(); 6664094a04SGao Xiang grp = xa_load(&sbi->managed_pslots, index); 6747e4937aSGao Xiang if (grp) { 6847e4937aSGao Xiang if (erofs_workgroup_get(grp)) { 6947e4937aSGao Xiang /* prefer to relax rcu read side */ 7047e4937aSGao Xiang rcu_read_unlock(); 7147e4937aSGao Xiang goto repeat; 7247e4937aSGao Xiang } 7347e4937aSGao Xiang 7447e4937aSGao Xiang DBG_BUGON(index != grp->index); 7547e4937aSGao Xiang } 7647e4937aSGao Xiang rcu_read_unlock(); 7747e4937aSGao Xiang return grp; 7847e4937aSGao Xiang } 7947e4937aSGao Xiang 8064094a04SGao Xiang struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, 81e5e9a432SVladimir Zapolskiy struct erofs_workgroup *grp) 8247e4937aSGao Xiang { 8364094a04SGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 8464094a04SGao Xiang struct erofs_workgroup *pre; 8547e4937aSGao Xiang 8664094a04SGao Xiang /* 8764094a04SGao Xiang * Bump up a reference count before making this visible 8864094a04SGao Xiang * to others for the XArray in order to avoid potential 8964094a04SGao Xiang * UAF without serialized by xa_lock. 9064094a04SGao Xiang */ 9164094a04SGao Xiang atomic_inc(&grp->refcount); 9264094a04SGao Xiang 9364094a04SGao Xiang repeat: 9464094a04SGao Xiang xa_lock(&sbi->managed_pslots); 9564094a04SGao Xiang pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, 9664094a04SGao Xiang NULL, grp, GFP_NOFS); 9764094a04SGao Xiang if (pre) { 9864094a04SGao Xiang if (xa_is_err(pre)) { 9964094a04SGao Xiang pre = ERR_PTR(xa_err(pre)); 10064094a04SGao Xiang } else if (erofs_workgroup_get(pre)) { 10164094a04SGao Xiang /* try to legitimize the current in-tree one */ 10264094a04SGao Xiang xa_unlock(&sbi->managed_pslots); 10364094a04SGao Xiang cond_resched(); 10464094a04SGao Xiang goto repeat; 10547e4937aSGao Xiang } 10664094a04SGao Xiang atomic_dec(&grp->refcount); 10764094a04SGao Xiang grp = pre; 10864094a04SGao Xiang } 10964094a04SGao Xiang xa_unlock(&sbi->managed_pslots); 11064094a04SGao Xiang return grp; 11147e4937aSGao Xiang } 11247e4937aSGao Xiang 11347e4937aSGao Xiang static void __erofs_workgroup_free(struct erofs_workgroup *grp) 11447e4937aSGao Xiang { 11547e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 11647e4937aSGao Xiang erofs_workgroup_free_rcu(grp); 11747e4937aSGao Xiang } 11847e4937aSGao Xiang 11947e4937aSGao Xiang int erofs_workgroup_put(struct erofs_workgroup *grp) 12047e4937aSGao Xiang { 12147e4937aSGao Xiang int count = atomic_dec_return(&grp->refcount); 12247e4937aSGao Xiang 12347e4937aSGao Xiang if (count == 1) 12447e4937aSGao Xiang atomic_long_inc(&erofs_global_shrink_cnt); 12547e4937aSGao Xiang else if (!count) 12647e4937aSGao Xiang __erofs_workgroup_free(grp); 12747e4937aSGao Xiang return count; 12847e4937aSGao Xiang } 12947e4937aSGao Xiang 13047e4937aSGao Xiang static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) 13147e4937aSGao Xiang { 13247e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 0); 13347e4937aSGao Xiang __erofs_workgroup_free(grp); 13447e4937aSGao Xiang } 13547e4937aSGao Xiang 13647e4937aSGao Xiang static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, 137bda17a45SGao Xiang struct erofs_workgroup *grp) 13847e4937aSGao Xiang { 13947e4937aSGao Xiang /* 14047e4937aSGao Xiang * If managed cache is on, refcount of workgroups 14147e4937aSGao Xiang * themselves could be < 0 (freezed). In other words, 14247e4937aSGao Xiang * there is no guarantee that all refcounts > 0. 14347e4937aSGao Xiang */ 14447e4937aSGao Xiang if (!erofs_workgroup_try_to_freeze(grp, 1)) 14547e4937aSGao Xiang return false; 14647e4937aSGao Xiang 14747e4937aSGao Xiang /* 14847e4937aSGao Xiang * Note that all cached pages should be unattached 14964094a04SGao Xiang * before deleted from the XArray. Otherwise some 15047e4937aSGao Xiang * cached pages could be still attached to the orphan 15147e4937aSGao Xiang * old workgroup when the new one is available in the tree. 15247e4937aSGao Xiang */ 15347e4937aSGao Xiang if (erofs_try_to_free_all_cached_pages(sbi, grp)) { 15447e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 1); 15547e4937aSGao Xiang return false; 15647e4937aSGao Xiang } 15747e4937aSGao Xiang 15847e4937aSGao Xiang /* 15947e4937aSGao Xiang * It's impossible to fail after the workgroup is freezed, 16047e4937aSGao Xiang * however in order to avoid some race conditions, add a 16147e4937aSGao Xiang * DBG_BUGON to observe this in advance. 16247e4937aSGao Xiang */ 16364094a04SGao Xiang DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp); 16447e4937aSGao Xiang 16547e4937aSGao Xiang /* 16647e4937aSGao Xiang * If managed cache is on, last refcount should indicate 16747e4937aSGao Xiang * the related workstation. 16847e4937aSGao Xiang */ 16947e4937aSGao Xiang erofs_workgroup_unfreeze_final(grp); 17047e4937aSGao Xiang return true; 17147e4937aSGao Xiang } 17247e4937aSGao Xiang 17347e4937aSGao Xiang static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, 174bda17a45SGao Xiang unsigned long nr_shrink) 17547e4937aSGao Xiang { 17664094a04SGao Xiang struct erofs_workgroup *grp; 17747e4937aSGao Xiang unsigned int freed = 0; 17864094a04SGao Xiang unsigned long index; 17947e4937aSGao Xiang 18064094a04SGao Xiang xa_for_each(&sbi->managed_pslots, index, grp) { 18147e4937aSGao Xiang /* try to shrink each valid workgroup */ 182bda17a45SGao Xiang if (!erofs_try_to_release_workgroup(sbi, grp)) 18347e4937aSGao Xiang continue; 18447e4937aSGao Xiang 18547e4937aSGao Xiang ++freed; 1868d8a09b0SGao Xiang if (!--nr_shrink) 18747e4937aSGao Xiang break; 18847e4937aSGao Xiang } 18947e4937aSGao Xiang return freed; 19047e4937aSGao Xiang } 19147e4937aSGao Xiang 19247e4937aSGao Xiang /* protected by 'erofs_sb_list_lock' */ 19347e4937aSGao Xiang static unsigned int shrinker_run_no; 19447e4937aSGao Xiang 19547e4937aSGao Xiang /* protects the mounted 'erofs_sb_list' */ 19647e4937aSGao Xiang static DEFINE_SPINLOCK(erofs_sb_list_lock); 19747e4937aSGao Xiang static LIST_HEAD(erofs_sb_list); 19847e4937aSGao Xiang 19947e4937aSGao Xiang void erofs_shrinker_register(struct super_block *sb) 20047e4937aSGao Xiang { 20147e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 20247e4937aSGao Xiang 20347e4937aSGao Xiang mutex_init(&sbi->umount_mutex); 20447e4937aSGao Xiang 20547e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 20647e4937aSGao Xiang list_add(&sbi->list, &erofs_sb_list); 20747e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 20847e4937aSGao Xiang } 20947e4937aSGao Xiang 21047e4937aSGao Xiang void erofs_shrinker_unregister(struct super_block *sb) 21147e4937aSGao Xiang { 21247e4937aSGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 21347e4937aSGao Xiang 21447e4937aSGao Xiang mutex_lock(&sbi->umount_mutex); 215bda17a45SGao Xiang /* clean up all remaining workgroups in memory */ 216bda17a45SGao Xiang erofs_shrink_workstation(sbi, ~0UL); 21747e4937aSGao Xiang 21847e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 21947e4937aSGao Xiang list_del(&sbi->list); 22047e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 22147e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 22247e4937aSGao Xiang } 22347e4937aSGao Xiang 22447e4937aSGao Xiang static unsigned long erofs_shrink_count(struct shrinker *shrink, 22547e4937aSGao Xiang struct shrink_control *sc) 22647e4937aSGao Xiang { 22747e4937aSGao Xiang return atomic_long_read(&erofs_global_shrink_cnt); 22847e4937aSGao Xiang } 22947e4937aSGao Xiang 23047e4937aSGao Xiang static unsigned long erofs_shrink_scan(struct shrinker *shrink, 23147e4937aSGao Xiang struct shrink_control *sc) 23247e4937aSGao Xiang { 23347e4937aSGao Xiang struct erofs_sb_info *sbi; 23447e4937aSGao Xiang struct list_head *p; 23547e4937aSGao Xiang 23647e4937aSGao Xiang unsigned long nr = sc->nr_to_scan; 23747e4937aSGao Xiang unsigned int run_no; 23847e4937aSGao Xiang unsigned long freed = 0; 23947e4937aSGao Xiang 24047e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 24147e4937aSGao Xiang do { 24247e4937aSGao Xiang run_no = ++shrinker_run_no; 24347e4937aSGao Xiang } while (run_no == 0); 24447e4937aSGao Xiang 24547e4937aSGao Xiang /* Iterate over all mounted superblocks and try to shrink them */ 24647e4937aSGao Xiang p = erofs_sb_list.next; 24747e4937aSGao Xiang while (p != &erofs_sb_list) { 24847e4937aSGao Xiang sbi = list_entry(p, struct erofs_sb_info, list); 24947e4937aSGao Xiang 25047e4937aSGao Xiang /* 25147e4937aSGao Xiang * We move the ones we do to the end of the list, so we stop 25247e4937aSGao Xiang * when we see one we have already done. 25347e4937aSGao Xiang */ 25447e4937aSGao Xiang if (sbi->shrinker_run_no == run_no) 25547e4937aSGao Xiang break; 25647e4937aSGao Xiang 25747e4937aSGao Xiang if (!mutex_trylock(&sbi->umount_mutex)) { 25847e4937aSGao Xiang p = p->next; 25947e4937aSGao Xiang continue; 26047e4937aSGao Xiang } 26147e4937aSGao Xiang 26247e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 26347e4937aSGao Xiang sbi->shrinker_run_no = run_no; 26447e4937aSGao Xiang 265bda17a45SGao Xiang freed += erofs_shrink_workstation(sbi, nr); 26647e4937aSGao Xiang 26747e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 26847e4937aSGao Xiang /* Get the next list element before we move this one */ 26947e4937aSGao Xiang p = p->next; 27047e4937aSGao Xiang 27147e4937aSGao Xiang /* 27247e4937aSGao Xiang * Move this one to the end of the list to provide some 27347e4937aSGao Xiang * fairness. 27447e4937aSGao Xiang */ 27547e4937aSGao Xiang list_move_tail(&sbi->list, &erofs_sb_list); 27647e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 27747e4937aSGao Xiang 27847e4937aSGao Xiang if (freed >= nr) 27947e4937aSGao Xiang break; 28047e4937aSGao Xiang } 28147e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 28247e4937aSGao Xiang return freed; 28347e4937aSGao Xiang } 28447e4937aSGao Xiang 28547e4937aSGao Xiang static struct shrinker erofs_shrinker_info = { 28647e4937aSGao Xiang .scan_objects = erofs_shrink_scan, 28747e4937aSGao Xiang .count_objects = erofs_shrink_count, 28847e4937aSGao Xiang .seeks = DEFAULT_SEEKS, 28947e4937aSGao Xiang }; 29047e4937aSGao Xiang 29147e4937aSGao Xiang int __init erofs_init_shrinker(void) 29247e4937aSGao Xiang { 29347e4937aSGao Xiang return register_shrinker(&erofs_shrinker_info); 29447e4937aSGao Xiang } 29547e4937aSGao Xiang 29647e4937aSGao Xiang void erofs_exit_shrinker(void) 29747e4937aSGao Xiang { 29847e4937aSGao Xiang unregister_shrinker(&erofs_shrinker_info); 29947e4937aSGao Xiang } 30047e4937aSGao Xiang #endif /* !CONFIG_EROFS_FS_ZIP */ 30147e4937aSGao Xiang 302