147e4937aSGao Xiang // SPDX-License-Identifier: GPL-2.0-only 247e4937aSGao Xiang /* 347e4937aSGao Xiang * Copyright (C) 2018 HUAWEI, Inc. 447e4937aSGao Xiang * http://www.huawei.com/ 547e4937aSGao Xiang * Created by Gao Xiang <gaoxiang25@huawei.com> 647e4937aSGao Xiang */ 747e4937aSGao Xiang #include "internal.h" 847e4937aSGao Xiang #include <linux/pagevec.h> 947e4937aSGao Xiang 1047e4937aSGao Xiang struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail) 1147e4937aSGao Xiang { 1247e4937aSGao Xiang struct page *page; 1347e4937aSGao Xiang 1447e4937aSGao Xiang if (!list_empty(pool)) { 1547e4937aSGao Xiang page = lru_to_page(pool); 1647e4937aSGao Xiang DBG_BUGON(page_ref_count(page) != 1); 1747e4937aSGao Xiang list_del(&page->lru); 1847e4937aSGao Xiang } else { 1947e4937aSGao Xiang page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0); 2047e4937aSGao Xiang } 2147e4937aSGao Xiang return page; 2247e4937aSGao Xiang } 2347e4937aSGao Xiang 2447e4937aSGao Xiang #if (EROFS_PCPUBUF_NR_PAGES > 0) 2547e4937aSGao Xiang static struct { 2647e4937aSGao Xiang u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES]; 2747e4937aSGao Xiang } ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS]; 2847e4937aSGao Xiang 2947e4937aSGao Xiang void *erofs_get_pcpubuf(unsigned int pagenr) 3047e4937aSGao Xiang { 3147e4937aSGao Xiang preempt_disable(); 3247e4937aSGao Xiang return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE]; 3347e4937aSGao Xiang } 3447e4937aSGao Xiang #endif 3547e4937aSGao Xiang 3647e4937aSGao Xiang #ifdef CONFIG_EROFS_FS_ZIP 3747e4937aSGao Xiang /* global shrink count (for all mounted EROFS instances) */ 3847e4937aSGao Xiang static atomic_long_t erofs_global_shrink_cnt; 3947e4937aSGao Xiang 4047e4937aSGao Xiang #define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) 4147e4937aSGao Xiang #define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) 4247e4937aSGao Xiang 4347e4937aSGao Xiang static int erofs_workgroup_get(struct erofs_workgroup *grp) 4447e4937aSGao Xiang { 4547e4937aSGao Xiang int o; 4647e4937aSGao Xiang 4747e4937aSGao Xiang repeat: 4847e4937aSGao Xiang o = erofs_wait_on_workgroup_freezed(grp); 498d8a09b0SGao Xiang if (o <= 0) 5047e4937aSGao Xiang return -1; 5147e4937aSGao Xiang 528d8a09b0SGao Xiang if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o) 5347e4937aSGao Xiang goto repeat; 5447e4937aSGao Xiang 5547e4937aSGao Xiang /* decrease refcount paired by erofs_workgroup_put */ 568d8a09b0SGao Xiang if (o == 1) 5747e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 5847e4937aSGao Xiang return 0; 5947e4937aSGao Xiang } 6047e4937aSGao Xiang 6147e4937aSGao Xiang struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, 6247e4937aSGao Xiang pgoff_t index, bool *tag) 6347e4937aSGao Xiang { 6447e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 6547e4937aSGao Xiang struct erofs_workgroup *grp; 6647e4937aSGao Xiang 6747e4937aSGao Xiang repeat: 6847e4937aSGao Xiang rcu_read_lock(); 6947e4937aSGao Xiang grp = radix_tree_lookup(&sbi->workstn_tree, index); 7047e4937aSGao Xiang if (grp) { 7147e4937aSGao Xiang *tag = xa_pointer_tag(grp); 7247e4937aSGao Xiang grp = xa_untag_pointer(grp); 7347e4937aSGao Xiang 7447e4937aSGao Xiang if (erofs_workgroup_get(grp)) { 7547e4937aSGao Xiang /* prefer to relax rcu read side */ 7647e4937aSGao Xiang rcu_read_unlock(); 7747e4937aSGao Xiang goto repeat; 7847e4937aSGao Xiang } 7947e4937aSGao Xiang 8047e4937aSGao Xiang DBG_BUGON(index != grp->index); 8147e4937aSGao Xiang } 8247e4937aSGao Xiang rcu_read_unlock(); 8347e4937aSGao Xiang return grp; 8447e4937aSGao Xiang } 8547e4937aSGao Xiang 8647e4937aSGao Xiang int erofs_register_workgroup(struct super_block *sb, 8747e4937aSGao Xiang struct erofs_workgroup *grp, 8847e4937aSGao Xiang bool tag) 8947e4937aSGao Xiang { 9047e4937aSGao Xiang struct erofs_sb_info *sbi; 9147e4937aSGao Xiang int err; 9247e4937aSGao Xiang 9347e4937aSGao Xiang /* grp shouldn't be broken or used before */ 948d8a09b0SGao Xiang if (atomic_read(&grp->refcount) != 1) { 9547e4937aSGao Xiang DBG_BUGON(1); 9647e4937aSGao Xiang return -EINVAL; 9747e4937aSGao Xiang } 9847e4937aSGao Xiang 9947e4937aSGao Xiang err = radix_tree_preload(GFP_NOFS); 10047e4937aSGao Xiang if (err) 10147e4937aSGao Xiang return err; 10247e4937aSGao Xiang 10347e4937aSGao Xiang sbi = EROFS_SB(sb); 10447e4937aSGao Xiang xa_lock(&sbi->workstn_tree); 10547e4937aSGao Xiang 10647e4937aSGao Xiang grp = xa_tag_pointer(grp, tag); 10747e4937aSGao Xiang 10847e4937aSGao Xiang /* 10947e4937aSGao Xiang * Bump up reference count before making this workgroup 11047e4937aSGao Xiang * visible to other users in order to avoid potential UAF 11147e4937aSGao Xiang * without serialized by workstn_lock. 11247e4937aSGao Xiang */ 11347e4937aSGao Xiang __erofs_workgroup_get(grp); 11447e4937aSGao Xiang 11547e4937aSGao Xiang err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); 1168d8a09b0SGao Xiang if (err) 11747e4937aSGao Xiang /* 11847e4937aSGao Xiang * it's safe to decrease since the workgroup isn't visible 11947e4937aSGao Xiang * and refcount >= 2 (cannot be freezed). 12047e4937aSGao Xiang */ 12147e4937aSGao Xiang __erofs_workgroup_put(grp); 12247e4937aSGao Xiang 12347e4937aSGao Xiang xa_unlock(&sbi->workstn_tree); 12447e4937aSGao Xiang radix_tree_preload_end(); 12547e4937aSGao Xiang return err; 12647e4937aSGao Xiang } 12747e4937aSGao Xiang 12847e4937aSGao Xiang static void __erofs_workgroup_free(struct erofs_workgroup *grp) 12947e4937aSGao Xiang { 13047e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 13147e4937aSGao Xiang erofs_workgroup_free_rcu(grp); 13247e4937aSGao Xiang } 13347e4937aSGao Xiang 13447e4937aSGao Xiang int erofs_workgroup_put(struct erofs_workgroup *grp) 13547e4937aSGao Xiang { 13647e4937aSGao Xiang int count = atomic_dec_return(&grp->refcount); 13747e4937aSGao Xiang 13847e4937aSGao Xiang if (count == 1) 13947e4937aSGao Xiang atomic_long_inc(&erofs_global_shrink_cnt); 14047e4937aSGao Xiang else if (!count) 14147e4937aSGao Xiang __erofs_workgroup_free(grp); 14247e4937aSGao Xiang return count; 14347e4937aSGao Xiang } 14447e4937aSGao Xiang 14547e4937aSGao Xiang static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) 14647e4937aSGao Xiang { 14747e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 0); 14847e4937aSGao Xiang __erofs_workgroup_free(grp); 14947e4937aSGao Xiang } 15047e4937aSGao Xiang 15147e4937aSGao Xiang static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, 152bda17a45SGao Xiang struct erofs_workgroup *grp) 15347e4937aSGao Xiang { 15447e4937aSGao Xiang /* 15547e4937aSGao Xiang * If managed cache is on, refcount of workgroups 15647e4937aSGao Xiang * themselves could be < 0 (freezed). In other words, 15747e4937aSGao Xiang * there is no guarantee that all refcounts > 0. 15847e4937aSGao Xiang */ 15947e4937aSGao Xiang if (!erofs_workgroup_try_to_freeze(grp, 1)) 16047e4937aSGao Xiang return false; 16147e4937aSGao Xiang 16247e4937aSGao Xiang /* 16347e4937aSGao Xiang * Note that all cached pages should be unattached 16447e4937aSGao Xiang * before deleted from the radix tree. Otherwise some 16547e4937aSGao Xiang * cached pages could be still attached to the orphan 16647e4937aSGao Xiang * old workgroup when the new one is available in the tree. 16747e4937aSGao Xiang */ 16847e4937aSGao Xiang if (erofs_try_to_free_all_cached_pages(sbi, grp)) { 16947e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 1); 17047e4937aSGao Xiang return false; 17147e4937aSGao Xiang } 17247e4937aSGao Xiang 17347e4937aSGao Xiang /* 17447e4937aSGao Xiang * It's impossible to fail after the workgroup is freezed, 17547e4937aSGao Xiang * however in order to avoid some race conditions, add a 17647e4937aSGao Xiang * DBG_BUGON to observe this in advance. 17747e4937aSGao Xiang */ 17847e4937aSGao Xiang DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, 17947e4937aSGao Xiang grp->index)) != grp); 18047e4937aSGao Xiang 18147e4937aSGao Xiang /* 18247e4937aSGao Xiang * If managed cache is on, last refcount should indicate 18347e4937aSGao Xiang * the related workstation. 18447e4937aSGao Xiang */ 18547e4937aSGao Xiang erofs_workgroup_unfreeze_final(grp); 18647e4937aSGao Xiang return true; 18747e4937aSGao Xiang } 18847e4937aSGao Xiang 18947e4937aSGao Xiang static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, 190bda17a45SGao Xiang unsigned long nr_shrink) 19147e4937aSGao Xiang { 19247e4937aSGao Xiang pgoff_t first_index = 0; 19347e4937aSGao Xiang void *batch[PAGEVEC_SIZE]; 19447e4937aSGao Xiang unsigned int freed = 0; 19547e4937aSGao Xiang 19647e4937aSGao Xiang int i, found; 19747e4937aSGao Xiang repeat: 19847e4937aSGao Xiang xa_lock(&sbi->workstn_tree); 19947e4937aSGao Xiang 20047e4937aSGao Xiang found = radix_tree_gang_lookup(&sbi->workstn_tree, 20147e4937aSGao Xiang batch, first_index, PAGEVEC_SIZE); 20247e4937aSGao Xiang 20347e4937aSGao Xiang for (i = 0; i < found; ++i) { 20447e4937aSGao Xiang struct erofs_workgroup *grp = xa_untag_pointer(batch[i]); 20547e4937aSGao Xiang 20647e4937aSGao Xiang first_index = grp->index + 1; 20747e4937aSGao Xiang 20847e4937aSGao Xiang /* try to shrink each valid workgroup */ 209bda17a45SGao Xiang if (!erofs_try_to_release_workgroup(sbi, grp)) 21047e4937aSGao Xiang continue; 21147e4937aSGao Xiang 21247e4937aSGao Xiang ++freed; 2138d8a09b0SGao Xiang if (!--nr_shrink) 21447e4937aSGao Xiang break; 21547e4937aSGao Xiang } 21647e4937aSGao Xiang xa_unlock(&sbi->workstn_tree); 21747e4937aSGao Xiang 21847e4937aSGao Xiang if (i && nr_shrink) 21947e4937aSGao Xiang goto repeat; 22047e4937aSGao Xiang return freed; 22147e4937aSGao Xiang } 22247e4937aSGao Xiang 22347e4937aSGao Xiang /* protected by 'erofs_sb_list_lock' */ 22447e4937aSGao Xiang static unsigned int shrinker_run_no; 22547e4937aSGao Xiang 22647e4937aSGao Xiang /* protects the mounted 'erofs_sb_list' */ 22747e4937aSGao Xiang static DEFINE_SPINLOCK(erofs_sb_list_lock); 22847e4937aSGao Xiang static LIST_HEAD(erofs_sb_list); 22947e4937aSGao Xiang 23047e4937aSGao Xiang void erofs_shrinker_register(struct super_block *sb) 23147e4937aSGao Xiang { 23247e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 23347e4937aSGao Xiang 23447e4937aSGao Xiang mutex_init(&sbi->umount_mutex); 23547e4937aSGao Xiang 23647e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 23747e4937aSGao Xiang list_add(&sbi->list, &erofs_sb_list); 23847e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 23947e4937aSGao Xiang } 24047e4937aSGao Xiang 24147e4937aSGao Xiang void erofs_shrinker_unregister(struct super_block *sb) 24247e4937aSGao Xiang { 24347e4937aSGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 24447e4937aSGao Xiang 24547e4937aSGao Xiang mutex_lock(&sbi->umount_mutex); 246bda17a45SGao Xiang /* clean up all remaining workgroups in memory */ 247bda17a45SGao Xiang erofs_shrink_workstation(sbi, ~0UL); 24847e4937aSGao Xiang 24947e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 25047e4937aSGao Xiang list_del(&sbi->list); 25147e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 25247e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 25347e4937aSGao Xiang } 25447e4937aSGao Xiang 25547e4937aSGao Xiang static unsigned long erofs_shrink_count(struct shrinker *shrink, 25647e4937aSGao Xiang struct shrink_control *sc) 25747e4937aSGao Xiang { 25847e4937aSGao Xiang return atomic_long_read(&erofs_global_shrink_cnt); 25947e4937aSGao Xiang } 26047e4937aSGao Xiang 26147e4937aSGao Xiang static unsigned long erofs_shrink_scan(struct shrinker *shrink, 26247e4937aSGao Xiang struct shrink_control *sc) 26347e4937aSGao Xiang { 26447e4937aSGao Xiang struct erofs_sb_info *sbi; 26547e4937aSGao Xiang struct list_head *p; 26647e4937aSGao Xiang 26747e4937aSGao Xiang unsigned long nr = sc->nr_to_scan; 26847e4937aSGao Xiang unsigned int run_no; 26947e4937aSGao Xiang unsigned long freed = 0; 27047e4937aSGao Xiang 27147e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 27247e4937aSGao Xiang do { 27347e4937aSGao Xiang run_no = ++shrinker_run_no; 27447e4937aSGao Xiang } while (run_no == 0); 27547e4937aSGao Xiang 27647e4937aSGao Xiang /* Iterate over all mounted superblocks and try to shrink them */ 27747e4937aSGao Xiang p = erofs_sb_list.next; 27847e4937aSGao Xiang while (p != &erofs_sb_list) { 27947e4937aSGao Xiang sbi = list_entry(p, struct erofs_sb_info, list); 28047e4937aSGao Xiang 28147e4937aSGao Xiang /* 28247e4937aSGao Xiang * We move the ones we do to the end of the list, so we stop 28347e4937aSGao Xiang * when we see one we have already done. 28447e4937aSGao Xiang */ 28547e4937aSGao Xiang if (sbi->shrinker_run_no == run_no) 28647e4937aSGao Xiang break; 28747e4937aSGao Xiang 28847e4937aSGao Xiang if (!mutex_trylock(&sbi->umount_mutex)) { 28947e4937aSGao Xiang p = p->next; 29047e4937aSGao Xiang continue; 29147e4937aSGao Xiang } 29247e4937aSGao Xiang 29347e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 29447e4937aSGao Xiang sbi->shrinker_run_no = run_no; 29547e4937aSGao Xiang 296bda17a45SGao Xiang freed += erofs_shrink_workstation(sbi, nr); 29747e4937aSGao Xiang 29847e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 29947e4937aSGao Xiang /* Get the next list element before we move this one */ 30047e4937aSGao Xiang p = p->next; 30147e4937aSGao Xiang 30247e4937aSGao Xiang /* 30347e4937aSGao Xiang * Move this one to the end of the list to provide some 30447e4937aSGao Xiang * fairness. 30547e4937aSGao Xiang */ 30647e4937aSGao Xiang list_move_tail(&sbi->list, &erofs_sb_list); 30747e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 30847e4937aSGao Xiang 30947e4937aSGao Xiang if (freed >= nr) 31047e4937aSGao Xiang break; 31147e4937aSGao Xiang } 31247e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 31347e4937aSGao Xiang return freed; 31447e4937aSGao Xiang } 31547e4937aSGao Xiang 31647e4937aSGao Xiang static struct shrinker erofs_shrinker_info = { 31747e4937aSGao Xiang .scan_objects = erofs_shrink_scan, 31847e4937aSGao Xiang .count_objects = erofs_shrink_count, 31947e4937aSGao Xiang .seeks = DEFAULT_SEEKS, 32047e4937aSGao Xiang }; 32147e4937aSGao Xiang 32247e4937aSGao Xiang int __init erofs_init_shrinker(void) 32347e4937aSGao Xiang { 32447e4937aSGao Xiang return register_shrinker(&erofs_shrinker_info); 32547e4937aSGao Xiang } 32647e4937aSGao Xiang 32747e4937aSGao Xiang void erofs_exit_shrinker(void) 32847e4937aSGao Xiang { 32947e4937aSGao Xiang unregister_shrinker(&erofs_shrinker_info); 33047e4937aSGao Xiang } 33147e4937aSGao Xiang #endif /* !CONFIG_EROFS_FS_ZIP */ 33247e4937aSGao Xiang 333