147e4937aSGao Xiang // SPDX-License-Identifier: GPL-2.0-only 247e4937aSGao Xiang /* 347e4937aSGao Xiang * Copyright (C) 2018 HUAWEI, Inc. 447e4937aSGao Xiang * http://www.huawei.com/ 547e4937aSGao Xiang * Created by Gao Xiang <gaoxiang25@huawei.com> 647e4937aSGao Xiang */ 747e4937aSGao Xiang #include "internal.h" 847e4937aSGao Xiang #include <linux/pagevec.h> 947e4937aSGao Xiang 105ddcee1fSGao Xiang struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) 1147e4937aSGao Xiang { 1247e4937aSGao Xiang struct page *page; 1347e4937aSGao Xiang 1447e4937aSGao Xiang if (!list_empty(pool)) { 1547e4937aSGao Xiang page = lru_to_page(pool); 1647e4937aSGao Xiang DBG_BUGON(page_ref_count(page) != 1); 1747e4937aSGao Xiang list_del(&page->lru); 1847e4937aSGao Xiang } else { 195ddcee1fSGao Xiang page = alloc_page(gfp); 2047e4937aSGao Xiang } 2147e4937aSGao Xiang return page; 2247e4937aSGao Xiang } 2347e4937aSGao Xiang 2447e4937aSGao Xiang #if (EROFS_PCPUBUF_NR_PAGES > 0) 2547e4937aSGao Xiang static struct { 2647e4937aSGao Xiang u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES]; 2747e4937aSGao Xiang } ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS]; 2847e4937aSGao Xiang 2947e4937aSGao Xiang void *erofs_get_pcpubuf(unsigned int pagenr) 3047e4937aSGao Xiang { 3147e4937aSGao Xiang preempt_disable(); 3247e4937aSGao Xiang return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE]; 3347e4937aSGao Xiang } 3447e4937aSGao Xiang #endif 3547e4937aSGao Xiang 3647e4937aSGao Xiang #ifdef CONFIG_EROFS_FS_ZIP 3747e4937aSGao Xiang /* global shrink count (for all mounted EROFS instances) */ 3847e4937aSGao Xiang static atomic_long_t erofs_global_shrink_cnt; 3947e4937aSGao Xiang 4047e4937aSGao Xiang #define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount) 4147e4937aSGao Xiang #define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount) 4247e4937aSGao Xiang 4347e4937aSGao Xiang static int erofs_workgroup_get(struct erofs_workgroup *grp) 4447e4937aSGao Xiang { 4547e4937aSGao Xiang int o; 4647e4937aSGao Xiang 4747e4937aSGao Xiang repeat: 4847e4937aSGao Xiang o = erofs_wait_on_workgroup_freezed(grp); 498d8a09b0SGao Xiang if (o <= 0) 5047e4937aSGao Xiang return -1; 5147e4937aSGao Xiang 528d8a09b0SGao Xiang if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o) 5347e4937aSGao Xiang goto repeat; 5447e4937aSGao Xiang 5547e4937aSGao Xiang /* decrease refcount paired by erofs_workgroup_put */ 568d8a09b0SGao Xiang if (o == 1) 5747e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 5847e4937aSGao Xiang return 0; 5947e4937aSGao Xiang } 6047e4937aSGao Xiang 6147e4937aSGao Xiang struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, 62997626d8SVladimir Zapolskiy pgoff_t index) 6347e4937aSGao Xiang { 6447e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 6547e4937aSGao Xiang struct erofs_workgroup *grp; 6647e4937aSGao Xiang 6747e4937aSGao Xiang repeat: 6847e4937aSGao Xiang rcu_read_lock(); 6947e4937aSGao Xiang grp = radix_tree_lookup(&sbi->workstn_tree, index); 7047e4937aSGao Xiang if (grp) { 7147e4937aSGao Xiang grp = xa_untag_pointer(grp); 7247e4937aSGao Xiang 7347e4937aSGao Xiang if (erofs_workgroup_get(grp)) { 7447e4937aSGao Xiang /* prefer to relax rcu read side */ 7547e4937aSGao Xiang rcu_read_unlock(); 7647e4937aSGao Xiang goto repeat; 7747e4937aSGao Xiang } 7847e4937aSGao Xiang 7947e4937aSGao Xiang DBG_BUGON(index != grp->index); 8047e4937aSGao Xiang } 8147e4937aSGao Xiang rcu_read_unlock(); 8247e4937aSGao Xiang return grp; 8347e4937aSGao Xiang } 8447e4937aSGao Xiang 8547e4937aSGao Xiang int erofs_register_workgroup(struct super_block *sb, 8647e4937aSGao Xiang struct erofs_workgroup *grp, 8747e4937aSGao Xiang bool tag) 8847e4937aSGao Xiang { 8947e4937aSGao Xiang struct erofs_sb_info *sbi; 9047e4937aSGao Xiang int err; 9147e4937aSGao Xiang 9247e4937aSGao Xiang /* grp shouldn't be broken or used before */ 938d8a09b0SGao Xiang if (atomic_read(&grp->refcount) != 1) { 9447e4937aSGao Xiang DBG_BUGON(1); 9547e4937aSGao Xiang return -EINVAL; 9647e4937aSGao Xiang } 9747e4937aSGao Xiang 9847e4937aSGao Xiang err = radix_tree_preload(GFP_NOFS); 9947e4937aSGao Xiang if (err) 10047e4937aSGao Xiang return err; 10147e4937aSGao Xiang 10247e4937aSGao Xiang sbi = EROFS_SB(sb); 10347e4937aSGao Xiang xa_lock(&sbi->workstn_tree); 10447e4937aSGao Xiang 10547e4937aSGao Xiang grp = xa_tag_pointer(grp, tag); 10647e4937aSGao Xiang 10747e4937aSGao Xiang /* 10847e4937aSGao Xiang * Bump up reference count before making this workgroup 10947e4937aSGao Xiang * visible to other users in order to avoid potential UAF 11047e4937aSGao Xiang * without serialized by workstn_lock. 11147e4937aSGao Xiang */ 11247e4937aSGao Xiang __erofs_workgroup_get(grp); 11347e4937aSGao Xiang 11447e4937aSGao Xiang err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp); 1158d8a09b0SGao Xiang if (err) 11647e4937aSGao Xiang /* 11747e4937aSGao Xiang * it's safe to decrease since the workgroup isn't visible 11847e4937aSGao Xiang * and refcount >= 2 (cannot be freezed). 11947e4937aSGao Xiang */ 12047e4937aSGao Xiang __erofs_workgroup_put(grp); 12147e4937aSGao Xiang 12247e4937aSGao Xiang xa_unlock(&sbi->workstn_tree); 12347e4937aSGao Xiang radix_tree_preload_end(); 12447e4937aSGao Xiang return err; 12547e4937aSGao Xiang } 12647e4937aSGao Xiang 12747e4937aSGao Xiang static void __erofs_workgroup_free(struct erofs_workgroup *grp) 12847e4937aSGao Xiang { 12947e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 13047e4937aSGao Xiang erofs_workgroup_free_rcu(grp); 13147e4937aSGao Xiang } 13247e4937aSGao Xiang 13347e4937aSGao Xiang int erofs_workgroup_put(struct erofs_workgroup *grp) 13447e4937aSGao Xiang { 13547e4937aSGao Xiang int count = atomic_dec_return(&grp->refcount); 13647e4937aSGao Xiang 13747e4937aSGao Xiang if (count == 1) 13847e4937aSGao Xiang atomic_long_inc(&erofs_global_shrink_cnt); 13947e4937aSGao Xiang else if (!count) 14047e4937aSGao Xiang __erofs_workgroup_free(grp); 14147e4937aSGao Xiang return count; 14247e4937aSGao Xiang } 14347e4937aSGao Xiang 14447e4937aSGao Xiang static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp) 14547e4937aSGao Xiang { 14647e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 0); 14747e4937aSGao Xiang __erofs_workgroup_free(grp); 14847e4937aSGao Xiang } 14947e4937aSGao Xiang 15047e4937aSGao Xiang static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, 151bda17a45SGao Xiang struct erofs_workgroup *grp) 15247e4937aSGao Xiang { 15347e4937aSGao Xiang /* 15447e4937aSGao Xiang * If managed cache is on, refcount of workgroups 15547e4937aSGao Xiang * themselves could be < 0 (freezed). In other words, 15647e4937aSGao Xiang * there is no guarantee that all refcounts > 0. 15747e4937aSGao Xiang */ 15847e4937aSGao Xiang if (!erofs_workgroup_try_to_freeze(grp, 1)) 15947e4937aSGao Xiang return false; 16047e4937aSGao Xiang 16147e4937aSGao Xiang /* 16247e4937aSGao Xiang * Note that all cached pages should be unattached 16347e4937aSGao Xiang * before deleted from the radix tree. Otherwise some 16447e4937aSGao Xiang * cached pages could be still attached to the orphan 16547e4937aSGao Xiang * old workgroup when the new one is available in the tree. 16647e4937aSGao Xiang */ 16747e4937aSGao Xiang if (erofs_try_to_free_all_cached_pages(sbi, grp)) { 16847e4937aSGao Xiang erofs_workgroup_unfreeze(grp, 1); 16947e4937aSGao Xiang return false; 17047e4937aSGao Xiang } 17147e4937aSGao Xiang 17247e4937aSGao Xiang /* 17347e4937aSGao Xiang * It's impossible to fail after the workgroup is freezed, 17447e4937aSGao Xiang * however in order to avoid some race conditions, add a 17547e4937aSGao Xiang * DBG_BUGON to observe this in advance. 17647e4937aSGao Xiang */ 17747e4937aSGao Xiang DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree, 17847e4937aSGao Xiang grp->index)) != grp); 17947e4937aSGao Xiang 18047e4937aSGao Xiang /* 18147e4937aSGao Xiang * If managed cache is on, last refcount should indicate 18247e4937aSGao Xiang * the related workstation. 18347e4937aSGao Xiang */ 18447e4937aSGao Xiang erofs_workgroup_unfreeze_final(grp); 18547e4937aSGao Xiang return true; 18647e4937aSGao Xiang } 18747e4937aSGao Xiang 18847e4937aSGao Xiang static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, 189bda17a45SGao Xiang unsigned long nr_shrink) 19047e4937aSGao Xiang { 19147e4937aSGao Xiang pgoff_t first_index = 0; 19247e4937aSGao Xiang void *batch[PAGEVEC_SIZE]; 19347e4937aSGao Xiang unsigned int freed = 0; 19447e4937aSGao Xiang 19547e4937aSGao Xiang int i, found; 19647e4937aSGao Xiang repeat: 19747e4937aSGao Xiang xa_lock(&sbi->workstn_tree); 19847e4937aSGao Xiang 19947e4937aSGao Xiang found = radix_tree_gang_lookup(&sbi->workstn_tree, 20047e4937aSGao Xiang batch, first_index, PAGEVEC_SIZE); 20147e4937aSGao Xiang 20247e4937aSGao Xiang for (i = 0; i < found; ++i) { 20347e4937aSGao Xiang struct erofs_workgroup *grp = xa_untag_pointer(batch[i]); 20447e4937aSGao Xiang 20547e4937aSGao Xiang first_index = grp->index + 1; 20647e4937aSGao Xiang 20747e4937aSGao Xiang /* try to shrink each valid workgroup */ 208bda17a45SGao Xiang if (!erofs_try_to_release_workgroup(sbi, grp)) 20947e4937aSGao Xiang continue; 21047e4937aSGao Xiang 21147e4937aSGao Xiang ++freed; 2128d8a09b0SGao Xiang if (!--nr_shrink) 21347e4937aSGao Xiang break; 21447e4937aSGao Xiang } 21547e4937aSGao Xiang xa_unlock(&sbi->workstn_tree); 21647e4937aSGao Xiang 21747e4937aSGao Xiang if (i && nr_shrink) 21847e4937aSGao Xiang goto repeat; 21947e4937aSGao Xiang return freed; 22047e4937aSGao Xiang } 22147e4937aSGao Xiang 22247e4937aSGao Xiang /* protected by 'erofs_sb_list_lock' */ 22347e4937aSGao Xiang static unsigned int shrinker_run_no; 22447e4937aSGao Xiang 22547e4937aSGao Xiang /* protects the mounted 'erofs_sb_list' */ 22647e4937aSGao Xiang static DEFINE_SPINLOCK(erofs_sb_list_lock); 22747e4937aSGao Xiang static LIST_HEAD(erofs_sb_list); 22847e4937aSGao Xiang 22947e4937aSGao Xiang void erofs_shrinker_register(struct super_block *sb) 23047e4937aSGao Xiang { 23147e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 23247e4937aSGao Xiang 23347e4937aSGao Xiang mutex_init(&sbi->umount_mutex); 23447e4937aSGao Xiang 23547e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 23647e4937aSGao Xiang list_add(&sbi->list, &erofs_sb_list); 23747e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 23847e4937aSGao Xiang } 23947e4937aSGao Xiang 24047e4937aSGao Xiang void erofs_shrinker_unregister(struct super_block *sb) 24147e4937aSGao Xiang { 24247e4937aSGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 24347e4937aSGao Xiang 24447e4937aSGao Xiang mutex_lock(&sbi->umount_mutex); 245bda17a45SGao Xiang /* clean up all remaining workgroups in memory */ 246bda17a45SGao Xiang erofs_shrink_workstation(sbi, ~0UL); 24747e4937aSGao Xiang 24847e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 24947e4937aSGao Xiang list_del(&sbi->list); 25047e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 25147e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 25247e4937aSGao Xiang } 25347e4937aSGao Xiang 25447e4937aSGao Xiang static unsigned long erofs_shrink_count(struct shrinker *shrink, 25547e4937aSGao Xiang struct shrink_control *sc) 25647e4937aSGao Xiang { 25747e4937aSGao Xiang return atomic_long_read(&erofs_global_shrink_cnt); 25847e4937aSGao Xiang } 25947e4937aSGao Xiang 26047e4937aSGao Xiang static unsigned long erofs_shrink_scan(struct shrinker *shrink, 26147e4937aSGao Xiang struct shrink_control *sc) 26247e4937aSGao Xiang { 26347e4937aSGao Xiang struct erofs_sb_info *sbi; 26447e4937aSGao Xiang struct list_head *p; 26547e4937aSGao Xiang 26647e4937aSGao Xiang unsigned long nr = sc->nr_to_scan; 26747e4937aSGao Xiang unsigned int run_no; 26847e4937aSGao Xiang unsigned long freed = 0; 26947e4937aSGao Xiang 27047e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 27147e4937aSGao Xiang do { 27247e4937aSGao Xiang run_no = ++shrinker_run_no; 27347e4937aSGao Xiang } while (run_no == 0); 27447e4937aSGao Xiang 27547e4937aSGao Xiang /* Iterate over all mounted superblocks and try to shrink them */ 27647e4937aSGao Xiang p = erofs_sb_list.next; 27747e4937aSGao Xiang while (p != &erofs_sb_list) { 27847e4937aSGao Xiang sbi = list_entry(p, struct erofs_sb_info, list); 27947e4937aSGao Xiang 28047e4937aSGao Xiang /* 28147e4937aSGao Xiang * We move the ones we do to the end of the list, so we stop 28247e4937aSGao Xiang * when we see one we have already done. 28347e4937aSGao Xiang */ 28447e4937aSGao Xiang if (sbi->shrinker_run_no == run_no) 28547e4937aSGao Xiang break; 28647e4937aSGao Xiang 28747e4937aSGao Xiang if (!mutex_trylock(&sbi->umount_mutex)) { 28847e4937aSGao Xiang p = p->next; 28947e4937aSGao Xiang continue; 29047e4937aSGao Xiang } 29147e4937aSGao Xiang 29247e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 29347e4937aSGao Xiang sbi->shrinker_run_no = run_no; 29447e4937aSGao Xiang 295bda17a45SGao Xiang freed += erofs_shrink_workstation(sbi, nr); 29647e4937aSGao Xiang 29747e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 29847e4937aSGao Xiang /* Get the next list element before we move this one */ 29947e4937aSGao Xiang p = p->next; 30047e4937aSGao Xiang 30147e4937aSGao Xiang /* 30247e4937aSGao Xiang * Move this one to the end of the list to provide some 30347e4937aSGao Xiang * fairness. 30447e4937aSGao Xiang */ 30547e4937aSGao Xiang list_move_tail(&sbi->list, &erofs_sb_list); 30647e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 30747e4937aSGao Xiang 30847e4937aSGao Xiang if (freed >= nr) 30947e4937aSGao Xiang break; 31047e4937aSGao Xiang } 31147e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 31247e4937aSGao Xiang return freed; 31347e4937aSGao Xiang } 31447e4937aSGao Xiang 31547e4937aSGao Xiang static struct shrinker erofs_shrinker_info = { 31647e4937aSGao Xiang .scan_objects = erofs_shrink_scan, 31747e4937aSGao Xiang .count_objects = erofs_shrink_count, 31847e4937aSGao Xiang .seeks = DEFAULT_SEEKS, 31947e4937aSGao Xiang }; 32047e4937aSGao Xiang 32147e4937aSGao Xiang int __init erofs_init_shrinker(void) 32247e4937aSGao Xiang { 32347e4937aSGao Xiang return register_shrinker(&erofs_shrinker_info); 32447e4937aSGao Xiang } 32547e4937aSGao Xiang 32647e4937aSGao Xiang void erofs_exit_shrinker(void) 32747e4937aSGao Xiang { 32847e4937aSGao Xiang unregister_shrinker(&erofs_shrinker_info); 32947e4937aSGao Xiang } 33047e4937aSGao Xiang #endif /* !CONFIG_EROFS_FS_ZIP */ 33147e4937aSGao Xiang 332