147e4937aSGao Xiang // SPDX-License-Identifier: GPL-2.0-only 247e4937aSGao Xiang /* 347e4937aSGao Xiang * Copyright (C) 2018 HUAWEI, Inc. 4592e7cd0SAlexander A. Klimov * https://www.huawei.com/ 547e4937aSGao Xiang */ 647e4937aSGao Xiang #include "internal.h" 747e4937aSGao Xiang 8eaa9172aSGao Xiang struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) 947e4937aSGao Xiang { 10eaa9172aSGao Xiang struct page *page = *pagepool; 1147e4937aSGao Xiang 12eaa9172aSGao Xiang if (page) { 1347e4937aSGao Xiang DBG_BUGON(page_ref_count(page) != 1); 14eaa9172aSGao Xiang *pagepool = (struct page *)page_private(page); 1547e4937aSGao Xiang } else { 165ddcee1fSGao Xiang page = alloc_page(gfp); 1747e4937aSGao Xiang } 1847e4937aSGao Xiang return page; 1947e4937aSGao Xiang } 2047e4937aSGao Xiang 21eaa9172aSGao Xiang void erofs_release_pages(struct page **pagepool) 22eaa9172aSGao Xiang { 23eaa9172aSGao Xiang while (*pagepool) { 24eaa9172aSGao Xiang struct page *page = *pagepool; 25eaa9172aSGao Xiang 26eaa9172aSGao Xiang *pagepool = (struct page *)page_private(page); 27eaa9172aSGao Xiang put_page(page); 28eaa9172aSGao Xiang } 29eaa9172aSGao Xiang } 30eaa9172aSGao Xiang 3147e4937aSGao Xiang #ifdef CONFIG_EROFS_FS_ZIP 3247e4937aSGao Xiang /* global shrink count (for all mounted EROFS instances) */ 3347e4937aSGao Xiang static atomic_long_t erofs_global_shrink_cnt; 3447e4937aSGao Xiang 35*7674a42fSGao Xiang static bool erofs_workgroup_get(struct erofs_workgroup *grp) 3647e4937aSGao Xiang { 37*7674a42fSGao Xiang if (lockref_get_not_zero(&grp->lockref)) 38*7674a42fSGao Xiang return true; 3947e4937aSGao Xiang 40*7674a42fSGao Xiang spin_lock(&grp->lockref.lock); 41*7674a42fSGao Xiang if (__lockref_is_dead(&grp->lockref)) { 42*7674a42fSGao Xiang spin_unlock(&grp->lockref.lock); 43*7674a42fSGao Xiang return false; 44*7674a42fSGao Xiang } 4547e4937aSGao Xiang 46*7674a42fSGao Xiang if (!grp->lockref.count++) 4747e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 48*7674a42fSGao Xiang spin_unlock(&grp->lockref.lock); 49*7674a42fSGao Xiang return true; 5047e4937aSGao Xiang } 5147e4937aSGao Xiang 5247e4937aSGao Xiang struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, 53997626d8SVladimir Zapolskiy pgoff_t index) 5447e4937aSGao Xiang { 5547e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 5647e4937aSGao Xiang struct erofs_workgroup *grp; 5747e4937aSGao Xiang 5847e4937aSGao Xiang repeat: 5947e4937aSGao Xiang rcu_read_lock(); 6064094a04SGao Xiang grp = xa_load(&sbi->managed_pslots, index); 6147e4937aSGao Xiang if (grp) { 62*7674a42fSGao Xiang if (!erofs_workgroup_get(grp)) { 6347e4937aSGao Xiang /* prefer to relax rcu read side */ 6447e4937aSGao Xiang rcu_read_unlock(); 6547e4937aSGao Xiang goto repeat; 6647e4937aSGao Xiang } 6747e4937aSGao Xiang 6847e4937aSGao Xiang DBG_BUGON(index != grp->index); 6947e4937aSGao Xiang } 7047e4937aSGao Xiang rcu_read_unlock(); 7147e4937aSGao Xiang return grp; 7247e4937aSGao Xiang } 7347e4937aSGao Xiang 7464094a04SGao Xiang struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, 75e5e9a432SVladimir Zapolskiy struct erofs_workgroup *grp) 7647e4937aSGao Xiang { 7764094a04SGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 7864094a04SGao Xiang struct erofs_workgroup *pre; 7947e4937aSGao Xiang 8064094a04SGao Xiang /* 81*7674a42fSGao Xiang * Bump up before making this visible to others for the XArray in order 82*7674a42fSGao Xiang * to avoid potential UAF without serialized by xa_lock. 8364094a04SGao Xiang */ 84*7674a42fSGao Xiang lockref_get(&grp->lockref); 8564094a04SGao Xiang 8664094a04SGao Xiang repeat: 8764094a04SGao Xiang xa_lock(&sbi->managed_pslots); 8864094a04SGao Xiang pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, 8964094a04SGao Xiang NULL, grp, GFP_NOFS); 9064094a04SGao Xiang if (pre) { 9164094a04SGao Xiang if (xa_is_err(pre)) { 9264094a04SGao Xiang pre = ERR_PTR(xa_err(pre)); 93*7674a42fSGao Xiang } else if (!erofs_workgroup_get(pre)) { 9464094a04SGao Xiang /* try to legitimize the current in-tree one */ 9564094a04SGao Xiang xa_unlock(&sbi->managed_pslots); 9664094a04SGao Xiang cond_resched(); 9764094a04SGao Xiang goto repeat; 9847e4937aSGao Xiang } 99*7674a42fSGao Xiang lockref_put_return(&grp->lockref); 10064094a04SGao Xiang grp = pre; 10164094a04SGao Xiang } 10264094a04SGao Xiang xa_unlock(&sbi->managed_pslots); 10364094a04SGao Xiang return grp; 10447e4937aSGao Xiang } 10547e4937aSGao Xiang 10647e4937aSGao Xiang static void __erofs_workgroup_free(struct erofs_workgroup *grp) 10747e4937aSGao Xiang { 10847e4937aSGao Xiang atomic_long_dec(&erofs_global_shrink_cnt); 10947e4937aSGao Xiang erofs_workgroup_free_rcu(grp); 11047e4937aSGao Xiang } 11147e4937aSGao Xiang 112*7674a42fSGao Xiang void erofs_workgroup_put(struct erofs_workgroup *grp) 11347e4937aSGao Xiang { 114*7674a42fSGao Xiang if (lockref_put_or_lock(&grp->lockref)) 115*7674a42fSGao Xiang return; 11647e4937aSGao Xiang 117*7674a42fSGao Xiang DBG_BUGON(__lockref_is_dead(&grp->lockref)); 118*7674a42fSGao Xiang if (grp->lockref.count == 1) 11947e4937aSGao Xiang atomic_long_inc(&erofs_global_shrink_cnt); 120*7674a42fSGao Xiang --grp->lockref.count; 121*7674a42fSGao Xiang spin_unlock(&grp->lockref.lock); 12247e4937aSGao Xiang } 12347e4937aSGao Xiang 12447e4937aSGao Xiang static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, 125bda17a45SGao Xiang struct erofs_workgroup *grp) 12647e4937aSGao Xiang { 127*7674a42fSGao Xiang int free = false; 128*7674a42fSGao Xiang 129*7674a42fSGao Xiang spin_lock(&grp->lockref.lock); 130*7674a42fSGao Xiang if (grp->lockref.count) 131*7674a42fSGao Xiang goto out; 13247e4937aSGao Xiang 13347e4937aSGao Xiang /* 134*7674a42fSGao Xiang * Note that all cached pages should be detached before deleted from 135*7674a42fSGao Xiang * the XArray. Otherwise some cached pages could be still attached to 136*7674a42fSGao Xiang * the orphan old workgroup when the new one is available in the tree. 13747e4937aSGao Xiang */ 138*7674a42fSGao Xiang if (erofs_try_to_free_all_cached_pages(sbi, grp)) 139*7674a42fSGao Xiang goto out; 14047e4937aSGao Xiang 14147e4937aSGao Xiang /* 14247e4937aSGao Xiang * It's impossible to fail after the workgroup is freezed, 14347e4937aSGao Xiang * however in order to avoid some race conditions, add a 14447e4937aSGao Xiang * DBG_BUGON to observe this in advance. 14547e4937aSGao Xiang */ 14657bbeacdSHuang Jianan DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp); 14747e4937aSGao Xiang 148*7674a42fSGao Xiang lockref_mark_dead(&grp->lockref); 149*7674a42fSGao Xiang free = true; 150*7674a42fSGao Xiang out: 151*7674a42fSGao Xiang spin_unlock(&grp->lockref.lock); 152*7674a42fSGao Xiang if (free) 153ee4bf86cSGao Xiang __erofs_workgroup_free(grp); 154*7674a42fSGao Xiang return free; 15547e4937aSGao Xiang } 15647e4937aSGao Xiang 15747e4937aSGao Xiang static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, 158bda17a45SGao Xiang unsigned long nr_shrink) 15947e4937aSGao Xiang { 16064094a04SGao Xiang struct erofs_workgroup *grp; 16147e4937aSGao Xiang unsigned int freed = 0; 16264094a04SGao Xiang unsigned long index; 16347e4937aSGao Xiang 16457bbeacdSHuang Jianan xa_lock(&sbi->managed_pslots); 16564094a04SGao Xiang xa_for_each(&sbi->managed_pslots, index, grp) { 16647e4937aSGao Xiang /* try to shrink each valid workgroup */ 167bda17a45SGao Xiang if (!erofs_try_to_release_workgroup(sbi, grp)) 16847e4937aSGao Xiang continue; 16957bbeacdSHuang Jianan xa_unlock(&sbi->managed_pslots); 17047e4937aSGao Xiang 17147e4937aSGao Xiang ++freed; 1728d8a09b0SGao Xiang if (!--nr_shrink) 17357bbeacdSHuang Jianan return freed; 17457bbeacdSHuang Jianan xa_lock(&sbi->managed_pslots); 17547e4937aSGao Xiang } 17657bbeacdSHuang Jianan xa_unlock(&sbi->managed_pslots); 17747e4937aSGao Xiang return freed; 17847e4937aSGao Xiang } 17947e4937aSGao Xiang 18047e4937aSGao Xiang /* protected by 'erofs_sb_list_lock' */ 18147e4937aSGao Xiang static unsigned int shrinker_run_no; 18247e4937aSGao Xiang 18347e4937aSGao Xiang /* protects the mounted 'erofs_sb_list' */ 18447e4937aSGao Xiang static DEFINE_SPINLOCK(erofs_sb_list_lock); 18547e4937aSGao Xiang static LIST_HEAD(erofs_sb_list); 18647e4937aSGao Xiang 18747e4937aSGao Xiang void erofs_shrinker_register(struct super_block *sb) 18847e4937aSGao Xiang { 18947e4937aSGao Xiang struct erofs_sb_info *sbi = EROFS_SB(sb); 19047e4937aSGao Xiang 19147e4937aSGao Xiang mutex_init(&sbi->umount_mutex); 19247e4937aSGao Xiang 19347e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 19447e4937aSGao Xiang list_add(&sbi->list, &erofs_sb_list); 19547e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 19647e4937aSGao Xiang } 19747e4937aSGao Xiang 19847e4937aSGao Xiang void erofs_shrinker_unregister(struct super_block *sb) 19947e4937aSGao Xiang { 20047e4937aSGao Xiang struct erofs_sb_info *const sbi = EROFS_SB(sb); 20147e4937aSGao Xiang 20247e4937aSGao Xiang mutex_lock(&sbi->umount_mutex); 203bda17a45SGao Xiang /* clean up all remaining workgroups in memory */ 204bda17a45SGao Xiang erofs_shrink_workstation(sbi, ~0UL); 20547e4937aSGao Xiang 20647e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 20747e4937aSGao Xiang list_del(&sbi->list); 20847e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 20947e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 21047e4937aSGao Xiang } 21147e4937aSGao Xiang 21247e4937aSGao Xiang static unsigned long erofs_shrink_count(struct shrinker *shrink, 21347e4937aSGao Xiang struct shrink_control *sc) 21447e4937aSGao Xiang { 21547e4937aSGao Xiang return atomic_long_read(&erofs_global_shrink_cnt); 21647e4937aSGao Xiang } 21747e4937aSGao Xiang 21847e4937aSGao Xiang static unsigned long erofs_shrink_scan(struct shrinker *shrink, 21947e4937aSGao Xiang struct shrink_control *sc) 22047e4937aSGao Xiang { 22147e4937aSGao Xiang struct erofs_sb_info *sbi; 22247e4937aSGao Xiang struct list_head *p; 22347e4937aSGao Xiang 22447e4937aSGao Xiang unsigned long nr = sc->nr_to_scan; 22547e4937aSGao Xiang unsigned int run_no; 22647e4937aSGao Xiang unsigned long freed = 0; 22747e4937aSGao Xiang 22847e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 22947e4937aSGao Xiang do { 23047e4937aSGao Xiang run_no = ++shrinker_run_no; 23147e4937aSGao Xiang } while (run_no == 0); 23247e4937aSGao Xiang 23347e4937aSGao Xiang /* Iterate over all mounted superblocks and try to shrink them */ 23447e4937aSGao Xiang p = erofs_sb_list.next; 23547e4937aSGao Xiang while (p != &erofs_sb_list) { 23647e4937aSGao Xiang sbi = list_entry(p, struct erofs_sb_info, list); 23747e4937aSGao Xiang 23847e4937aSGao Xiang /* 23947e4937aSGao Xiang * We move the ones we do to the end of the list, so we stop 24047e4937aSGao Xiang * when we see one we have already done. 24147e4937aSGao Xiang */ 24247e4937aSGao Xiang if (sbi->shrinker_run_no == run_no) 24347e4937aSGao Xiang break; 24447e4937aSGao Xiang 24547e4937aSGao Xiang if (!mutex_trylock(&sbi->umount_mutex)) { 24647e4937aSGao Xiang p = p->next; 24747e4937aSGao Xiang continue; 24847e4937aSGao Xiang } 24947e4937aSGao Xiang 25047e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 25147e4937aSGao Xiang sbi->shrinker_run_no = run_no; 25247e4937aSGao Xiang 2539d5a09c6SGao Xiang freed += erofs_shrink_workstation(sbi, nr - freed); 25447e4937aSGao Xiang 25547e4937aSGao Xiang spin_lock(&erofs_sb_list_lock); 25647e4937aSGao Xiang /* Get the next list element before we move this one */ 25747e4937aSGao Xiang p = p->next; 25847e4937aSGao Xiang 25947e4937aSGao Xiang /* 26047e4937aSGao Xiang * Move this one to the end of the list to provide some 26147e4937aSGao Xiang * fairness. 26247e4937aSGao Xiang */ 26347e4937aSGao Xiang list_move_tail(&sbi->list, &erofs_sb_list); 26447e4937aSGao Xiang mutex_unlock(&sbi->umount_mutex); 26547e4937aSGao Xiang 26647e4937aSGao Xiang if (freed >= nr) 26747e4937aSGao Xiang break; 26847e4937aSGao Xiang } 26947e4937aSGao Xiang spin_unlock(&erofs_sb_list_lock); 27047e4937aSGao Xiang return freed; 27147e4937aSGao Xiang } 27247e4937aSGao Xiang 27347e4937aSGao Xiang static struct shrinker erofs_shrinker_info = { 27447e4937aSGao Xiang .scan_objects = erofs_shrink_scan, 27547e4937aSGao Xiang .count_objects = erofs_shrink_count, 27647e4937aSGao Xiang .seeks = DEFAULT_SEEKS, 27747e4937aSGao Xiang }; 27847e4937aSGao Xiang 27947e4937aSGao Xiang int __init erofs_init_shrinker(void) 28047e4937aSGao Xiang { 281e33c267aSRoman Gushchin return register_shrinker(&erofs_shrinker_info, "erofs-shrinker"); 28247e4937aSGao Xiang } 28347e4937aSGao Xiang 28447e4937aSGao Xiang void erofs_exit_shrinker(void) 28547e4937aSGao Xiang { 28647e4937aSGao Xiang unregister_shrinker(&erofs_shrinker_info); 28747e4937aSGao Xiang } 28847e4937aSGao Xiang #endif /* !CONFIG_EROFS_FS_ZIP */ 289