xref: /openbmc/linux/fs/erofs/utils.c (revision 867e6d38)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2018 HUAWEI, Inc.
4  *             https://www.huawei.com/
5  * Created by Gao Xiang <gaoxiang25@huawei.com>
6  */
7 #include "internal.h"
8 #include <linux/pagevec.h>
9 
10 struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
11 {
12 	struct page *page;
13 
14 	if (!list_empty(pool)) {
15 		page = lru_to_page(pool);
16 		DBG_BUGON(page_ref_count(page) != 1);
17 		list_del(&page->lru);
18 	} else {
19 		page = alloc_page(gfp);
20 	}
21 	return page;
22 }
23 
24 #ifdef CONFIG_EROFS_FS_ZIP
/*
 * global shrink count (for all mounted EROFS instances)
 *
 * Incremented when a workgroup's refcount drops to 1 in
 * erofs_workgroup_put(), decremented when it is raised from 1 again in
 * erofs_workgroup_get() or when the workgroup is freed.  This is the value
 * reported by erofs_shrink_count().
 */
static atomic_long_t erofs_global_shrink_cnt;
27 
28 static int erofs_workgroup_get(struct erofs_workgroup *grp)
29 {
30 	int o;
31 
32 repeat:
33 	o = erofs_wait_on_workgroup_freezed(grp);
34 	if (o <= 0)
35 		return -1;
36 
37 	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
38 		goto repeat;
39 
40 	/* decrease refcount paired by erofs_workgroup_put */
41 	if (o == 1)
42 		atomic_long_dec(&erofs_global_shrink_cnt);
43 	return 0;
44 }
45 
46 struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
47 					     pgoff_t index)
48 {
49 	struct erofs_sb_info *sbi = EROFS_SB(sb);
50 	struct erofs_workgroup *grp;
51 
52 repeat:
53 	rcu_read_lock();
54 	grp = xa_load(&sbi->managed_pslots, index);
55 	if (grp) {
56 		if (erofs_workgroup_get(grp)) {
57 			/* prefer to relax rcu read side */
58 			rcu_read_unlock();
59 			goto repeat;
60 		}
61 
62 		DBG_BUGON(index != grp->index);
63 	}
64 	rcu_read_unlock();
65 	return grp;
66 }
67 
68 struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
69 					       struct erofs_workgroup *grp)
70 {
71 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
72 	struct erofs_workgroup *pre;
73 
74 	/*
75 	 * Bump up a reference count before making this visible
76 	 * to others for the XArray in order to avoid potential
77 	 * UAF without serialized by xa_lock.
78 	 */
79 	atomic_inc(&grp->refcount);
80 
81 repeat:
82 	xa_lock(&sbi->managed_pslots);
83 	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
84 			   NULL, grp, GFP_NOFS);
85 	if (pre) {
86 		if (xa_is_err(pre)) {
87 			pre = ERR_PTR(xa_err(pre));
88 		} else if (erofs_workgroup_get(pre)) {
89 			/* try to legitimize the current in-tree one */
90 			xa_unlock(&sbi->managed_pslots);
91 			cond_resched();
92 			goto repeat;
93 		}
94 		atomic_dec(&grp->refcount);
95 		grp = pre;
96 	}
97 	xa_unlock(&sbi->managed_pslots);
98 	return grp;
99 }
100 
/*
 * Remove @grp from the global shrink count and hand it over to
 * erofs_workgroup_free_rcu().  Callers in this file invoke it once the
 * refcount of @grp has reached 0.
 */
static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}
106 
107 int erofs_workgroup_put(struct erofs_workgroup *grp)
108 {
109 	int count = atomic_dec_return(&grp->refcount);
110 
111 	if (count == 1)
112 		atomic_long_inc(&erofs_global_shrink_cnt);
113 	else if (!count)
114 		__erofs_workgroup_free(grp);
115 	return count;
116 }
117 
118 static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
119 					   struct erofs_workgroup *grp)
120 {
121 	/*
122 	 * If managed cache is on, refcount of workgroups
123 	 * themselves could be < 0 (freezed). In other words,
124 	 * there is no guarantee that all refcounts > 0.
125 	 */
126 	if (!erofs_workgroup_try_to_freeze(grp, 1))
127 		return false;
128 
129 	/*
130 	 * Note that all cached pages should be unattached
131 	 * before deleted from the XArray. Otherwise some
132 	 * cached pages could be still attached to the orphan
133 	 * old workgroup when the new one is available in the tree.
134 	 */
135 	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
136 		erofs_workgroup_unfreeze(grp, 1);
137 		return false;
138 	}
139 
140 	/*
141 	 * It's impossible to fail after the workgroup is freezed,
142 	 * however in order to avoid some race conditions, add a
143 	 * DBG_BUGON to observe this in advance.
144 	 */
145 	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
146 
147 	/* last refcount should be connected with its managed pslot.  */
148 	erofs_workgroup_unfreeze(grp, 0);
149 	__erofs_workgroup_free(grp);
150 	return true;
151 }
152 
153 static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
154 					      unsigned long nr_shrink)
155 {
156 	struct erofs_workgroup *grp;
157 	unsigned int freed = 0;
158 	unsigned long index;
159 
160 	xa_for_each(&sbi->managed_pslots, index, grp) {
161 		/* try to shrink each valid workgroup */
162 		if (!erofs_try_to_release_workgroup(sbi, grp))
163 			continue;
164 
165 		++freed;
166 		if (!--nr_shrink)
167 			break;
168 	}
169 	return freed;
170 }
171 
/*
 * Number of the current shrinker run, advanced once per erofs_shrink_scan()
 * call; protected by 'erofs_sb_list_lock'
 */
static unsigned int shrinker_run_no;

/* protects 'erofs_sb_list', the list of all mounted EROFS instances */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
178 
179 void erofs_shrinker_register(struct super_block *sb)
180 {
181 	struct erofs_sb_info *sbi = EROFS_SB(sb);
182 
183 	mutex_init(&sbi->umount_mutex);
184 
185 	spin_lock(&erofs_sb_list_lock);
186 	list_add(&sbi->list, &erofs_sb_list);
187 	spin_unlock(&erofs_sb_list_lock);
188 }
189 
/*
 * Detach the EROFS instance behind @sb from the shrinker: release all of
 * its workgroups and unlink it from the global list of mounted instances.
 */
void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	/*
	 * erofs_shrink_scan() only touches an instance after a successful
	 * mutex_trylock() on this mutex, so holding it keeps the shrinker
	 * away from this instance until it is off the list.
	 */
	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}
203 
/*
 * Shrinker count callback: report the global shrink count.  Neither
 * @shrink nor @sc is used.
 */
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}
209 
210 static unsigned long erofs_shrink_scan(struct shrinker *shrink,
211 				       struct shrink_control *sc)
212 {
213 	struct erofs_sb_info *sbi;
214 	struct list_head *p;
215 
216 	unsigned long nr = sc->nr_to_scan;
217 	unsigned int run_no;
218 	unsigned long freed = 0;
219 
220 	spin_lock(&erofs_sb_list_lock);
221 	do {
222 		run_no = ++shrinker_run_no;
223 	} while (run_no == 0);
224 
225 	/* Iterate over all mounted superblocks and try to shrink them */
226 	p = erofs_sb_list.next;
227 	while (p != &erofs_sb_list) {
228 		sbi = list_entry(p, struct erofs_sb_info, list);
229 
230 		/*
231 		 * We move the ones we do to the end of the list, so we stop
232 		 * when we see one we have already done.
233 		 */
234 		if (sbi->shrinker_run_no == run_no)
235 			break;
236 
237 		if (!mutex_trylock(&sbi->umount_mutex)) {
238 			p = p->next;
239 			continue;
240 		}
241 
242 		spin_unlock(&erofs_sb_list_lock);
243 		sbi->shrinker_run_no = run_no;
244 
245 		freed += erofs_shrink_workstation(sbi, nr - freed);
246 
247 		spin_lock(&erofs_sb_list_lock);
248 		/* Get the next list element before we move this one */
249 		p = p->next;
250 
251 		/*
252 		 * Move this one to the end of the list to provide some
253 		 * fairness.
254 		 */
255 		list_move_tail(&sbi->list, &erofs_sb_list);
256 		mutex_unlock(&sbi->umount_mutex);
257 
258 		if (freed >= nr)
259 			break;
260 	}
261 	spin_unlock(&erofs_sb_list_lock);
262 	return freed;
263 }
264 
265 static struct shrinker erofs_shrinker_info = {
266 	.scan_objects = erofs_shrink_scan,
267 	.count_objects = erofs_shrink_count,
268 	.seeks = DEFAULT_SEEKS,
269 };
270 
/*
 * Register the EROFS shrinker.
 *
 * Returns whatever register_shrinker() returns.
 */
int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}
275 
/* Unregister the EROFS shrinker registered by erofs_init_shrinker(). */
void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* CONFIG_EROFS_FS_ZIP */
281 
282