xref: /openbmc/linux/fs/erofs/utils.c (revision f3d7c2cd)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 */
#include "internal.h"
#include <linux/pagevec.h>

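/*
 * Grab a page from the caller-provided page pool if one is available
 * (reusing a page that still holds exactly one reference), otherwise
 * fall back to allocating a fresh page with the given gfp flags.
 */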
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
{
	struct page *page;

	if (!list_empty(pool)) {
		page = lru_to_page(pool);
		DBG_BUGON(page_ref_count(page) != 1);
		list_del(&page->lru);
	} else {
		page = alloc_page(gfp);
	}
	return page;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

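/*
 * Try to take an extra reference on a workgroup.  Returns 0 on success and
 * -1 if the workgroup has been frozen (refcount <= 0) and therefore cannot
 * be grabbed.  When the previous refcount was 1 (only the managed pslot
 * held it), the workgroup stops being reclaimable, so the global shrink
 * count is decreased accordingly.
 */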
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);
	if (o <= 0)
		return -1;

	if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
		goto repeat;

	/* decrease the global shrink count, paired with erofs_workgroup_put() */
	if (o == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}

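/*
 * Look up a workgroup by index under RCU protection and try to pin it with
 * an extra reference.  If the reference cannot be taken (the workgroup is
 * being frozen or freed), leave the RCU read-side critical section and
 * retry the lookup.
 */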
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
					     pgoff_t index)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_workgroup *grp;

repeat:
	rcu_read_lock();
	grp = xa_load(&sbi->managed_pslots, index);
	if (grp) {
		if (erofs_workgroup_get(grp)) {
			/* prefer to relax rcu read side */
			rcu_read_unlock();
			goto repeat;
		}

		DBG_BUGON(index != grp->index);
	}
	rcu_read_unlock();
	return grp;
}

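/*
 * Insert a workgroup into the managed_pslots XArray.  If another workgroup
 * is already registered at the same index and can be pinned, the caller's
 * reference is dropped and the existing in-tree workgroup is returned
 * instead; on XArray failure an ERR_PTR() is returned.
 */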
struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
					       struct erofs_workgroup *grp)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct erofs_workgroup *pre;

	/*
	 * Bump up the reference count before making this workgroup
	 * visible to others in the XArray, in order to avoid a potential
	 * UAF since lookups are not serialized by xa_lock.
	 */
	atomic_inc(&grp->refcount);

repeat:
	xa_lock(&sbi->managed_pslots);
	pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
			   NULL, grp, GFP_NOFS);
	if (pre) {
		if (xa_is_err(pre)) {
			pre = ERR_PTR(xa_err(pre));
		} else if (erofs_workgroup_get(pre)) {
			/* try to legitimize the current in-tree one */
			xa_unlock(&sbi->managed_pslots);
			cond_resched();
			goto repeat;
		}
		atomic_dec(&grp->refcount);
		grp = pre;
	}
	xa_unlock(&sbi->managed_pslots);
	return grp;
}

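/*
 * Final teardown of a workgroup: drop it from the global shrink count and
 * hand it to erofs_workgroup_free_rcu() for freeing.
 */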
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
	atomic_long_dec(&erofs_global_shrink_cnt);
	erofs_workgroup_free_rcu(grp);
}

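/*
 * Drop a reference on a workgroup.  When only the managed pslot reference
 * remains (refcount reaches 1), the workgroup becomes reclaimable and the
 * global shrink count is bumped; when the refcount hits zero, the workgroup
 * is freed.
 */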
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
	int count = atomic_dec_return(&grp->refcount);

	if (count == 1)
		atomic_long_inc(&erofs_global_shrink_cnt);
	else if (!count)
		__erofs_workgroup_free(grp);
	return count;
}

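/*
 * Attempt to reclaim a single workgroup: freeze it so no new references can
 * be taken, release its cached pages, erase it from the managed_pslots
 * XArray and finally free it.  Returns false if the workgroup is still in
 * use or its cached pages cannot be released right now.
 */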
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
					   struct erofs_workgroup *grp)
{
	/*
	 * If the managed cache is on, the refcount of workgroups
	 * themselves could be < 0 (frozen). In other words, there is
	 * no guarantee that all refcounts are > 0.
	 */
	if (!erofs_workgroup_try_to_freeze(grp, 1))
		return false;

	/*
	 * Note that all cached pages should be detached before the
	 * workgroup is deleted from the XArray. Otherwise some cached
	 * pages could still be attached to the orphaned old workgroup
	 * when the new one becomes available in the tree.
	 */
	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
		erofs_workgroup_unfreeze(grp, 1);
		return false;
	}

	/*
	 * It's impossible to fail after the workgroup is frozen, but in
	 * order to catch unexpected race conditions, add a DBG_BUGON to
	 * observe this in advance.
	 */
	DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);

	/* the last refcount should be connected with its managed pslot */
	erofs_workgroup_unfreeze(grp, 0);
	__erofs_workgroup_free(grp);
	return true;
}

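/*
 * Walk the managed_pslots XArray of one superblock and try to release up to
 * nr_shrink workgroups.  Returns the number of workgroups actually freed.
 */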
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
					      unsigned long nr_shrink)
{
	struct erofs_workgroup *grp;
	unsigned int freed = 0;
	unsigned long index;

	xa_for_each(&sbi->managed_pslots, index, grp) {
		/* try to shrink each valid workgroup */
		if (!erofs_try_to_release_workgroup(sbi, grp))
			continue;

		++freed;
		if (!--nr_shrink)
			break;
	}
	return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

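/* add a newly mounted instance to the global list visited by the shrinker */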
void erofs_shrinker_register(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_init(&sbi->umount_mutex);

	spin_lock(&erofs_sb_list_lock);
	list_add(&sbi->list, &erofs_sb_list);
	spin_unlock(&erofs_sb_list_lock);
}

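/*
 * Drop an instance from the shrinkable list at unmount time; all remaining
 * workgroups are reclaimed here while holding umount_mutex so the shrinker
 * cannot race with the teardown.
 */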
void erofs_shrinker_unregister(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	mutex_lock(&sbi->umount_mutex);
	/* clean up all remaining workgroups in memory */
	erofs_shrink_workstation(sbi, ~0UL);

	spin_lock(&erofs_sb_list_lock);
	list_del(&sbi->list);
	spin_unlock(&erofs_sb_list_lock);
	mutex_unlock(&sbi->umount_mutex);
}

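/* report how many reclaimable workgroups exist across all mounted instances */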
static unsigned long erofs_shrink_count(struct shrinker *shrink,
					struct shrink_control *sc)
{
	return atomic_long_read(&erofs_global_shrink_cnt);
}

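/*
 * Shrinker scan callback: walk the list of mounted instances round-robin,
 * skipping any superblock that is currently unmounting, and release
 * workgroups until sc->nr_to_scan objects have been freed or every
 * superblock has been visited once in this run.
 */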
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	struct erofs_sb_info *sbi;
	struct list_head *p;

	unsigned long nr = sc->nr_to_scan;
	unsigned int run_no;
	unsigned long freed = 0;

	spin_lock(&erofs_sb_list_lock);
	do {
		run_no = ++shrinker_run_no;
	} while (run_no == 0);

	/* Iterate over all mounted superblocks and try to shrink them */
	p = erofs_sb_list.next;
	while (p != &erofs_sb_list) {
		sbi = list_entry(p, struct erofs_sb_info, list);

		/*
		 * Superblocks that have been processed are moved to the
		 * end of the list, so stop as soon as we see one that has
		 * already been handled in this run.
		 */
		if (sbi->shrinker_run_no == run_no)
			break;

		if (!mutex_trylock(&sbi->umount_mutex)) {
			p = p->next;
			continue;
		}

		spin_unlock(&erofs_sb_list_lock);
		sbi->shrinker_run_no = run_no;

		freed += erofs_shrink_workstation(sbi, nr - freed);

		spin_lock(&erofs_sb_list_lock);
		/* Get the next list element before we move this one */
		p = p->next;

		/*
		 * Move this one to the end of the list to provide some
		 * fairness.
		 */
		list_move_tail(&sbi->list, &erofs_sb_list);
		mutex_unlock(&sbi->umount_mutex);

		if (freed >= nr)
			break;
	}
	spin_unlock(&erofs_sb_list_lock);
	return freed;
}

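/* shrinker descriptor hooked up to erofs_shrink_count()/erofs_shrink_scan() */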
static struct shrinker erofs_shrinker_info = {
	.scan_objects = erofs_shrink_scan,
	.count_objects = erofs_shrink_count,
	.seeks = DEFAULT_SEEKS,
};

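/* register the global EROFS shrinker once during initialization */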
int __init erofs_init_shrinker(void)
{
	return register_shrinker(&erofs_shrinker_info);
}

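/* unregister the global EROFS shrinker on teardown */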
void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */