xref: /openbmc/linux/fs/ubifs/shrinker.c (revision 12eb4683)
1 /*
2  * This file is part of UBIFS.
3  *
4  * Copyright (C) 2006-2008 Nokia Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published by
8  * the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program; if not, write to the Free Software Foundation, Inc., 51
17  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  *
19  * Authors: Artem Bityutskiy (Битюцкий Артём)
20  *          Adrian Hunter
21  */
22 
23 /*
24  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
25  * tree when Linux VM needs more RAM.
26  *
27  * We do not implement any LRU lists to find oldest znodes to free because it
28  * would add additional overhead to the file system fast paths. So the shrinker
29  * just walks the TNC tree when searching for znodes to free.
30  *
31  * If the root of a TNC sub-tree is clean and old enough, then the children are
32  * also clean and old enough. So the shrinker walks the TNC in level order and
33  * dumps entire sub-trees.
34  *
35  * The age of znodes is just the time-stamp when they were last looked at.
36  * The current shrinker first tries to evict old znodes, then young ones.
37  *
38  * Since the shrinker is global, it has to protect against races with FS
39  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
40  */
41 
42 #include "ubifs.h"
43 
44 /* List of all UBIFS file-system instances */
45 LIST_HEAD(ubifs_infos);
46 
47 /*
48  * We number each shrinker run and record the number on the ubifs_info structure
49  * so that we can easily work out which ubifs_info structures have already been
50  * done by the current run.
51  */
52 static unsigned int shrinker_run_no;
53 
54 /* Protects 'ubifs_infos' list */
55 DEFINE_SPINLOCK(ubifs_infos_lock);
56 
57 /* Global clean znode counter (for all mounted UBIFS instances) */
58 atomic_long_t ubifs_clean_zn_cnt;
59 
60 /**
61  * shrink_tnc - shrink TNC tree.
62  * @c: UBIFS file-system description object
63  * @nr: number of znodes to free
64  * @age: the age of znodes to free
65  * @contention: if any contention, this is set to %1
66  *
67  * This function traverses TNC tree and frees clean znodes. It does not free
68  * clean znodes which younger then @age. Returns number of freed znodes.
69  */
70 static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
71 {
72 	int total_freed = 0;
73 	struct ubifs_znode *znode, *zprev;
74 	int time = get_seconds();
75 
76 	ubifs_assert(mutex_is_locked(&c->umount_mutex));
77 	ubifs_assert(mutex_is_locked(&c->tnc_mutex));
78 
79 	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
80 		return 0;
81 
82 	/*
83 	 * Traverse the TNC tree in levelorder manner, so that it is possible
84 	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
85 	 * children are older or of the same age.
86 	 *
87 	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
88 	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
89 	 * changed only when the 'c->tnc_mutex' is held.
90 	 */
91 	zprev = NULL;
92 	znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
93 	while (znode && total_freed < nr &&
94 	       atomic_long_read(&c->clean_zn_cnt) > 0) {
95 		int freed;
96 
97 		/*
98 		 * If the znode is clean, but it is in the 'c->cnext' list, this
99 		 * means that this znode has just been written to flash as a
100 		 * part of commit and was marked clean. They will be removed
101 		 * from the list at end commit. We cannot change the list,
102 		 * because it is not protected by any mutex (design decision to
103 		 * make commit really independent and parallel to main I/O). So
104 		 * we just skip these znodes.
105 		 *
106 		 * Note, the 'clean_zn_cnt' counters are not updated until
107 		 * after the commit, so the UBIFS shrinker does not report
108 		 * the znodes which are in the 'c->cnext' list as freeable.
109 		 *
110 		 * Also note, if the root of a sub-tree is not in 'c->cnext',
111 		 * then the whole sub-tree is not in 'c->cnext' as well, so it
112 		 * is safe to dump whole sub-tree.
113 		 */
114 
115 		if (znode->cnext) {
116 			/*
117 			 * Very soon these znodes will be removed from the list
118 			 * and become freeable.
119 			 */
120 			*contention = 1;
121 		} else if (!ubifs_zn_dirty(znode) &&
122 			   abs(time - znode->time) >= age) {
123 			if (znode->parent)
124 				znode->parent->zbranch[znode->iip].znode = NULL;
125 			else
126 				c->zroot.znode = NULL;
127 
128 			freed = ubifs_destroy_tnc_subtree(znode);
129 			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
130 			atomic_long_sub(freed, &c->clean_zn_cnt);
131 			ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
132 			total_freed += freed;
133 			znode = zprev;
134 		}
135 
136 		if (unlikely(!c->zroot.znode))
137 			break;
138 
139 		zprev = znode;
140 		znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
141 		cond_resched();
142 	}
143 
144 	return total_freed;
145 }
146 
147 /**
148  * shrink_tnc_trees - shrink UBIFS TNC trees.
149  * @nr: number of znodes to free
150  * @age: the age of znodes to free
151  * @contention: if any contention, this is set to %1
152  *
153  * This function walks the list of mounted UBIFS file-systems and frees clean
154  * znodes which are older than @age, until at least @nr znodes are freed.
155  * Returns the number of freed znodes.
156  */
157 static int shrink_tnc_trees(int nr, int age, int *contention)
158 {
159 	struct ubifs_info *c;
160 	struct list_head *p;
161 	unsigned int run_no;
162 	int freed = 0;
163 
164 	spin_lock(&ubifs_infos_lock);
165 	do {
166 		run_no = ++shrinker_run_no;
167 	} while (run_no == 0);
168 	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
169 	p = ubifs_infos.next;
170 	while (p != &ubifs_infos) {
171 		c = list_entry(p, struct ubifs_info, infos_list);
172 		/*
173 		 * We move the ones we do to the end of the list, so we stop
174 		 * when we see one we have already done.
175 		 */
176 		if (c->shrinker_run_no == run_no)
177 			break;
178 		if (!mutex_trylock(&c->umount_mutex)) {
179 			/* Some un-mount is in progress, try next FS */
180 			*contention = 1;
181 			p = p->next;
182 			continue;
183 		}
184 		/*
185 		 * We're holding 'c->umount_mutex', so the file-system won't go
186 		 * away.
187 		 */
188 		if (!mutex_trylock(&c->tnc_mutex)) {
189 			mutex_unlock(&c->umount_mutex);
190 			*contention = 1;
191 			p = p->next;
192 			continue;
193 		}
194 		spin_unlock(&ubifs_infos_lock);
195 		/*
196 		 * OK, now we have TNC locked, the file-system cannot go away -
197 		 * it is safe to reap the cache.
198 		 */
199 		c->shrinker_run_no = run_no;
200 		freed += shrink_tnc(c, nr, age, contention);
201 		mutex_unlock(&c->tnc_mutex);
202 		spin_lock(&ubifs_infos_lock);
203 		/* Get the next list element before we move this one */
204 		p = p->next;
205 		/*
206 		 * Move this one to the end of the list to provide some
207 		 * fairness.
208 		 */
209 		list_move_tail(&c->infos_list, &ubifs_infos);
210 		mutex_unlock(&c->umount_mutex);
211 		if (freed >= nr)
212 			break;
213 	}
214 	spin_unlock(&ubifs_infos_lock);
215 	return freed;
216 }
217 
218 /**
219  * kick_a_thread - kick a background thread to start commit.
220  *
221  * This function kicks a background thread to start background commit. Returns
222  * %-1 if a thread was kicked or there is another reason to assume the memory
223  * will soon be freed or become freeable. If there are no dirty znodes, returns
224  * %0.
225  */
226 static int kick_a_thread(void)
227 {
228 	int i;
229 	struct ubifs_info *c;
230 
231 	/*
232 	 * Iterate over all mounted UBIFS file-systems and find out if there is
233 	 * already an ongoing commit operation there. If no, then iterate for
234 	 * the second time and initiate background commit.
235 	 */
236 	spin_lock(&ubifs_infos_lock);
237 	for (i = 0; i < 2; i++) {
238 		list_for_each_entry(c, &ubifs_infos, infos_list) {
239 			long dirty_zn_cnt;
240 
241 			if (!mutex_trylock(&c->umount_mutex)) {
242 				/*
243 				 * Some un-mount is in progress, it will
244 				 * certainly free memory, so just return.
245 				 */
246 				spin_unlock(&ubifs_infos_lock);
247 				return -1;
248 			}
249 
250 			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
251 
252 			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
253 			    c->ro_mount || c->ro_error) {
254 				mutex_unlock(&c->umount_mutex);
255 				continue;
256 			}
257 
258 			if (c->cmt_state != COMMIT_RESTING) {
259 				spin_unlock(&ubifs_infos_lock);
260 				mutex_unlock(&c->umount_mutex);
261 				return -1;
262 			}
263 
264 			if (i == 1) {
265 				list_move_tail(&c->infos_list, &ubifs_infos);
266 				spin_unlock(&ubifs_infos_lock);
267 
268 				ubifs_request_bg_commit(c);
269 				mutex_unlock(&c->umount_mutex);
270 				return -1;
271 			}
272 			mutex_unlock(&c->umount_mutex);
273 		}
274 	}
275 	spin_unlock(&ubifs_infos_lock);
276 
277 	return 0;
278 }
279 
280 unsigned long ubifs_shrink_count(struct shrinker *shrink,
281 				 struct shrink_control *sc)
282 {
283 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
284 
285 	/*
286 	 * Due to the way UBIFS updates the clean znode counter it may
287 	 * temporarily be negative.
288 	 */
289 	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
290 }
291 
292 unsigned long ubifs_shrink_scan(struct shrinker *shrink,
293 				struct shrink_control *sc)
294 {
295 	unsigned long nr = sc->nr_to_scan;
296 	int contention = 0;
297 	unsigned long freed;
298 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
299 
300 	if (!clean_zn_cnt) {
301 		/*
302 		 * No clean znodes, nothing to reap. All we can do in this case
303 		 * is to kick background threads to start commit, which will
304 		 * probably make clean znodes which, in turn, will be freeable.
305 		 * And we return -1 which means will make VM call us again
306 		 * later.
307 		 */
308 		dbg_tnc("no clean znodes, kick a thread");
309 		return kick_a_thread();
310 	}
311 
312 	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
313 	if (freed >= nr)
314 		goto out;
315 
316 	dbg_tnc("not enough old znodes, try to free young ones");
317 	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
318 	if (freed >= nr)
319 		goto out;
320 
321 	dbg_tnc("not enough young znodes, free all");
322 	freed += shrink_tnc_trees(nr - freed, 0, &contention);
323 
324 	if (!freed && contention) {
325 		dbg_tnc("freed nothing, but contention");
326 		return SHRINK_STOP;
327 	}
328 
329 out:
330 	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
331 	return freed;
332 }
333