xref: /openbmc/linux/fs/ubifs/shrinker.c (revision 2b27bdcc)
12b27bdccSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21e51764aSArtem Bityutskiy /*
31e51764aSArtem Bityutskiy  * This file is part of UBIFS.
41e51764aSArtem Bityutskiy  *
51e51764aSArtem Bityutskiy  * Copyright (C) 2006-2008 Nokia Corporation.
61e51764aSArtem Bityutskiy  *
71e51764aSArtem Bityutskiy  * Authors: Artem Bityutskiy (Битюцкий Артём)
81e51764aSArtem Bityutskiy  *          Adrian Hunter
91e51764aSArtem Bityutskiy  */
101e51764aSArtem Bityutskiy 
111e51764aSArtem Bityutskiy /*
121e51764aSArtem Bityutskiy  * This file implements UBIFS shrinker which evicts clean znodes from the TNC
131e51764aSArtem Bityutskiy  * tree when Linux VM needs more RAM.
141e51764aSArtem Bityutskiy  *
151e51764aSArtem Bityutskiy  * We do not implement any LRU lists to find oldest znodes to free because it
161e51764aSArtem Bityutskiy  * would add additional overhead to the file system fast paths. So the shrinker
171e51764aSArtem Bityutskiy  * just walks the TNC tree when searching for znodes to free.
181e51764aSArtem Bityutskiy  *
191e51764aSArtem Bityutskiy  * If the root of a TNC sub-tree is clean and old enough, then the children are
201e51764aSArtem Bityutskiy  * also clean and old enough. So the shrinker walks the TNC in level order and
211e51764aSArtem Bityutskiy  * dumps entire sub-trees.
221e51764aSArtem Bityutskiy  *
231e51764aSArtem Bityutskiy  * The age of znodes is just the time-stamp when they were last looked at.
241e51764aSArtem Bityutskiy  * The current shrinker first tries to evict old znodes, then young ones.
251e51764aSArtem Bityutskiy  *
261e51764aSArtem Bityutskiy  * Since the shrinker is global, it has to protect against races with FS
271e51764aSArtem Bityutskiy  * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
281e51764aSArtem Bityutskiy  */
291e51764aSArtem Bityutskiy 
301e51764aSArtem Bityutskiy #include "ubifs.h"
311e51764aSArtem Bityutskiy 
321e51764aSArtem Bityutskiy /* List of all UBIFS file-system instances */
331e51764aSArtem Bityutskiy LIST_HEAD(ubifs_infos);
341e51764aSArtem Bityutskiy 
351e51764aSArtem Bityutskiy /*
361e51764aSArtem Bityutskiy  * We number each shrinker run and record the number on the ubifs_info structure
371e51764aSArtem Bityutskiy  * so that we can easily work out which ubifs_info structures have already been
381e51764aSArtem Bityutskiy  * done by the current run.
391e51764aSArtem Bityutskiy  */
401e51764aSArtem Bityutskiy static unsigned int shrinker_run_no;
411e51764aSArtem Bityutskiy 
421e51764aSArtem Bityutskiy /* Protects 'ubifs_infos' list */
431e51764aSArtem Bityutskiy DEFINE_SPINLOCK(ubifs_infos_lock);
441e51764aSArtem Bityutskiy 
451e51764aSArtem Bityutskiy /* Global clean znode counter (for all mounted UBIFS instances) */
461e51764aSArtem Bityutskiy atomic_long_t ubifs_clean_zn_cnt;
471e51764aSArtem Bityutskiy 
481e51764aSArtem Bityutskiy /**
491e51764aSArtem Bityutskiy  * shrink_tnc - shrink TNC tree.
501e51764aSArtem Bityutskiy  * @c: UBIFS file-system description object
511e51764aSArtem Bityutskiy  * @nr: number of znodes to free
521e51764aSArtem Bityutskiy  * @age: the age of znodes to free
531e51764aSArtem Bityutskiy  * @contention: if any contention, this is set to %1
541e51764aSArtem Bityutskiy  *
551e51764aSArtem Bityutskiy  * This function traverses TNC tree and frees clean znodes. It does not free
561e51764aSArtem Bityutskiy  * clean znodes which younger then @age. Returns number of freed znodes.
571e51764aSArtem Bityutskiy  */
shrink_tnc(struct ubifs_info * c,int nr,int age,int * contention)581e51764aSArtem Bityutskiy static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
591e51764aSArtem Bityutskiy {
601e51764aSArtem Bityutskiy 	int total_freed = 0;
611e51764aSArtem Bityutskiy 	struct ubifs_znode *znode, *zprev;
626cff5732SArnd Bergmann 	time64_t time = ktime_get_seconds();
631e51764aSArtem Bityutskiy 
646eb61d58SRichard Weinberger 	ubifs_assert(c, mutex_is_locked(&c->umount_mutex));
656eb61d58SRichard Weinberger 	ubifs_assert(c, mutex_is_locked(&c->tnc_mutex));
661e51764aSArtem Bityutskiy 
671e51764aSArtem Bityutskiy 	if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
681e51764aSArtem Bityutskiy 		return 0;
691e51764aSArtem Bityutskiy 
701e51764aSArtem Bityutskiy 	/*
711e51764aSArtem Bityutskiy 	 * Traverse the TNC tree in levelorder manner, so that it is possible
721e51764aSArtem Bityutskiy 	 * to destroy large sub-trees. Indeed, if a znode is old, then all its
731e51764aSArtem Bityutskiy 	 * children are older or of the same age.
741e51764aSArtem Bityutskiy 	 *
751e51764aSArtem Bityutskiy 	 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
761e51764aSArtem Bityutskiy 	 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
771e51764aSArtem Bityutskiy 	 * changed only when the 'c->tnc_mutex' is held.
781e51764aSArtem Bityutskiy 	 */
791e51764aSArtem Bityutskiy 	zprev = NULL;
806eb61d58SRichard Weinberger 	znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL);
811e51764aSArtem Bityutskiy 	while (znode && total_freed < nr &&
821e51764aSArtem Bityutskiy 	       atomic_long_read(&c->clean_zn_cnt) > 0) {
831e51764aSArtem Bityutskiy 		int freed;
841e51764aSArtem Bityutskiy 
851e51764aSArtem Bityutskiy 		/*
861e51764aSArtem Bityutskiy 		 * If the znode is clean, but it is in the 'c->cnext' list, this
871e51764aSArtem Bityutskiy 		 * means that this znode has just been written to flash as a
881e51764aSArtem Bityutskiy 		 * part of commit and was marked clean. They will be removed
891e51764aSArtem Bityutskiy 		 * from the list at end commit. We cannot change the list,
901e51764aSArtem Bityutskiy 		 * because it is not protected by any mutex (design decision to
911e51764aSArtem Bityutskiy 		 * make commit really independent and parallel to main I/O). So
921e51764aSArtem Bityutskiy 		 * we just skip these znodes.
931e51764aSArtem Bityutskiy 		 *
941e51764aSArtem Bityutskiy 		 * Note, the 'clean_zn_cnt' counters are not updated until
951e51764aSArtem Bityutskiy 		 * after the commit, so the UBIFS shrinker does not report
961e51764aSArtem Bityutskiy 		 * the znodes which are in the 'c->cnext' list as freeable.
971e51764aSArtem Bityutskiy 		 *
981e51764aSArtem Bityutskiy 		 * Also note, if the root of a sub-tree is not in 'c->cnext',
991e51764aSArtem Bityutskiy 		 * then the whole sub-tree is not in 'c->cnext' as well, so it
1001e51764aSArtem Bityutskiy 		 * is safe to dump whole sub-tree.
1011e51764aSArtem Bityutskiy 		 */
1021e51764aSArtem Bityutskiy 
1031e51764aSArtem Bityutskiy 		if (znode->cnext) {
1041e51764aSArtem Bityutskiy 			/*
1051e51764aSArtem Bityutskiy 			 * Very soon these znodes will be removed from the list
1061e51764aSArtem Bityutskiy 			 * and become freeable.
1071e51764aSArtem Bityutskiy 			 */
1081e51764aSArtem Bityutskiy 			*contention = 1;
1091e51764aSArtem Bityutskiy 		} else if (!ubifs_zn_dirty(znode) &&
1101e51764aSArtem Bityutskiy 			   abs(time - znode->time) >= age) {
1111e51764aSArtem Bityutskiy 			if (znode->parent)
1121e51764aSArtem Bityutskiy 				znode->parent->zbranch[znode->iip].znode = NULL;
1131e51764aSArtem Bityutskiy 			else
1141e51764aSArtem Bityutskiy 				c->zroot.znode = NULL;
1151e51764aSArtem Bityutskiy 
1166eb61d58SRichard Weinberger 			freed = ubifs_destroy_tnc_subtree(c, znode);
1171e51764aSArtem Bityutskiy 			atomic_long_sub(freed, &ubifs_clean_zn_cnt);
1181e51764aSArtem Bityutskiy 			atomic_long_sub(freed, &c->clean_zn_cnt);
1191e51764aSArtem Bityutskiy 			total_freed += freed;
1201e51764aSArtem Bityutskiy 			znode = zprev;
1211e51764aSArtem Bityutskiy 		}
1221e51764aSArtem Bityutskiy 
1231e51764aSArtem Bityutskiy 		if (unlikely(!c->zroot.znode))
1241e51764aSArtem Bityutskiy 			break;
1251e51764aSArtem Bityutskiy 
1261e51764aSArtem Bityutskiy 		zprev = znode;
1276eb61d58SRichard Weinberger 		znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode);
1281e51764aSArtem Bityutskiy 		cond_resched();
1291e51764aSArtem Bityutskiy 	}
1301e51764aSArtem Bityutskiy 
1311e51764aSArtem Bityutskiy 	return total_freed;
1321e51764aSArtem Bityutskiy }
1331e51764aSArtem Bityutskiy 
1341e51764aSArtem Bityutskiy /**
1351e51764aSArtem Bityutskiy  * shrink_tnc_trees - shrink UBIFS TNC trees.
1361e51764aSArtem Bityutskiy  * @nr: number of znodes to free
1371e51764aSArtem Bityutskiy  * @age: the age of znodes to free
1381e51764aSArtem Bityutskiy  * @contention: if any contention, this is set to %1
1391e51764aSArtem Bityutskiy  *
1401e51764aSArtem Bityutskiy  * This function walks the list of mounted UBIFS file-systems and frees clean
141025dfdafSFrederik Schwarzer  * znodes which are older than @age, until at least @nr znodes are freed.
1421e51764aSArtem Bityutskiy  * Returns the number of freed znodes.
1431e51764aSArtem Bityutskiy  */
shrink_tnc_trees(int nr,int age,int * contention)1441e51764aSArtem Bityutskiy static int shrink_tnc_trees(int nr, int age, int *contention)
1451e51764aSArtem Bityutskiy {
1461e51764aSArtem Bityutskiy 	struct ubifs_info *c;
1471e51764aSArtem Bityutskiy 	struct list_head *p;
1481e51764aSArtem Bityutskiy 	unsigned int run_no;
1491e51764aSArtem Bityutskiy 	int freed = 0;
1501e51764aSArtem Bityutskiy 
1511e51764aSArtem Bityutskiy 	spin_lock(&ubifs_infos_lock);
1521e51764aSArtem Bityutskiy 	do {
1531e51764aSArtem Bityutskiy 		run_no = ++shrinker_run_no;
1541e51764aSArtem Bityutskiy 	} while (run_no == 0);
1551e51764aSArtem Bityutskiy 	/* Iterate over all mounted UBIFS file-systems and try to shrink them */
1561e51764aSArtem Bityutskiy 	p = ubifs_infos.next;
1571e51764aSArtem Bityutskiy 	while (p != &ubifs_infos) {
1581e51764aSArtem Bityutskiy 		c = list_entry(p, struct ubifs_info, infos_list);
1591e51764aSArtem Bityutskiy 		/*
1601e51764aSArtem Bityutskiy 		 * We move the ones we do to the end of the list, so we stop
1611e51764aSArtem Bityutskiy 		 * when we see one we have already done.
1621e51764aSArtem Bityutskiy 		 */
1631e51764aSArtem Bityutskiy 		if (c->shrinker_run_no == run_no)
1641e51764aSArtem Bityutskiy 			break;
1651e51764aSArtem Bityutskiy 		if (!mutex_trylock(&c->umount_mutex)) {
1661e51764aSArtem Bityutskiy 			/* Some un-mount is in progress, try next FS */
1671e51764aSArtem Bityutskiy 			*contention = 1;
1681e51764aSArtem Bityutskiy 			p = p->next;
1691e51764aSArtem Bityutskiy 			continue;
1701e51764aSArtem Bityutskiy 		}
1711e51764aSArtem Bityutskiy 		/*
1721e51764aSArtem Bityutskiy 		 * We're holding 'c->umount_mutex', so the file-system won't go
1731e51764aSArtem Bityutskiy 		 * away.
1741e51764aSArtem Bityutskiy 		 */
1751e51764aSArtem Bityutskiy 		if (!mutex_trylock(&c->tnc_mutex)) {
1761e51764aSArtem Bityutskiy 			mutex_unlock(&c->umount_mutex);
1771e51764aSArtem Bityutskiy 			*contention = 1;
1781e51764aSArtem Bityutskiy 			p = p->next;
1791e51764aSArtem Bityutskiy 			continue;
1801e51764aSArtem Bityutskiy 		}
1811e51764aSArtem Bityutskiy 		spin_unlock(&ubifs_infos_lock);
1821e51764aSArtem Bityutskiy 		/*
1831e51764aSArtem Bityutskiy 		 * OK, now we have TNC locked, the file-system cannot go away -
1841e51764aSArtem Bityutskiy 		 * it is safe to reap the cache.
1851e51764aSArtem Bityutskiy 		 */
1861e51764aSArtem Bityutskiy 		c->shrinker_run_no = run_no;
1871e51764aSArtem Bityutskiy 		freed += shrink_tnc(c, nr, age, contention);
1881e51764aSArtem Bityutskiy 		mutex_unlock(&c->tnc_mutex);
1891e51764aSArtem Bityutskiy 		spin_lock(&ubifs_infos_lock);
1901e51764aSArtem Bityutskiy 		/* Get the next list element before we move this one */
1911e51764aSArtem Bityutskiy 		p = p->next;
1921e51764aSArtem Bityutskiy 		/*
1931e51764aSArtem Bityutskiy 		 * Move this one to the end of the list to provide some
1941e51764aSArtem Bityutskiy 		 * fairness.
1951e51764aSArtem Bityutskiy 		 */
196ec32816fSEric Sesterhenn 		list_move_tail(&c->infos_list, &ubifs_infos);
1971e51764aSArtem Bityutskiy 		mutex_unlock(&c->umount_mutex);
1981e51764aSArtem Bityutskiy 		if (freed >= nr)
1991e51764aSArtem Bityutskiy 			break;
2001e51764aSArtem Bityutskiy 	}
2011e51764aSArtem Bityutskiy 	spin_unlock(&ubifs_infos_lock);
2021e51764aSArtem Bityutskiy 	return freed;
2031e51764aSArtem Bityutskiy }
2041e51764aSArtem Bityutskiy 
2051e51764aSArtem Bityutskiy /**
2061e51764aSArtem Bityutskiy  * kick_a_thread - kick a background thread to start commit.
2071e51764aSArtem Bityutskiy  *
2081e51764aSArtem Bityutskiy  * This function kicks a background thread to start background commit. Returns
2091e51764aSArtem Bityutskiy  * %-1 if a thread was kicked or there is another reason to assume the memory
2101e51764aSArtem Bityutskiy  * will soon be freed or become freeable. If there are no dirty znodes, returns
2111e51764aSArtem Bityutskiy  * %0.
2121e51764aSArtem Bityutskiy  */
kick_a_thread(void)2131e51764aSArtem Bityutskiy static int kick_a_thread(void)
2141e51764aSArtem Bityutskiy {
2151e51764aSArtem Bityutskiy 	int i;
2161e51764aSArtem Bityutskiy 	struct ubifs_info *c;
2171e51764aSArtem Bityutskiy 
2181e51764aSArtem Bityutskiy 	/*
2191e51764aSArtem Bityutskiy 	 * Iterate over all mounted UBIFS file-systems and find out if there is
2201e51764aSArtem Bityutskiy 	 * already an ongoing commit operation there. If no, then iterate for
2211e51764aSArtem Bityutskiy 	 * the second time and initiate background commit.
2221e51764aSArtem Bityutskiy 	 */
2231e51764aSArtem Bityutskiy 	spin_lock(&ubifs_infos_lock);
2241e51764aSArtem Bityutskiy 	for (i = 0; i < 2; i++) {
2251e51764aSArtem Bityutskiy 		list_for_each_entry(c, &ubifs_infos, infos_list) {
2261e51764aSArtem Bityutskiy 			long dirty_zn_cnt;
2271e51764aSArtem Bityutskiy 
2281e51764aSArtem Bityutskiy 			if (!mutex_trylock(&c->umount_mutex)) {
2291e51764aSArtem Bityutskiy 				/*
2301e51764aSArtem Bityutskiy 				 * Some un-mount is in progress, it will
2311e51764aSArtem Bityutskiy 				 * certainly free memory, so just return.
2321e51764aSArtem Bityutskiy 				 */
2331e51764aSArtem Bityutskiy 				spin_unlock(&ubifs_infos_lock);
2341e51764aSArtem Bityutskiy 				return -1;
2351e51764aSArtem Bityutskiy 			}
2361e51764aSArtem Bityutskiy 
2371e51764aSArtem Bityutskiy 			dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
2381e51764aSArtem Bityutskiy 
2391e51764aSArtem Bityutskiy 			if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
2402ef13294SArtem Bityutskiy 			    c->ro_mount || c->ro_error) {
2411e51764aSArtem Bityutskiy 				mutex_unlock(&c->umount_mutex);
2421e51764aSArtem Bityutskiy 				continue;
2431e51764aSArtem Bityutskiy 			}
2441e51764aSArtem Bityutskiy 
2451e51764aSArtem Bityutskiy 			if (c->cmt_state != COMMIT_RESTING) {
2461e51764aSArtem Bityutskiy 				spin_unlock(&ubifs_infos_lock);
2471e51764aSArtem Bityutskiy 				mutex_unlock(&c->umount_mutex);
2481e51764aSArtem Bityutskiy 				return -1;
2491e51764aSArtem Bityutskiy 			}
2501e51764aSArtem Bityutskiy 
2511e51764aSArtem Bityutskiy 			if (i == 1) {
252ec32816fSEric Sesterhenn 				list_move_tail(&c->infos_list, &ubifs_infos);
2531e51764aSArtem Bityutskiy 				spin_unlock(&ubifs_infos_lock);
2541e51764aSArtem Bityutskiy 
2551e51764aSArtem Bityutskiy 				ubifs_request_bg_commit(c);
2561e51764aSArtem Bityutskiy 				mutex_unlock(&c->umount_mutex);
2571e51764aSArtem Bityutskiy 				return -1;
2581e51764aSArtem Bityutskiy 			}
2591e51764aSArtem Bityutskiy 			mutex_unlock(&c->umount_mutex);
2601e51764aSArtem Bityutskiy 		}
2611e51764aSArtem Bityutskiy 	}
2621e51764aSArtem Bityutskiy 	spin_unlock(&ubifs_infos_lock);
2631e51764aSArtem Bityutskiy 
2641e51764aSArtem Bityutskiy 	return 0;
2651e51764aSArtem Bityutskiy }
2661e51764aSArtem Bityutskiy 
ubifs_shrink_count(struct shrinker * shrink,struct shrink_control * sc)2671ab6c499SDave Chinner unsigned long ubifs_shrink_count(struct shrinker *shrink,
2681ab6c499SDave Chinner 				 struct shrink_control *sc)
2691e51764aSArtem Bityutskiy {
2701e51764aSArtem Bityutskiy 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
2711e51764aSArtem Bityutskiy 
272cf610bf4SArtem Bityutskiy 	/*
273cf610bf4SArtem Bityutskiy 	 * Due to the way UBIFS updates the clean znode counter it may
274cf610bf4SArtem Bityutskiy 	 * temporarily be negative.
275cf610bf4SArtem Bityutskiy 	 */
276cf610bf4SArtem Bityutskiy 	return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
2771ab6c499SDave Chinner }
2781ab6c499SDave Chinner 
ubifs_shrink_scan(struct shrinker * shrink,struct shrink_control * sc)2791ab6c499SDave Chinner unsigned long ubifs_shrink_scan(struct shrinker *shrink,
2801ab6c499SDave Chinner 				struct shrink_control *sc)
2811ab6c499SDave Chinner {
2821ab6c499SDave Chinner 	unsigned long nr = sc->nr_to_scan;
2831ab6c499SDave Chinner 	int contention = 0;
2841ab6c499SDave Chinner 	unsigned long freed;
2851ab6c499SDave Chinner 	long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
2861e51764aSArtem Bityutskiy 
2871e51764aSArtem Bityutskiy 	if (!clean_zn_cnt) {
2881e51764aSArtem Bityutskiy 		/*
2891e51764aSArtem Bityutskiy 		 * No clean znodes, nothing to reap. All we can do in this case
2901e51764aSArtem Bityutskiy 		 * is to kick background threads to start commit, which will
2911e51764aSArtem Bityutskiy 		 * probably make clean znodes which, in turn, will be freeable.
2921e51764aSArtem Bityutskiy 		 * And we return -1 which means will make VM call us again
2931e51764aSArtem Bityutskiy 		 * later.
2941e51764aSArtem Bityutskiy 		 */
2951e51764aSArtem Bityutskiy 		dbg_tnc("no clean znodes, kick a thread");
2961e51764aSArtem Bityutskiy 		return kick_a_thread();
2971e51764aSArtem Bityutskiy 	}
2981e51764aSArtem Bityutskiy 
2991e51764aSArtem Bityutskiy 	freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
3001e51764aSArtem Bityutskiy 	if (freed >= nr)
3011e51764aSArtem Bityutskiy 		goto out;
3021e51764aSArtem Bityutskiy 
3031e51764aSArtem Bityutskiy 	dbg_tnc("not enough old znodes, try to free young ones");
3041e51764aSArtem Bityutskiy 	freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
3051e51764aSArtem Bityutskiy 	if (freed >= nr)
3061e51764aSArtem Bityutskiy 		goto out;
3071e51764aSArtem Bityutskiy 
3081e51764aSArtem Bityutskiy 	dbg_tnc("not enough young znodes, free all");
3091e51764aSArtem Bityutskiy 	freed += shrink_tnc_trees(nr - freed, 0, &contention);
3101e51764aSArtem Bityutskiy 
3111e51764aSArtem Bityutskiy 	if (!freed && contention) {
3121e51764aSArtem Bityutskiy 		dbg_tnc("freed nothing, but contention");
3131ab6c499SDave Chinner 		return SHRINK_STOP;
3141e51764aSArtem Bityutskiy 	}
3151e51764aSArtem Bityutskiy 
3161e51764aSArtem Bityutskiy out:
3171ab6c499SDave Chinner 	dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
3181e51764aSArtem Bityutskiy 	return freed;
3191e51764aSArtem Bityutskiy }
320