xref: /openbmc/linux/drivers/md/bcache/writeback.c (revision ba2929159000dc7015cc01cdf7bb72542e19952a)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
2cafe5635SKent Overstreet /*
3cafe5635SKent Overstreet  * background writeback - scan btree for dirty data and write it to the backing
4cafe5635SKent Overstreet  * device
5cafe5635SKent Overstreet  *
6cafe5635SKent Overstreet  * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
7cafe5635SKent Overstreet  * Copyright 2012 Google, Inc.
8cafe5635SKent Overstreet  */
9cafe5635SKent Overstreet 
10cafe5635SKent Overstreet #include "bcache.h"
11cafe5635SKent Overstreet #include "btree.h"
12cafe5635SKent Overstreet #include "debug.h"
13279afbadSKent Overstreet #include "writeback.h"
14cafe5635SKent Overstreet 
155e6926daSKent Overstreet #include <linux/delay.h>
165e6926daSKent Overstreet #include <linux/kthread.h>
17e6017571SIngo Molnar #include <linux/sched/clock.h>
18c37511b8SKent Overstreet #include <trace/events/bcache.h>
19c37511b8SKent Overstreet 
207a671d8eSColy Li static void update_gc_after_writeback(struct cache_set *c)
217a671d8eSColy Li {
227a671d8eSColy Li 	if (c->gc_after_writeback != (BCH_ENABLE_AUTO_GC) ||
237a671d8eSColy Li 	    c->gc_stats.in_use < BCH_AUTO_GC_DIRTY_THRESHOLD)
247a671d8eSColy Li 		return;
257a671d8eSColy Li 
267a671d8eSColy Li 	c->gc_after_writeback |= BCH_DO_AUTO_GC;
277a671d8eSColy Li }
287a671d8eSColy Li 
29cafe5635SKent Overstreet /* Rate limiting */
30616486abSMichael Lyle static uint64_t __calc_target_rate(struct cached_dev *dc)
31616486abSMichael Lyle {
32616486abSMichael Lyle 	struct cache_set *c = dc->disk.c;
33616486abSMichael Lyle 
34616486abSMichael Lyle 	/*
35616486abSMichael Lyle 	 * This is the size of the cache, minus the amount used for
36616486abSMichael Lyle 	 * flash-only devices
37616486abSMichael Lyle 	 */
384a784266SColy Li 	uint64_t cache_sectors = c->nbuckets * c->cache->sb.bucket_size -
3999a27d59STang Junhui 				atomic_long_read(&c->flash_dev_dirty_sectors);
40616486abSMichael Lyle 
41616486abSMichael Lyle 	/*
42616486abSMichael Lyle 	 * Unfortunately there is no control of global dirty data.  If the
43616486abSMichael Lyle 	 * user states that they want 10% dirty data in the cache, and has,
44616486abSMichael Lyle 	 * e.g., 5 backing volumes of equal size, we try and ensure each
45616486abSMichael Lyle 	 * backing volume uses about 2% of the cache for dirty data.
46616486abSMichael Lyle 	 */
47616486abSMichael Lyle 	uint32_t bdev_share =
48cda25b82SChristoph Hellwig 		div64_u64(bdev_nr_sectors(dc->bdev) << WRITEBACK_SHARE_SHIFT,
49616486abSMichael Lyle 				c->cached_dev_sectors);
50616486abSMichael Lyle 
51616486abSMichael Lyle 	uint64_t cache_dirty_target =
52616486abSMichael Lyle 		div_u64(cache_sectors * dc->writeback_percent, 100);
53616486abSMichael Lyle 
54616486abSMichael Lyle 	/* Ensure each backing dev gets at least one dirty share */
55616486abSMichael Lyle 	if (bdev_share < 1)
56616486abSMichael Lyle 		bdev_share = 1;
57616486abSMichael Lyle 
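	/*
	 * Worked example (illustrative numbers only, not taken from any
	 * particular configuration): with cache_sectors around 2 billion
	 * (about 1 TiB) and writeback_percent = 10, cache_dirty_target is
	 * roughly 200 million sectors.  If this backing device provides
	 * 1/5 of c->cached_dev_sectors, bdev_share is about
	 * (1 << WRITEBACK_SHARE_SHIFT) / 5, so the value returned below is
	 * roughly cache_dirty_target / 5, i.e. about 2% of the cache.
	 */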
58616486abSMichael Lyle 	return (cache_dirty_target * bdev_share) >> WRITEBACK_SHARE_SHIFT;
59616486abSMichael Lyle }
60cafe5635SKent Overstreet 
61cafe5635SKent Overstreet static void __update_writeback_rate(struct cached_dev *dc)
62cafe5635SKent Overstreet {
631d316e65SMichael Lyle 	/*
641d316e65SMichael Lyle 	 * PI controller:
651d316e65SMichael Lyle 	 * Figures out the amount that should be written per second.
661d316e65SMichael Lyle 	 *
671d316e65SMichael Lyle 	 * First, the error (number of sectors that are dirty beyond our
681d316e65SMichael Lyle 	 * target) is calculated.  The error is accumulated (numerically
691d316e65SMichael Lyle 	 * integrated).
701d316e65SMichael Lyle 	 *
711d316e65SMichael Lyle 	 * Then, the proportional value and integral value are scaled
721d316e65SMichael Lyle 	 * based on configured values.  These are stored as inverses to
731d316e65SMichael Lyle 	 * avoid fixed point math and to make configuration easy-- e.g.
741d316e65SMichael Lyle 	 * the default value of 40 for writeback_rate_p_term_inverse
751d316e65SMichael Lyle 	 * attempts to write at a rate that would retire all the dirty
761d316e65SMichael Lyle 	 * blocks in 40 seconds.
771d316e65SMichael Lyle 	 *
781d316e65SMichael Lyle 	 * The writeback_rate_i_inverse value of 10000 means that 1/10000th
791d316e65SMichael Lyle 	 * of the error is accumulated in the integral term per second.
801d316e65SMichael Lyle 	 * This acts as a slow, long-term average that is not subject to
811d316e65SMichael Lyle 	 * variations in usage like the p term.
821d316e65SMichael Lyle 	 */
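	/*
	 * Rough worked example (assumed numbers): if dirty exceeds target
	 * by 400000 sectors and writeback_rate_p_term_inverse is 40, the
	 * proportional term alone contributes 400000 / 40 = 10000
	 * sectors/sec, enough to retire the excess in about 40 seconds;
	 * the integral term then slowly corrects any error that persists
	 * across many update periods.
	 */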
83616486abSMichael Lyle 	int64_t target = __calc_target_rate(dc);
84279afbadSKent Overstreet 	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
851d316e65SMichael Lyle 	int64_t error = dirty - target;
861d316e65SMichael Lyle 	int64_t proportional_scaled =
871d316e65SMichael Lyle 		div_s64(error, dc->writeback_rate_p_term_inverse);
88e41166c5SMichael Lyle 	int64_t integral_scaled;
89e41166c5SMichael Lyle 	uint32_t new_rate;
90cafe5635SKent Overstreet 
9171dda2a5Sdongdong tao 	/*
9271dda2a5Sdongdong tao 	 * We need to consider the number of dirty buckets as well
9371dda2a5Sdongdong tao 	 * when calculating proportional_scaled. Otherwise we might end
9471dda2a5Sdongdong tao 	 * up with an unreasonably small writeback rate in a highly
9571dda2a5Sdongdong tao 	 * fragmented situation where very few dirty sectors consume a
9671dda2a5Sdongdong tao 	 * lot of dirty buckets. The worst case is when the dirty buckets
9771dda2a5Sdongdong tao 	 * reach cutoff_writeback_sync while the dirty data has not even
9871dda2a5Sdongdong tao 	 * reached the writeback percent, so the rate stays at the
9971dda2a5Sdongdong tao 	 * minimum value and writes get stuck in a non-writeback mode.
10071dda2a5Sdongdong tao 	 */
10171dda2a5Sdongdong tao 	struct cache_set *c = dc->disk.c;
10271dda2a5Sdongdong tao 
10371dda2a5Sdongdong tao 	int64_t dirty_buckets = c->nbuckets - c->avail_nbuckets;
10471dda2a5Sdongdong tao 
10571dda2a5Sdongdong tao 	if (dc->writeback_consider_fragment &&
10671dda2a5Sdongdong tao 		c->gc_stats.in_use > BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW && dirty > 0) {
10771dda2a5Sdongdong tao 		int64_t fragment =
10871dda2a5Sdongdong tao 			div_s64((dirty_buckets *  c->cache->sb.bucket_size), dirty);
10971dda2a5Sdongdong tao 		int64_t fp_term;
11071dda2a5Sdongdong tao 		int64_t fps;
11171dda2a5Sdongdong tao 
11271dda2a5Sdongdong tao 		if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID) {
11362594f18SGustavo A. R. Silva 			fp_term = (int64_t)dc->writeback_rate_fp_term_low *
11471dda2a5Sdongdong tao 			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_LOW);
11571dda2a5Sdongdong tao 		} else if (c->gc_stats.in_use <= BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH) {
11662594f18SGustavo A. R. Silva 			fp_term = (int64_t)dc->writeback_rate_fp_term_mid *
11771dda2a5Sdongdong tao 			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID);
11871dda2a5Sdongdong tao 		} else {
11962594f18SGustavo A. R. Silva 			fp_term = (int64_t)dc->writeback_rate_fp_term_high *
12071dda2a5Sdongdong tao 			(c->gc_stats.in_use - BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH);
12171dda2a5Sdongdong tao 		}
12271dda2a5Sdongdong tao 		fps = div_s64(dirty, dirty_buckets) * fp_term;
12371dda2a5Sdongdong tao 		if (fragment > 3 && fps > proportional_scaled) {
12471dda2a5Sdongdong tao 			/* Only overwrite the p term when fragment > 3 */
12571dda2a5Sdongdong tao 			proportional_scaled = fps;
12671dda2a5Sdongdong tao 		}
12771dda2a5Sdongdong tao 	}
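	/*
	 * Illustrative example of the branches above (assumed values: the
	 * LOW/MID/HIGH thresholds are taken here as 50/57/64 percent): with
	 * gc_stats.in_use at 60%, the middle branch picks
	 * fp_term = writeback_rate_fp_term_mid * (60 - 57), and
	 * fps = (dirty / dirty_buckets) * fp_term.  The result only
	 * replaces proportional_scaled when fragment > 3 and fps is the
	 * larger of the two.
	 */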
12871dda2a5Sdongdong tao 
1291d316e65SMichael Lyle 	if ((error < 0 && dc->writeback_rate_integral > 0) ||
1301d316e65SMichael Lyle 	    (error > 0 && time_before64(local_clock(),
1311d316e65SMichael Lyle 			 dc->writeback_rate.next + NSEC_PER_MSEC))) {
1321d316e65SMichael Lyle 		/*
1331d316e65SMichael Lyle 		 * Only decrease the integral term if it's more than
1341d316e65SMichael Lyle 		 * zero.  Only increase the integral term if the device
1351d316e65SMichael Lyle 		 * is keeping up.  (Don't wind up the integral
1361d316e65SMichael Lyle 		 * ineffectively in either case).
1371d316e65SMichael Lyle 		 *
1381d316e65SMichael Lyle 		 * It's necessary to scale this by
1391d316e65SMichael Lyle 		 * writeback_rate_update_seconds to keep the integral
1401d316e65SMichael Lyle 		 * term dimensioned properly.
1411d316e65SMichael Lyle 		 */
1421d316e65SMichael Lyle 		dc->writeback_rate_integral += error *
1431d316e65SMichael Lyle 			dc->writeback_rate_update_seconds;
1441d316e65SMichael Lyle 	}
145cafe5635SKent Overstreet 
1461d316e65SMichael Lyle 	integral_scaled = div_s64(dc->writeback_rate_integral,
1471d316e65SMichael Lyle 			dc->writeback_rate_i_term_inverse);
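	/*
	 * Worked example (assumed numbers): with an error of 100000
	 * sectors, writeback_rate_update_seconds = 5 and
	 * writeback_rate_i_term_inverse = 10000, each update adds
	 * 100000 * 5 = 500000 to the integral, which contributes
	 * 500000 / 10000 = 50 sectors/sec to the rate until the error
	 * shrinks again.
	 */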
14816749c23SKent Overstreet 
149e41166c5SMichael Lyle 	new_rate = clamp_t(int32_t, (proportional_scaled + integral_scaled),
150e41166c5SMichael Lyle 			dc->writeback_rate_minimum, NSEC_PER_SEC);
15116749c23SKent Overstreet 
1521d316e65SMichael Lyle 	dc->writeback_rate_proportional = proportional_scaled;
1531d316e65SMichael Lyle 	dc->writeback_rate_integral_scaled = integral_scaled;
154ea8c5356SColy Li 	dc->writeback_rate_change = new_rate -
155ea8c5356SColy Li 			atomic_long_read(&dc->writeback_rate.rate);
156ea8c5356SColy Li 	atomic_long_set(&dc->writeback_rate.rate, new_rate);
157cafe5635SKent Overstreet 	dc->writeback_rate_target = target;
158cafe5635SKent Overstreet }
159cafe5635SKent Overstreet 
160d2d05b88SColy Li static bool idle_counter_exceeded(struct cache_set *c)
161ea8c5356SColy Li {
162d2d05b88SColy Li 	int counter, dev_nr;
163d2d05b88SColy Li 
164d2d05b88SColy Li 	/*
165d2d05b88SColy Li 	 * If c->idle_counter overflows (idle for a really long time),
166d2d05b88SColy Li 	 * reset it to 0 and do not set the maximum rate this time, for
167d2d05b88SColy Li 	 * code simplicity.
168d2d05b88SColy Li 	 */
169d2d05b88SColy Li 	counter = atomic_inc_return(&c->idle_counter);
170d2d05b88SColy Li 	if (counter <= 0) {
171d2d05b88SColy Li 		atomic_set(&c->idle_counter, 0);
172d2d05b88SColy Li 		return false;
173d2d05b88SColy Li 	}
174d2d05b88SColy Li 
175d2d05b88SColy Li 	dev_nr = atomic_read(&c->attached_dev_nr);
176d2d05b88SColy Li 	if (dev_nr == 0)
177c5fcdedcSColy Li 		return false;
178c5fcdedcSColy Li 
179d2d05b88SColy Li 	/*
180d2d05b88SColy Li 	 * c->idle_counter is increased by the writeback threads of all
181d2d05b88SColy Li 	 * attached backing devices; to represent a rough time period,
182d2d05b88SColy Li 	 * the counter should be divided by dev_nr. Otherwise the idle
183d2d05b88SColy Li 	 * time could not grow larger as more backing devices are
184d2d05b88SColy Li 	 * attached.
185d2d05b88SColy Li 	 * The following calculation is equivalent to checking
186d2d05b88SColy Li 	 *	(counter / dev_nr) < (dev_nr * 6)
187d2d05b88SColy Li 	 */
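	/*
	 * For example (assumed numbers): with one attached device the
	 * check below requires counter >= 1 * 1 * 6 = 6 increments; with
	 * dev_nr == 2 it requires counter >= 24, i.e. roughly 12 rounds
	 * per device, so the idle threshold grows as more backing devices
	 * are attached.
	 */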
188d2d05b88SColy Li 	if (counter < (dev_nr * dev_nr * 6))
189141df8bbSColy Li 		return false;
190d2d05b88SColy Li 
191d2d05b88SColy Li 	return true;
192d2d05b88SColy Li }
193d2d05b88SColy Li 
194ea8c5356SColy Li /*
195ea8c5356SColy Li  * idle_counter is increased every time update_writeback_rate() is
196ea8c5356SColy Li  * called. If all backing devices attached to the same cache set have
197ea8c5356SColy Li  * identical dc->writeback_rate_update_seconds values, it takes about 6
198ea8c5356SColy Li  * rounds of update_writeback_rate() on each backing device before
199ea8c5356SColy Li  * c->at_max_writeback_rate is set to 1, and then the max writeback rate
200ea8c5356SColy Li  * is set on each dc->writeback_rate.rate.
201ea8c5356SColy Li  * In order to avoid the extra locking cost of counting the exact number
202ea8c5356SColy Li  * of dirty cached devices, c->attached_dev_nr is used to calculate the
203ea8c5356SColy Li  * idle threshold. It might be bigger if not all cached devices are in
204ea8c5356SColy Li  * writeback mode, but it still works well with a limited number of
205ea8c5356SColy Li  * extra rounds of update_writeback_rate().
206ea8c5356SColy Li  */
207d2d05b88SColy Li static bool set_at_max_writeback_rate(struct cache_set *c,
208d2d05b88SColy Li 				       struct cached_dev *dc)
209d2d05b88SColy Li {
210d2d05b88SColy Li 	/* Don't set max writeback rate if it is disabled */
211d2d05b88SColy Li 	if (!c->idle_max_writeback_rate_enabled)
212d2d05b88SColy Li 		return false;
213d2d05b88SColy Li 
214d2d05b88SColy Li 	/* Don't set max writeback rate if gc is running */
215d2d05b88SColy Li 	if (!c->gc_mark_valid)
216d2d05b88SColy Li 		return false;
217d2d05b88SColy Li 
218d2d05b88SColy Li 	if (!idle_counter_exceeded(c))
219ea8c5356SColy Li 		return false;
220ea8c5356SColy Li 
221ea8c5356SColy Li 	if (atomic_read(&c->at_max_writeback_rate) != 1)
222ea8c5356SColy Li 		atomic_set(&c->at_max_writeback_rate, 1);
223ea8c5356SColy Li 
224ea8c5356SColy Li 	atomic_long_set(&dc->writeback_rate.rate, INT_MAX);
225ea8c5356SColy Li 
226ea8c5356SColy Li 	/* keep writeback_rate_target as existing value */
227ea8c5356SColy Li 	dc->writeback_rate_proportional = 0;
228ea8c5356SColy Li 	dc->writeback_rate_integral_scaled = 0;
229ea8c5356SColy Li 	dc->writeback_rate_change = 0;
230ea8c5356SColy Li 
231ea8c5356SColy Li 	/*
232d2d05b88SColy Li 	 * In case new I/O arrives before
233d2d05b88SColy Li 	 * set_at_max_writeback_rate() returns.
234ea8c5356SColy Li 	 */
235d2d05b88SColy Li 	if (!idle_counter_exceeded(c) ||
236ea8c5356SColy Li 	    !atomic_read(&c->at_max_writeback_rate))
237ea8c5356SColy Li 		return false;
238ea8c5356SColy Li 
239ea8c5356SColy Li 	return true;
240ea8c5356SColy Li }
241ea8c5356SColy Li 
242cafe5635SKent Overstreet static void update_writeback_rate(struct work_struct *work)
243cafe5635SKent Overstreet {
244cafe5635SKent Overstreet 	struct cached_dev *dc = container_of(to_delayed_work(work),
245cafe5635SKent Overstreet 					     struct cached_dev,
246cafe5635SKent Overstreet 					     writeback_rate_update);
247771f393eSColy Li 	struct cache_set *c = dc->disk.c;
248cafe5635SKent Overstreet 
2493fd47bfeSColy Li 	/*
2503fd47bfeSColy Li 	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
2513fd47bfeSColy Li 	 * cancel_delayed_work_sync().
2523fd47bfeSColy Li 	 */
2533fd47bfeSColy Li 	set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
2543fd47bfeSColy Li 	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
255b004aa86SDavidlohr Bueso 	smp_mb__after_atomic();
2563fd47bfeSColy Li 
257771f393eSColy Li 	/*
258771f393eSColy Li 	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
259771f393eSColy Li 	 * check it here too.
260771f393eSColy Li 	 */
261771f393eSColy Li 	if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) ||
262771f393eSColy Li 	    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
2633fd47bfeSColy Li 		clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
2643fd47bfeSColy Li 		/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
265b004aa86SDavidlohr Bueso 		smp_mb__after_atomic();
2663fd47bfeSColy Li 		return;
2673fd47bfeSColy Li 	}
2683fd47bfeSColy Li 
269ea8c5356SColy Li 	/*
270ea8c5356SColy Li 	 * If the whole cache set is idle, set_at_max_writeback_rate()
271ea8c5356SColy Li 	 * will set the writeback rate to the maximum. It is then
272ea8c5356SColy Li 	 * unnecessary to update the writeback rate for an idle cache
273ea8c5356SColy Li 	 * set that is already at the maximum writeback rate.
274ea8c5356SColy Li 	 */
275a1a2d8f0SColy Li 	if (atomic_read(&dc->has_dirty) && dc->writeback_percent &&
276a1a2d8f0SColy Li 	    !set_at_max_writeback_rate(c, dc)) {
277a1a2d8f0SColy Li 		do {
278a1a2d8f0SColy Li 			if (!down_read_trylock((&dc->writeback_lock))) {
279a1a2d8f0SColy Li 				dc->rate_update_retry++;
280a1a2d8f0SColy Li 				if (dc->rate_update_retry <=
281a1a2d8f0SColy Li 				    BCH_WBRATE_UPDATE_MAX_SKIPS)
282a1a2d8f0SColy Li 					break;
283cafe5635SKent Overstreet 				down_read(&dc->writeback_lock);
284a1a2d8f0SColy Li 				dc->rate_update_retry = 0;
285a1a2d8f0SColy Li 			}
286cafe5635SKent Overstreet 			__update_writeback_rate(dc);
2877a671d8eSColy Li 			update_gc_after_writeback(c);
288cafe5635SKent Overstreet 			up_read(&dc->writeback_lock);
289a1a2d8f0SColy Li 		} while (0);
290ea8c5356SColy Li 	}
291ea8c5356SColy Li 
2925e6926daSKent Overstreet 
293771f393eSColy Li 	/*
294771f393eSColy Li 	 * CACHE_SET_IO_DISABLE might be set via sysfs interface,
295771f393eSColy Li 	 * check it here too.
296771f393eSColy Li 	 */
297771f393eSColy Li 	if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) &&
298771f393eSColy Li 	    !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
2995e6926daSKent Overstreet 		schedule_delayed_work(&dc->writeback_rate_update,
3005e6926daSKent Overstreet 			      dc->writeback_rate_update_seconds * HZ);
301cafe5635SKent Overstreet 	}
302cafe5635SKent Overstreet 
3033fd47bfeSColy Li 	/*
3043fd47bfeSColy Li 	 * should check BCACHE_DEV_RATE_DW_RUNNING before calling
3053fd47bfeSColy Li 	 * cancel_delayed_work_sync().
3063fd47bfeSColy Li 	 */
3073fd47bfeSColy Li 	clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
3083fd47bfeSColy Li 	/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
309b004aa86SDavidlohr Bueso 	smp_mb__after_atomic();
3103fd47bfeSColy Li }
3113fd47bfeSColy Li 
3126f10f7d1SColy Li static unsigned int writeback_delay(struct cached_dev *dc,
3136f10f7d1SColy Li 				    unsigned int sectors)
314cafe5635SKent Overstreet {
315c4d951ddSKent Overstreet 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
316cafe5635SKent Overstreet 	    !dc->writeback_percent)
317cafe5635SKent Overstreet 		return 0;
318cafe5635SKent Overstreet 
31916749c23SKent Overstreet 	return bch_next_delay(&dc->writeback_rate, sectors);
320cafe5635SKent Overstreet }
321cafe5635SKent Overstreet 
3225e6926daSKent Overstreet struct dirty_io {
3235e6926daSKent Overstreet 	struct closure		cl;
3245e6926daSKent Overstreet 	struct cached_dev	*dc;
3256e6ccc67SMichael Lyle 	uint16_t		sequence;
3265e6926daSKent Overstreet 	struct bio		bio;
3275e6926daSKent Overstreet };
32872c27061SKent Overstreet 
329cafe5635SKent Overstreet static void dirty_init(struct keybuf_key *w)
330cafe5635SKent Overstreet {
331cafe5635SKent Overstreet 	struct dirty_io *io = w->private;
332cafe5635SKent Overstreet 	struct bio *bio = &io->bio;
333cafe5635SKent Overstreet 
33449add496SChristoph Hellwig 	bio_init(bio, NULL, bio->bi_inline_vecs,
33549add496SChristoph Hellwig 		 DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
336cafe5635SKent Overstreet 	if (!io->dc->writeback_percent)
337cafe5635SKent Overstreet 		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
338cafe5635SKent Overstreet 
3394f024f37SKent Overstreet 	bio->bi_iter.bi_size	= KEY_SIZE(&w->key) << 9;
340cafe5635SKent Overstreet 	bio->bi_private		= w;
341169ef1cfSKent Overstreet 	bch_bio_map(bio, NULL);
342cafe5635SKent Overstreet }
343cafe5635SKent Overstreet 
344cafe5635SKent Overstreet static void dirty_io_destructor(struct closure *cl)
345cafe5635SKent Overstreet {
346cafe5635SKent Overstreet 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
3471fae7cf0SColy Li 
348cafe5635SKent Overstreet 	kfree(io);
349cafe5635SKent Overstreet }
350cafe5635SKent Overstreet 
351cafe5635SKent Overstreet static void write_dirty_finish(struct closure *cl)
352cafe5635SKent Overstreet {
353cafe5635SKent Overstreet 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
354cafe5635SKent Overstreet 	struct keybuf_key *w = io->bio.bi_private;
355cafe5635SKent Overstreet 	struct cached_dev *dc = io->dc;
356cafe5635SKent Overstreet 
357491221f8SGuoqing Jiang 	bio_free_pages(&io->bio);
358cafe5635SKent Overstreet 
359cafe5635SKent Overstreet 	/* This is kind of a dumb way of signalling errors. */
360cafe5635SKent Overstreet 	if (KEY_DIRTY(&w->key)) {
3616054c6d4SKent Overstreet 		int ret;
3626f10f7d1SColy Li 		unsigned int i;
363cc7b8819SKent Overstreet 		struct keylist keys;
3640b93207aSKent Overstreet 
3650b93207aSKent Overstreet 		bch_keylist_init(&keys);
366cafe5635SKent Overstreet 
3671b207d80SKent Overstreet 		bkey_copy(keys.top, &w->key);
3681b207d80SKent Overstreet 		SET_KEY_DIRTY(keys.top, false);
3691b207d80SKent Overstreet 		bch_keylist_push(&keys);
370cafe5635SKent Overstreet 
371cafe5635SKent Overstreet 		for (i = 0; i < KEY_PTRS(&w->key); i++)
372cafe5635SKent Overstreet 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
373cafe5635SKent Overstreet 
374cc7b8819SKent Overstreet 		ret = bch_btree_insert(dc->disk.c, &keys, NULL, &w->key);
375cafe5635SKent Overstreet 
3766054c6d4SKent Overstreet 		if (ret)
377c37511b8SKent Overstreet 			trace_bcache_writeback_collision(&w->key);
378c37511b8SKent Overstreet 
3796054c6d4SKent Overstreet 		atomic_long_inc(ret
380cafe5635SKent Overstreet 				? &dc->disk.c->writeback_keys_failed
381cafe5635SKent Overstreet 				: &dc->disk.c->writeback_keys_done);
382cafe5635SKent Overstreet 	}
383cafe5635SKent Overstreet 
384cafe5635SKent Overstreet 	bch_keybuf_del(&dc->writeback_keys, w);
385c2a4f318SKent Overstreet 	up(&dc->in_flight);
386cafe5635SKent Overstreet 
387cafe5635SKent Overstreet 	closure_return_with_destructor(cl, dirty_io_destructor);
388cafe5635SKent Overstreet }
389cafe5635SKent Overstreet 
3904246a0b6SChristoph Hellwig static void dirty_endio(struct bio *bio)
391cafe5635SKent Overstreet {
392cafe5635SKent Overstreet 	struct keybuf_key *w = bio->bi_private;
393cafe5635SKent Overstreet 	struct dirty_io *io = w->private;
394cafe5635SKent Overstreet 
395bf78980fSColy Li 	if (bio->bi_status) {
396cafe5635SKent Overstreet 		SET_KEY_DIRTY(&w->key, false);
397bf78980fSColy Li 		bch_count_backing_io_errors(io->dc, bio);
398bf78980fSColy Li 	}
399cafe5635SKent Overstreet 
400cafe5635SKent Overstreet 	closure_put(&io->cl);
401cafe5635SKent Overstreet }
402cafe5635SKent Overstreet 
403cafe5635SKent Overstreet static void write_dirty(struct closure *cl)
404cafe5635SKent Overstreet {
405cafe5635SKent Overstreet 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
406cafe5635SKent Overstreet 	struct keybuf_key *w = io->bio.bi_private;
4076e6ccc67SMichael Lyle 	struct cached_dev *dc = io->dc;
4086e6ccc67SMichael Lyle 
4096e6ccc67SMichael Lyle 	uint16_t next_sequence;
4106e6ccc67SMichael Lyle 
4116e6ccc67SMichael Lyle 	if (atomic_read(&dc->writeback_sequence_next) != io->sequence) {
4126e6ccc67SMichael Lyle 		/* Not our turn to write; wait for a write to complete */
4136e6ccc67SMichael Lyle 		closure_wait(&dc->writeback_ordering_wait, cl);
4146e6ccc67SMichael Lyle 
4156e6ccc67SMichael Lyle 		if (atomic_read(&dc->writeback_sequence_next) == io->sequence) {
4166e6ccc67SMichael Lyle 			/*
4176e6ccc67SMichael Lyle 			 * Edge case-- it happened in indeterminate order
4186e6ccc67SMichael Lyle 			 * relative to when we were added to the wait list.
4196e6ccc67SMichael Lyle 			 */
4206e6ccc67SMichael Lyle 			closure_wake_up(&dc->writeback_ordering_wait);
4216e6ccc67SMichael Lyle 		}
4226e6ccc67SMichael Lyle 
4236e6ccc67SMichael Lyle 		continue_at(cl, write_dirty, io->dc->writeback_write_wq);
4246e6ccc67SMichael Lyle 		return;
4256e6ccc67SMichael Lyle 	}
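	/*
	 * Illustrative example (assumed numbers): if this io carries
	 * sequence 7 while writeback_sequence_next is still 5, the branch
	 * above parks it on writeback_ordering_wait; each time an earlier
	 * write completes it is woken, re-enters write_dirty(), and only
	 * proceeds once writeback_sequence_next reaches 7.
	 */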
4266e6ccc67SMichael Lyle 
4276e6ccc67SMichael Lyle 	next_sequence = io->sequence + 1;
428cafe5635SKent Overstreet 
4295fa89fb9SMichael Lyle 	/*
4305fa89fb9SMichael Lyle 	 * IO errors are signalled using the dirty bit on the key.
4315fa89fb9SMichael Lyle 	 * If we failed to read, we should not attempt to write to the
4325fa89fb9SMichael Lyle 	 * backing device.  Instead, immediately go to write_dirty_finish
4335fa89fb9SMichael Lyle 	 * to clean up.
4345fa89fb9SMichael Lyle 	 */
4355fa89fb9SMichael Lyle 	if (KEY_DIRTY(&w->key)) {
436cafe5635SKent Overstreet 		dirty_init(w);
437c34b7ac6SChristoph Hellwig 		io->bio.bi_opf = REQ_OP_WRITE;
4384f024f37SKent Overstreet 		io->bio.bi_iter.bi_sector = KEY_START(&w->key);
43974d46992SChristoph Hellwig 		bio_set_dev(&io->bio, io->dc->bdev);
440cafe5635SKent Overstreet 		io->bio.bi_end_io	= dirty_endio;
441cafe5635SKent Overstreet 
44227a40ab9SColy Li 		/* I/O request sent to backing device */
443771f393eSColy Li 		closure_bio_submit(io->dc->disk.c, &io->bio, cl);
4445fa89fb9SMichael Lyle 	}
445cafe5635SKent Overstreet 
4466e6ccc67SMichael Lyle 	atomic_set(&dc->writeback_sequence_next, next_sequence);
4476e6ccc67SMichael Lyle 	closure_wake_up(&dc->writeback_ordering_wait);
4486e6ccc67SMichael Lyle 
4499baf3097STang Junhui 	continue_at(cl, write_dirty_finish, io->dc->writeback_write_wq);
450cafe5635SKent Overstreet }
451cafe5635SKent Overstreet 
4524246a0b6SChristoph Hellwig static void read_dirty_endio(struct bio *bio)
453cafe5635SKent Overstreet {
454cafe5635SKent Overstreet 	struct keybuf_key *w = bio->bi_private;
455cafe5635SKent Overstreet 	struct dirty_io *io = w->private;
456cafe5635SKent Overstreet 
4575138ac67SColy Li 	/* is_read = 1 */
45811e9560eSChristoph Hellwig 	bch_count_io_errors(io->dc->disk.c->cache,
4595138ac67SColy Li 			    bio->bi_status, 1,
4605138ac67SColy Li 			    "reading dirty data from cache");
461cafe5635SKent Overstreet 
4624246a0b6SChristoph Hellwig 	dirty_endio(bio);
463cafe5635SKent Overstreet }
464cafe5635SKent Overstreet 
465cafe5635SKent Overstreet static void read_dirty_submit(struct closure *cl)
466cafe5635SKent Overstreet {
467cafe5635SKent Overstreet 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
468cafe5635SKent Overstreet 
469771f393eSColy Li 	closure_bio_submit(io->dc->disk.c, &io->bio, cl);
470cafe5635SKent Overstreet 
4719baf3097STang Junhui 	continue_at(cl, write_dirty, io->dc->writeback_write_wq);
472cafe5635SKent Overstreet }
473cafe5635SKent Overstreet 
4745e6926daSKent Overstreet static void read_dirty(struct cached_dev *dc)
475cafe5635SKent Overstreet {
4766f10f7d1SColy Li 	unsigned int delay = 0;
477539d39ebSTang Junhui 	struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
478539d39ebSTang Junhui 	size_t size;
479539d39ebSTang Junhui 	int nk, i;
480cafe5635SKent Overstreet 	struct dirty_io *io;
4815e6926daSKent Overstreet 	struct closure cl;
4826e6ccc67SMichael Lyle 	uint16_t sequence = 0;
4835e6926daSKent Overstreet 
4846e6ccc67SMichael Lyle 	BUG_ON(!llist_empty(&dc->writeback_ordering_wait.list));
4856e6ccc67SMichael Lyle 	atomic_set(&dc->writeback_sequence_next, sequence);
4865e6926daSKent Overstreet 	closure_init_stack(&cl);
487cafe5635SKent Overstreet 
488cafe5635SKent Overstreet 	/*
489cafe5635SKent Overstreet 	 * XXX: if we error, background writeback just spins. Should use some
490cafe5635SKent Overstreet 	 * mempools.
491cafe5635SKent Overstreet 	 */
492cafe5635SKent Overstreet 
493539d39ebSTang Junhui 	next = bch_keybuf_next(&dc->writeback_keys);
4945e6926daSKent Overstreet 
495771f393eSColy Li 	while (!kthread_should_stop() &&
496771f393eSColy Li 	       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
497771f393eSColy Li 	       next) {
498539d39ebSTang Junhui 		size = 0;
499539d39ebSTang Junhui 		nk = 0;
500539d39ebSTang Junhui 
501539d39ebSTang Junhui 		do {
502539d39ebSTang Junhui 			BUG_ON(ptr_stale(dc->disk.c, &next->key, 0));
503539d39ebSTang Junhui 
504539d39ebSTang Junhui 			/*
505539d39ebSTang Junhui 			 * Don't combine too many operations, even if they
506539d39ebSTang Junhui 			 * are all small.
507539d39ebSTang Junhui 			 */
508539d39ebSTang Junhui 			if (nk >= MAX_WRITEBACKS_IN_PASS)
509cafe5635SKent Overstreet 				break;
510cafe5635SKent Overstreet 
511539d39ebSTang Junhui 			/*
512539d39ebSTang Junhui 			 * If the current operation is very large, don't
513539d39ebSTang Junhui 			 * further combine operations.
514539d39ebSTang Junhui 			 */
515539d39ebSTang Junhui 			if (size >= MAX_WRITESIZE_IN_PASS)
516539d39ebSTang Junhui 				break;
517cafe5635SKent Overstreet 
518539d39ebSTang Junhui 			/*
519539d39ebSTang Junhui 			 * Operations are only eligible to be combined
520539d39ebSTang Junhui 			 * if they are contiguous.
521539d39ebSTang Junhui 			 *
522539d39ebSTang Junhui 			 * TODO: add a heuristic willing to fire a
523539d39ebSTang Junhui 			 * certain amount of non-contiguous IO per pass,
524539d39ebSTang Junhui 			 * so that we can benefit from backing device
525539d39ebSTang Junhui 			 * command queueing.
526539d39ebSTang Junhui 			 */
527539d39ebSTang Junhui 			if ((nk != 0) && bkey_cmp(&keys[nk-1]->key,
528539d39ebSTang Junhui 						&START_KEY(&next->key)))
529539d39ebSTang Junhui 				break;
530cafe5635SKent Overstreet 
531539d39ebSTang Junhui 			size += KEY_SIZE(&next->key);
532539d39ebSTang Junhui 			keys[nk++] = next;
533539d39ebSTang Junhui 		} while ((next = bch_keybuf_next(&dc->writeback_keys)));
534cafe5635SKent Overstreet 
535539d39ebSTang Junhui 		/* Now we have gathered a set of 1..5 keys to write back. */
536539d39ebSTang Junhui 		for (i = 0; i < nk; i++) {
537539d39ebSTang Junhui 			w = keys[i];
538539d39ebSTang Junhui 
53929f1d5caSGustavo A. R. Silva 			io = kzalloc(struct_size(io, bio.bi_inline_vecs,
54029f1d5caSGustavo A. R. Silva 						DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS)),
541cafe5635SKent Overstreet 				     GFP_KERNEL);
542cafe5635SKent Overstreet 			if (!io)
543cafe5635SKent Overstreet 				goto err;
544cafe5635SKent Overstreet 
545cafe5635SKent Overstreet 			w->private	= io;
546cafe5635SKent Overstreet 			io->dc		= dc;
5476e6ccc67SMichael Lyle 			io->sequence    = sequence++;
548cafe5635SKent Overstreet 
549cafe5635SKent Overstreet 			dirty_init(w);
550c34b7ac6SChristoph Hellwig 			io->bio.bi_opf = REQ_OP_READ;
5514f024f37SKent Overstreet 			io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
55211e9560eSChristoph Hellwig 			bio_set_dev(&io->bio, dc->disk.c->cache->bdev);
553cafe5635SKent Overstreet 			io->bio.bi_end_io	= read_dirty_endio;
554cafe5635SKent Overstreet 
55525d8be77SMing Lei 			if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
556cafe5635SKent Overstreet 				goto err_free;
557cafe5635SKent Overstreet 
558c37511b8SKent Overstreet 			trace_bcache_writeback(&w->key);
559cafe5635SKent Overstreet 
560c2a4f318SKent Overstreet 			down(&dc->in_flight);
561cafe5635SKent Overstreet 
5623be11dbaSColy Li 			/*
5633be11dbaSColy Li 			 * We've acquired a semaphore for the maximum
564539d39ebSTang Junhui 			 * simultaneous number of writebacks; from here
565539d39ebSTang Junhui 			 * everything happens asynchronously.
566539d39ebSTang Junhui 			 */
567539d39ebSTang Junhui 			closure_call(&io->cl, read_dirty_submit, NULL, &cl);
568539d39ebSTang Junhui 		}
569539d39ebSTang Junhui 
570539d39ebSTang Junhui 		delay = writeback_delay(dc, size);
571539d39ebSTang Junhui 
572771f393eSColy Li 		while (!kthread_should_stop() &&
573771f393eSColy Li 		       !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) &&
574771f393eSColy Li 		       delay) {
575539d39ebSTang Junhui 			schedule_timeout_interruptible(delay);
576539d39ebSTang Junhui 			delay = writeback_delay(dc, 0);
577539d39ebSTang Junhui 		}
578cafe5635SKent Overstreet 	}
579cafe5635SKent Overstreet 
580cafe5635SKent Overstreet 	if (0) {
581cafe5635SKent Overstreet err_free:
582cafe5635SKent Overstreet 		kfree(w->private);
583cafe5635SKent Overstreet err:
584cafe5635SKent Overstreet 		bch_keybuf_del(&dc->writeback_keys, w);
585cafe5635SKent Overstreet 	}
586cafe5635SKent Overstreet 
587c2a4f318SKent Overstreet 	/*
588c2a4f318SKent Overstreet 	 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
589c2a4f318SKent Overstreet 	 * freed) before refilling again
590c2a4f318SKent Overstreet 	 */
5915e6926daSKent Overstreet 	closure_sync(&cl);
5925e6926daSKent Overstreet }
5935e6926daSKent Overstreet 
5945e6926daSKent Overstreet /* Scan for dirty data */
5955e6926daSKent Overstreet 
5966f10f7d1SColy Li void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
5975e6926daSKent Overstreet 				  uint64_t offset, int nr_sectors)
5985e6926daSKent Overstreet {
5995e6926daSKent Overstreet 	struct bcache_device *d = c->devices[inode];
6007a148126SColy Li 	unsigned int stripe_offset, sectors_dirty;
6017a148126SColy Li 	int stripe;
6025e6926daSKent Overstreet 
6035e6926daSKent Overstreet 	if (!d)
6045e6926daSKent Overstreet 		return;
6055e6926daSKent Overstreet 
6067a148126SColy Li 	stripe = offset_to_stripe(d, offset);
6077a148126SColy Li 	if (stripe < 0)
6087a148126SColy Li 		return;
6097a148126SColy Li 
61099a27d59STang Junhui 	if (UUID_FLASH_ONLY(&c->uuids[inode]))
61199a27d59STang Junhui 		atomic_long_add(nr_sectors, &c->flash_dev_dirty_sectors);
61299a27d59STang Junhui 
6135e6926daSKent Overstreet 	stripe_offset = offset & (d->stripe_size - 1);
6145e6926daSKent Overstreet 
6155e6926daSKent Overstreet 	while (nr_sectors) {
6166f10f7d1SColy Li 		int s = min_t(unsigned int, abs(nr_sectors),
6175e6926daSKent Overstreet 			      d->stripe_size - stripe_offset);
6185e6926daSKent Overstreet 
6195e6926daSKent Overstreet 		if (nr_sectors < 0)
6205e6926daSKent Overstreet 			s = -s;
6215e6926daSKent Overstreet 
62248a915a8SKent Overstreet 		if (stripe >= d->nr_stripes)
62348a915a8SKent Overstreet 			return;
62448a915a8SKent Overstreet 
62548a915a8SKent Overstreet 		sectors_dirty = atomic_add_return(s,
62648a915a8SKent Overstreet 					d->stripe_sectors_dirty + stripe);
6277b1002f7SMingzhe Zou 		if (sectors_dirty == d->stripe_size) {
6287b1002f7SMingzhe Zou 			if (!test_bit(stripe, d->full_dirty_stripes))
62948a915a8SKent Overstreet 				set_bit(stripe, d->full_dirty_stripes);
6307b1002f7SMingzhe Zou 		} else {
6317b1002f7SMingzhe Zou 			if (test_bit(stripe, d->full_dirty_stripes))
63248a915a8SKent Overstreet 				clear_bit(stripe, d->full_dirty_stripes);
6337b1002f7SMingzhe Zou 		}
63448a915a8SKent Overstreet 
6355e6926daSKent Overstreet 		nr_sectors -= s;
6365e6926daSKent Overstreet 		stripe_offset = 0;
6375e6926daSKent Overstreet 		stripe++;
6385e6926daSKent Overstreet 	}
6395e6926daSKent Overstreet }
6405e6926daSKent Overstreet 
6415e6926daSKent Overstreet static bool dirty_pred(struct keybuf *buf, struct bkey *k)
6425e6926daSKent Overstreet {
643b0d30981SColy Li 	struct cached_dev *dc = container_of(buf,
644b0d30981SColy Li 					     struct cached_dev,
645b0d30981SColy Li 					     writeback_keys);
646627ccd20SKent Overstreet 
647627ccd20SKent Overstreet 	BUG_ON(KEY_INODE(k) != dc->disk.id);
648627ccd20SKent Overstreet 
6495e6926daSKent Overstreet 	return KEY_DIRTY(k);
6505e6926daSKent Overstreet }
6515e6926daSKent Overstreet 
65248a915a8SKent Overstreet static void refill_full_stripes(struct cached_dev *dc)
6535e6926daSKent Overstreet {
65448a915a8SKent Overstreet 	struct keybuf *buf = &dc->writeback_keys;
6557a148126SColy Li 	unsigned int start_stripe, next_stripe;
6567a148126SColy Li 	int stripe;
65748a915a8SKent Overstreet 	bool wrapped = false;
6585e6926daSKent Overstreet 
65948a915a8SKent Overstreet 	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
6607a148126SColy Li 	if (stripe < 0)
66148a915a8SKent Overstreet 		stripe = 0;
66248a915a8SKent Overstreet 
66348a915a8SKent Overstreet 	start_stripe = stripe;
6645e6926daSKent Overstreet 
6655e6926daSKent Overstreet 	while (1) {
66648a915a8SKent Overstreet 		stripe = find_next_bit(dc->disk.full_dirty_stripes,
66748a915a8SKent Overstreet 				       dc->disk.nr_stripes, stripe);
6685e6926daSKent Overstreet 
66948a915a8SKent Overstreet 		if (stripe == dc->disk.nr_stripes)
67048a915a8SKent Overstreet 			goto next;
6715e6926daSKent Overstreet 
67248a915a8SKent Overstreet 		next_stripe = find_next_zero_bit(dc->disk.full_dirty_stripes,
67348a915a8SKent Overstreet 						 dc->disk.nr_stripes, stripe);
67448a915a8SKent Overstreet 
67548a915a8SKent Overstreet 		buf->last_scanned = KEY(dc->disk.id,
67648a915a8SKent Overstreet 					stripe * dc->disk.stripe_size, 0);
67748a915a8SKent Overstreet 
67848a915a8SKent Overstreet 		bch_refill_keybuf(dc->disk.c, buf,
67948a915a8SKent Overstreet 				  &KEY(dc->disk.id,
68048a915a8SKent Overstreet 				       next_stripe * dc->disk.stripe_size, 0),
68148a915a8SKent Overstreet 				  dirty_pred);
68248a915a8SKent Overstreet 
68348a915a8SKent Overstreet 		if (array_freelist_empty(&buf->freelist))
68448a915a8SKent Overstreet 			return;
68548a915a8SKent Overstreet 
68648a915a8SKent Overstreet 		stripe = next_stripe;
68748a915a8SKent Overstreet next:
68848a915a8SKent Overstreet 		if (wrapped && stripe > start_stripe)
68948a915a8SKent Overstreet 			return;
69048a915a8SKent Overstreet 
69148a915a8SKent Overstreet 		if (stripe == dc->disk.nr_stripes) {
69248a915a8SKent Overstreet 			stripe = 0;
69348a915a8SKent Overstreet 			wrapped = true;
69448a915a8SKent Overstreet 		}
6955e6926daSKent Overstreet 	}
6965e6926daSKent Overstreet }
6975e6926daSKent Overstreet 
698627ccd20SKent Overstreet /*
699627ccd20SKent Overstreet  * Returns true if we scanned the entire disk
700627ccd20SKent Overstreet  */
7015e6926daSKent Overstreet static bool refill_dirty(struct cached_dev *dc)
7025e6926daSKent Overstreet {
7035e6926daSKent Overstreet 	struct keybuf *buf = &dc->writeback_keys;
704627ccd20SKent Overstreet 	struct bkey start = KEY(dc->disk.id, 0, 0);
7055e6926daSKent Overstreet 	struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
706627ccd20SKent Overstreet 	struct bkey start_pos;
707627ccd20SKent Overstreet 
708627ccd20SKent Overstreet 	/*
709627ccd20SKent Overstreet 	 * make sure keybuf pos is inside the range for this disk - at bringup
710627ccd20SKent Overstreet 	 * we might not be attached yet so this disk's inode nr isn't
711627ccd20SKent Overstreet 	 * initialized then
712627ccd20SKent Overstreet 	 */
713627ccd20SKent Overstreet 	if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
714627ccd20SKent Overstreet 	    bkey_cmp(&buf->last_scanned, &end) > 0)
715627ccd20SKent Overstreet 		buf->last_scanned = start;
71648a915a8SKent Overstreet 
71748a915a8SKent Overstreet 	if (dc->partial_stripes_expensive) {
71848a915a8SKent Overstreet 		refill_full_stripes(dc);
71948a915a8SKent Overstreet 		if (array_freelist_empty(&buf->freelist))
72048a915a8SKent Overstreet 			return false;
72148a915a8SKent Overstreet 	}
7225e6926daSKent Overstreet 
723627ccd20SKent Overstreet 	start_pos = buf->last_scanned;
7245e6926daSKent Overstreet 	bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
7255e6926daSKent Overstreet 
726627ccd20SKent Overstreet 	if (bkey_cmp(&buf->last_scanned, &end) < 0)
727627ccd20SKent Overstreet 		return false;
728627ccd20SKent Overstreet 
729627ccd20SKent Overstreet 	/*
730627ccd20SKent Overstreet 	 * If we get to the end start scanning again from the beginning, and
731627ccd20SKent Overstreet 	 * only scan up to where we initially started scanning from:
732627ccd20SKent Overstreet 	 */
733627ccd20SKent Overstreet 	buf->last_scanned = start;
734627ccd20SKent Overstreet 	bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
735627ccd20SKent Overstreet 
736627ccd20SKent Overstreet 	return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
7375e6926daSKent Overstreet }
7385e6926daSKent Overstreet 
7395e6926daSKent Overstreet static int bch_writeback_thread(void *arg)
7405e6926daSKent Overstreet {
7415e6926daSKent Overstreet 	struct cached_dev *dc = arg;
742771f393eSColy Li 	struct cache_set *c = dc->disk.c;
7435e6926daSKent Overstreet 	bool searched_full_index;
7445e6926daSKent Overstreet 
745a8500fc8SMichael Lyle 	bch_ratelimit_reset(&dc->writeback_rate);
746a8500fc8SMichael Lyle 
747771f393eSColy Li 	while (!kthread_should_stop() &&
748771f393eSColy Li 	       !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
7495e6926daSKent Overstreet 		down_write(&dc->writeback_lock);
75099361bbfSColy Li 		set_current_state(TASK_INTERRUPTIBLE);
751fadd94e0SColy Li 		/*
752fadd94e0SColy Li 		 * If the bcache device is detaching, skip here and continue
753fadd94e0SColy Li 		 * to perform writeback. Otherwise, if there is no dirty data
754fadd94e0SColy Li 		 * on the cache, or there is dirty data but writeback is
755fadd94e0SColy Li 		 * disabled, the writeback thread should sleep here and wait
756fadd94e0SColy Li 		 * for others to wake it up.
757fadd94e0SColy Li 		 */
758fadd94e0SColy Li 		if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) &&
759fadd94e0SColy Li 		    (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) {
7605e6926daSKent Overstreet 			up_write(&dc->writeback_lock);
7615e6926daSKent Overstreet 
762771f393eSColy Li 			if (kthread_should_stop() ||
763771f393eSColy Li 			    test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
76499361bbfSColy Li 				set_current_state(TASK_RUNNING);
765804f3c69SColy Li 				break;
76699361bbfSColy Li 			}
7675e6926daSKent Overstreet 
7685e6926daSKent Overstreet 			schedule();
7695e6926daSKent Overstreet 			continue;
7705e6926daSKent Overstreet 		}
77199361bbfSColy Li 		set_current_state(TASK_RUNNING);
7725e6926daSKent Overstreet 
7735e6926daSKent Overstreet 		searched_full_index = refill_dirty(dc);
7745e6926daSKent Overstreet 
7755e6926daSKent Overstreet 		if (searched_full_index &&
7765e6926daSKent Overstreet 		    RB_EMPTY_ROOT(&dc->writeback_keys.keys)) {
7775e6926daSKent Overstreet 			atomic_set(&dc->has_dirty, 0);
7785e6926daSKent Overstreet 			SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);
7795e6926daSKent Overstreet 			bch_write_bdev_super(dc, NULL);
780fadd94e0SColy Li 			/*
781fadd94e0SColy Li 			 * If bcache device is detaching via sysfs interface,
782fadd94e0SColy Li 			 * writeback thread should stop after there is no dirty
783fadd94e0SColy Li 			 * data on cache. BCACHE_DEV_DETACHING flag is set in
784fadd94e0SColy Li 			 * bch_cached_dev_detach().
785fadd94e0SColy Li 			 */
7863943b040SShan Hai 			if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
787df4ad532SDongsheng Yang 				struct closure cl;
788df4ad532SDongsheng Yang 
789df4ad532SDongsheng Yang 				closure_init_stack(&cl);
790df4ad532SDongsheng Yang 				memset(&dc->sb.set_uuid, 0, 16);
791df4ad532SDongsheng Yang 				SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
792df4ad532SDongsheng Yang 
793df4ad532SDongsheng Yang 				bch_write_bdev_super(dc, &cl);
794df4ad532SDongsheng Yang 				closure_sync(&cl);
795df4ad532SDongsheng Yang 
7963943b040SShan Hai 				up_write(&dc->writeback_lock);
797fadd94e0SColy Li 				break;
7985e6926daSKent Overstreet 			}
7997a671d8eSColy Li 
8007a671d8eSColy Li 			/*
8017a671d8eSColy Li 			 * When the dirty data ratio is high (e.g. 50%+), there
8027a671d8eSColy Li 			 * might be heavy bucket fragmentation after writeback
8037a671d8eSColy Li 			 * finishes, which hurts subsequent write performance.
8047a671d8eSColy Li 			 * If users really care about write performance they
8057a671d8eSColy Li 			 * may set BCH_ENABLE_AUTO_GC via sysfs; then, when
8067a671d8eSColy Li 			 * BCH_DO_AUTO_GC is set, the garbage collection thread
8077a671d8eSColy Li 			 * will be woken up here. After the moving gc, the
8087a671d8eSColy Li 			 * shrunk btree and the discarded free bucket space on
8097a671d8eSColy Li 			 * the SSD may help subsequent write requests.
8107a671d8eSColy Li 			 */
8117a671d8eSColy Li 			if (c->gc_after_writeback ==
8127a671d8eSColy Li 			    (BCH_ENABLE_AUTO_GC|BCH_DO_AUTO_GC)) {
8137a671d8eSColy Li 				c->gc_after_writeback &= ~BCH_DO_AUTO_GC;
8147a671d8eSColy Li 				force_wake_up_gc(c);
8157a671d8eSColy Li 			}
8163943b040SShan Hai 		}
8175e6926daSKent Overstreet 
8185e6926daSKent Overstreet 		up_write(&dc->writeback_lock);
8195e6926daSKent Overstreet 
8205e6926daSKent Overstreet 		read_dirty(dc);
8215e6926daSKent Overstreet 
8225e6926daSKent Overstreet 		if (searched_full_index) {
8236f10f7d1SColy Li 			unsigned int delay = dc->writeback_delay * HZ;
8245e6926daSKent Overstreet 
8255e6926daSKent Overstreet 			while (delay &&
8265e6926daSKent Overstreet 			       !kthread_should_stop() &&
827771f393eSColy Li 			       !test_bit(CACHE_SET_IO_DISABLE, &c->flags) &&
828c4d951ddSKent Overstreet 			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
8299e5c3535SSlava Pestov 				delay = schedule_timeout_interruptible(delay);
830a8500fc8SMichael Lyle 
831a8500fc8SMichael Lyle 			bch_ratelimit_reset(&dc->writeback_rate);
8325e6926daSKent Overstreet 		}
8335e6926daSKent Overstreet 	}
8345e6926daSKent Overstreet 
83597d26ae7SLi Lei 	if (dc->writeback_write_wq)
8367e865ebaSColy Li 		destroy_workqueue(dc->writeback_write_wq);
83797d26ae7SLi Lei 
838804f3c69SColy Li 	cached_dev_put(dc);
839771f393eSColy Li 	wait_for_kthread_stop();
840804f3c69SColy Li 
8415e6926daSKent Overstreet 	return 0;
842cafe5635SKent Overstreet }
843cafe5635SKent Overstreet 
844444fc0b6SKent Overstreet /* Init */
84594f71c16STang Junhui #define INIT_KEYS_EACH_TIME	500000
846444fc0b6SKent Overstreet 
847c18536a7SKent Overstreet struct sectors_dirty_init {
848c18536a7SKent Overstreet 	struct btree_op	op;
8496f10f7d1SColy Li 	unsigned int	inode;
85094f71c16STang Junhui 	size_t		count;
851c18536a7SKent Overstreet };
852c18536a7SKent Overstreet 
853c18536a7SKent Overstreet static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
85448dad8baSKent Overstreet 				 struct bkey *k)
855444fc0b6SKent Overstreet {
856c18536a7SKent Overstreet 	struct sectors_dirty_init *op = container_of(_op,
857c18536a7SKent Overstreet 						struct sectors_dirty_init, op);
85848dad8baSKent Overstreet 	if (KEY_INODE(k) > op->inode)
85948dad8baSKent Overstreet 		return MAP_DONE;
860444fc0b6SKent Overstreet 
861444fc0b6SKent Overstreet 	if (KEY_DIRTY(k))
86248dad8baSKent Overstreet 		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
86348dad8baSKent Overstreet 					     KEY_START(k), KEY_SIZE(k));
864444fc0b6SKent Overstreet 
86594f71c16STang Junhui 	op->count++;
86680db4e47SColy Li 	if (!(op->count % INIT_KEYS_EACH_TIME))
86780db4e47SColy Li 		cond_resched();
86894f71c16STang Junhui 
86948dad8baSKent Overstreet 	return MAP_CONTINUE;
870444fc0b6SKent Overstreet }
871444fc0b6SKent Overstreet 
872b144e45fSColy Li static int bch_root_node_dirty_init(struct cache_set *c,
873b144e45fSColy Li 				     struct bcache_device *d,
874b144e45fSColy Li 				     struct bkey *k)
875444fc0b6SKent Overstreet {
876c18536a7SKent Overstreet 	struct sectors_dirty_init op;
87794f71c16STang Junhui 	int ret;
878444fc0b6SKent Overstreet 
879b54d6934SKent Overstreet 	bch_btree_op_init(&op.op, -1);
880175206cfSTang Junhui 	op.inode = d->id;
88194f71c16STang Junhui 	op.count = 0;
88248dad8baSKent Overstreet 
883b144e45fSColy Li 	ret = bcache_btree(map_keys_recurse,
884b144e45fSColy Li 			   k,
885b144e45fSColy Li 			   c->root,
886b144e45fSColy Li 			   &op.op,
88780db4e47SColy Li 			   &KEY(op.inode, 0, 0),
888b144e45fSColy Li 			   sectors_dirty_init_fn,
889b144e45fSColy Li 			   0);
89080db4e47SColy Li 	if (ret < 0)
89146f5aa88SJoe Perches 		pr_warn("sectors dirty init failed, ret=%d!\n", ret);
892b144e45fSColy Li 
893f0854489SMingzhe Zou 	/*
894f0854489SMingzhe Zou 	 * The op may have been added to the cache_set's btree_cache_wait
895f0854489SMingzhe Zou 	 * list in mca_cannibalize(); we must ensure it is removed from
896f0854489SMingzhe Zou 	 * the list and that btree_cache_alloc_lock is released before
897f0854489SMingzhe Zou 	 * the op memory is freed.
898f0854489SMingzhe Zou 	 * Otherwise, the btree_cache_wait list would be corrupted.
899f0854489SMingzhe Zou 	 */
900f0854489SMingzhe Zou 	bch_cannibalize_unlock(c);
901f0854489SMingzhe Zou 	finish_wait(&c->btree_cache_wait, &(&op.op)->wait);
902f0854489SMingzhe Zou 
903b144e45fSColy Li 	return ret;
904b144e45fSColy Li }
905b144e45fSColy Li 
906b144e45fSColy Li static int bch_dirty_init_thread(void *arg)
907b144e45fSColy Li {
908b144e45fSColy Li 	struct dirty_init_thrd_info *info = arg;
909b144e45fSColy Li 	struct bch_dirty_init_state *state = info->state;
910b144e45fSColy Li 	struct cache_set *c = state->c;
911*6479b9f4SMatthew Mirvish 	struct btree_iter_stack iter;
912b144e45fSColy Li 	struct bkey *k, *p;
913b144e45fSColy Li 	int cur_idx, prev_idx, skip_nr;
914b144e45fSColy Li 
915b144e45fSColy Li 	k = p = NULL;
91666534172SColin Ian King 	prev_idx = 0;
917b144e45fSColy Li 
918*6479b9f4SMatthew Mirvish 	bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
919*6479b9f4SMatthew Mirvish 	k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
920b144e45fSColy Li 	BUG_ON(!k);
921b144e45fSColy Li 
922b144e45fSColy Li 	p = k;
923b144e45fSColy Li 
924b144e45fSColy Li 	while (k) {
925b144e45fSColy Li 		spin_lock(&state->idx_lock);
926b144e45fSColy Li 		cur_idx = state->key_idx;
927b144e45fSColy Li 		state->key_idx++;
928b144e45fSColy Li 		spin_unlock(&state->idx_lock);
929b144e45fSColy Li 
930b144e45fSColy Li 		skip_nr = cur_idx - prev_idx;
931b144e45fSColy Li 
932b144e45fSColy Li 		while (skip_nr) {
933*6479b9f4SMatthew Mirvish 			k = bch_btree_iter_next_filter(&iter.iter,
934b144e45fSColy Li 						       &c->root->keys,
935b144e45fSColy Li 						       bch_ptr_bad);
936b144e45fSColy Li 			if (k)
937b144e45fSColy Li 				p = k;
938b144e45fSColy Li 			else {
939b144e45fSColy Li 				atomic_set(&state->enough, 1);
940b144e45fSColy Li 				/* Update state->enough earlier */
941eb9b6666SColy Li 				smp_mb__after_atomic();
942b144e45fSColy Li 				goto out;
943b144e45fSColy Li 			}
944b144e45fSColy Li 			skip_nr--;
945b144e45fSColy Li 		}
946b144e45fSColy Li 
947b144e45fSColy Li 		if (p) {
948b144e45fSColy Li 			if (bch_root_node_dirty_init(c, state->d, p) < 0)
949b144e45fSColy Li 				goto out;
950b144e45fSColy Li 		}
951b144e45fSColy Li 
952b144e45fSColy Li 		p = NULL;
953b144e45fSColy Li 		prev_idx = cur_idx;
954b144e45fSColy Li 	}
955b144e45fSColy Li 
956b144e45fSColy Li out:
957b144e45fSColy Li 	/* In order to wake up state->wait in time */
958eb9b6666SColy Li 	smp_mb__before_atomic();
959b144e45fSColy Li 	if (atomic_dec_and_test(&state->started))
960b144e45fSColy Li 		wake_up(&state->wait);
961b144e45fSColy Li 
962b144e45fSColy Li 	return 0;
963b144e45fSColy Li }
964b144e45fSColy Li 
965b144e45fSColy Li static int bch_btre_dirty_init_thread_nr(void)
966b144e45fSColy Li {
967b144e45fSColy Li 	int n = num_online_cpus()/2;
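	/*
	 * For example (illustrative only): on an 8-CPU system this gives
	 * n = 4 dirty-init threads, assuming BCH_DIRTY_INIT_THRD_MAX is at
	 * least 4; a single-CPU system is still clamped up to one thread.
	 */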
968b144e45fSColy Li 
969b144e45fSColy Li 	if (n == 0)
970b144e45fSColy Li 		n = 1;
971b144e45fSColy Li 	else if (n > BCH_DIRTY_INIT_THRD_MAX)
972b144e45fSColy Li 		n = BCH_DIRTY_INIT_THRD_MAX;
973b144e45fSColy Li 
974b144e45fSColy Li 	return n;
975b144e45fSColy Li }
976b144e45fSColy Li 
977b144e45fSColy Li void bch_sectors_dirty_init(struct bcache_device *d)
978b144e45fSColy Li {
979b144e45fSColy Li 	int i;
980b163173dSMingzhe Zou 	struct btree *b = NULL;
981b144e45fSColy Li 	struct bkey *k = NULL;
982*6479b9f4SMatthew Mirvish 	struct btree_iter_stack iter;
983b144e45fSColy Li 	struct sectors_dirty_init op;
984b144e45fSColy Li 	struct cache_set *c = d->c;
9854dc34ae1SColy Li 	struct bch_dirty_init_state state;
986b144e45fSColy Li 
987b163173dSMingzhe Zou retry_lock:
988b163173dSMingzhe Zou 	b = c->root;
989b163173dSMingzhe Zou 	rw_lock(0, b, b->level);
990b163173dSMingzhe Zou 	if (b != c->root) {
991b163173dSMingzhe Zou 		rw_unlock(0, b);
992b163173dSMingzhe Zou 		goto retry_lock;
993b163173dSMingzhe Zou 	}
994b163173dSMingzhe Zou 
995b144e45fSColy Li 	/* Just count root keys if no leaf node */
996b144e45fSColy Li 	if (c->root->level == 0) {
997b144e45fSColy Li 		bch_btree_op_init(&op.op, -1);
998b144e45fSColy Li 		op.inode = d->id;
999b144e45fSColy Li 		op.count = 0;
1000b144e45fSColy Li 
1001b144e45fSColy Li 		for_each_key_filter(&c->root->keys,
100219aff881SMingzhe Zou 				    k, &iter, bch_ptr_invalid) {
100319aff881SMingzhe Zou 			if (KEY_INODE(k) != op.inode)
100419aff881SMingzhe Zou 				continue;
1005b144e45fSColy Li 			sectors_dirty_init_fn(&op.op, c->root, k);
100619aff881SMingzhe Zou 		}
100780db4e47SColy Li 
1008b163173dSMingzhe Zou 		rw_unlock(0, b);
1009b144e45fSColy Li 		return;
1010b144e45fSColy Li 	}
1011b144e45fSColy Li 
10127d6b902eSColy Li 	memset(&state, 0, sizeof(struct bch_dirty_init_state));
10134dc34ae1SColy Li 	state.c = c;
10144dc34ae1SColy Li 	state.d = d;
10154dc34ae1SColy Li 	state.total_threads = bch_btre_dirty_init_thread_nr();
10164dc34ae1SColy Li 	state.key_idx = 0;
10174dc34ae1SColy Li 	spin_lock_init(&state.idx_lock);
10184dc34ae1SColy Li 	atomic_set(&state.started, 0);
10194dc34ae1SColy Li 	atomic_set(&state.enough, 0);
10204dc34ae1SColy Li 	init_waitqueue_head(&state.wait);
1021b144e45fSColy Li 
10224dc34ae1SColy Li 	for (i = 0; i < state.total_threads; i++) {
10234dc34ae1SColy Li 		/* Fetch latest state.enough earlier */
1024eb9b6666SColy Li 		smp_mb__before_atomic();
10254dc34ae1SColy Li 		if (atomic_read(&state.enough))
1026b144e45fSColy Li 			break;
1027b144e45fSColy Li 
1028d1280fd4SMingzhe Zou 		atomic_inc(&state.started);
10294dc34ae1SColy Li 		state.infos[i].state = &state;
10304dc34ae1SColy Li 		state.infos[i].thread =
10314dc34ae1SColy Li 			kthread_run(bch_dirty_init_thread, &state.infos[i],
10324dc34ae1SColy Li 				    "bch_dirtcnt[%d]", i);
10334dc34ae1SColy Li 		if (IS_ERR(state.infos[i].thread)) {
103446f5aa88SJoe Perches 			pr_err("fails to run thread bch_dirty_init[%d]\n", i);
1035d1280fd4SMingzhe Zou 			atomic_dec(&state.started);
1036b144e45fSColy Li 			for (--i; i >= 0; i--)
10374dc34ae1SColy Li 				kthread_stop(state.infos[i].thread);
1038b144e45fSColy Li 			goto out;
1039b144e45fSColy Li 		}
1040b144e45fSColy Li 	}
1041b144e45fSColy Li 
1042b144e45fSColy Li out:
10434dc34ae1SColy Li 	/* Must wait for all threads to stop. */
10444dc34ae1SColy Li 	wait_event(state.wait, atomic_read(&state.started) == 0);
1045b163173dSMingzhe Zou 	rw_unlock(0, b);
1046444fc0b6SKent Overstreet }
1047444fc0b6SKent Overstreet 
10489e5c3535SSlava Pestov void bch_cached_dev_writeback_init(struct cached_dev *dc)
1049cafe5635SKent Overstreet {
1050c2a4f318SKent Overstreet 	sema_init(&dc->in_flight, 64);
1051cafe5635SKent Overstreet 	init_rwsem(&dc->writeback_lock);
105272c27061SKent Overstreet 	bch_keybuf_init(&dc->writeback_keys);
1053cafe5635SKent Overstreet 
1054cafe5635SKent Overstreet 	dc->writeback_metadata		= true;
105579b79146SShenghui Wang 	dc->writeback_running		= false;
105671dda2a5Sdongdong tao 	dc->writeback_consider_fragment = true;
1057cafe5635SKent Overstreet 	dc->writeback_percent		= 10;
1058cafe5635SKent Overstreet 	dc->writeback_delay		= 30;
1059ea8c5356SColy Li 	atomic_long_set(&dc->writeback_rate.rate, 1024);
1060ae82ddbfSMichael Lyle 	dc->writeback_rate_minimum	= 8;
1061cafe5635SKent Overstreet 
10627a5e3ecbSColy Li 	dc->writeback_rate_update_seconds = WRITEBACK_RATE_UPDATE_SECS_DEFAULT;
10631d316e65SMichael Lyle 	dc->writeback_rate_p_term_inverse = 40;
106471dda2a5Sdongdong tao 	dc->writeback_rate_fp_term_low = 1;
106571dda2a5Sdongdong tao 	dc->writeback_rate_fp_term_mid = 10;
106671dda2a5Sdongdong tao 	dc->writeback_rate_fp_term_high = 1000;
10671d316e65SMichael Lyle 	dc->writeback_rate_i_term_inverse = 10000;
1068cafe5635SKent Overstreet 
1069a1a2d8f0SColy Li 	/* For dc->writeback_lock contention in update_writeback_rate() */
1070a1a2d8f0SColy Li 	dc->rate_update_retry = 0;
1071a1a2d8f0SColy Li 
10723fd47bfeSColy Li 	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
10739e5c3535SSlava Pestov 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
10749e5c3535SSlava Pestov }
10759e5c3535SSlava Pestov 
10769e5c3535SSlava Pestov int bch_cached_dev_writeback_start(struct cached_dev *dc)
10779e5c3535SSlava Pestov {
10789baf3097STang Junhui 	dc->writeback_write_wq = alloc_workqueue("bcache_writeback_wq",
10799baf3097STang Junhui 						WQ_MEM_RECLAIM, 0);
10809baf3097STang Junhui 	if (!dc->writeback_write_wq)
10819baf3097STang Junhui 		return -ENOMEM;
10829baf3097STang Junhui 
1083804f3c69SColy Li 	cached_dev_get(dc);
10845e6926daSKent Overstreet 	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
10855e6926daSKent Overstreet 					      "bcache_writeback");
1086804f3c69SColy Li 	if (IS_ERR(dc->writeback_thread)) {
1087804f3c69SColy Li 		cached_dev_put(dc);
1088f54d801dSColy Li 		destroy_workqueue(dc->writeback_write_wq);
10895e6926daSKent Overstreet 		return PTR_ERR(dc->writeback_thread);
1090804f3c69SColy Li 	}
109179b79146SShenghui Wang 	dc->writeback_running = true;
10925e6926daSKent Overstreet 
10933fd47bfeSColy Li 	WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
1094cafe5635SKent Overstreet 	schedule_delayed_work(&dc->writeback_rate_update,
1095cafe5635SKent Overstreet 			      dc->writeback_rate_update_seconds * HZ);
1096cafe5635SKent Overstreet 
10979e5c3535SSlava Pestov 	bch_writeback_queue(dc);
10989e5c3535SSlava Pestov 
1099cafe5635SKent Overstreet 	return 0;
1100cafe5635SKent Overstreet }
1101