/*
 * block/blk-flush.c - Functions to sequence FLUSH and FUA writes.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/* FLUSH/FUA sequences */
enum {
	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
	QUEUE_FSEQ_DONE		= (1 << 4), /* sequence complete */
};

static struct request *queue_next_fseq(struct request_queue *q);

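/*
 * blk_flush_cur_seq - next pending step of the current flush sequence
 *
 * Completed (or skipped) steps are recorded as bits in q->flush_seq,
 * so the lowest clear bit is the step to issue next.  For example,
 * with QUEUE_FSEQ_STARTED | QUEUE_FSEQ_PREFLUSH set (0x3), ffz()
 * returns 2 and the next step is QUEUE_FSEQ_DATA (1 << 2).  Returns
 * 0 if no flush sequence is in progress.
 */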
unsigned blk_flush_cur_seq(struct request_queue *q)
{
	if (!q->flush_seq)
		return 0;
	return 1 << ffz(q->flush_seq);
}

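/*
 * blk_flush_complete_seq - record completion of @seq and advance the sequence
 *
 * The first error seen is latched in q->flush_err.  If further steps
 * remain, the next one is issued via queue_next_fseq(); otherwise the
 * original request is completed with the accumulated error and, if
 * other flushes are pending, the next one is moved to the head of the
 * queue.  Returns the request to be dispatched next, or NULL.
 */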
static struct request *blk_flush_complete_seq(struct request_queue *q,
					      unsigned seq, int error)
{
	struct request *next_rq = NULL;

	if (error && !q->flush_err)
		q->flush_err = error;

	BUG_ON(q->flush_seq & seq);
	q->flush_seq |= seq;

	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
		/* not complete yet, queue the next flush sequence */
		next_rq = queue_next_fseq(q);
	} else {
		/* complete this flush request */
		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
		q->orig_flush_rq = NULL;
		q->flush_seq = 0;

		/* dispatch the next flush if there's one */
		if (!list_empty(&q->pending_flushes)) {
			next_rq = list_entry_rq(q->pending_flushes.next);
			list_move(&next_rq->queuelist, &q->queue_head);
		}
	}
	return next_rq;
}

static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}

static void flush_data_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, struct request *rq,
			rq_end_io_fn *end_io)
{
	blk_rq_init(q, rq);
	rq->cmd_type = REQ_TYPE_FS;
	rq->cmd_flags = REQ_FLUSH;
	rq->rq_disk = q->orig_flush_rq->rq_disk;
	rq->end_io = end_io;

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

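/*
 * queue_next_fseq - issue the next step of the current flush sequence
 *
 * The per-queue q->flush_rq is reused for every step.  For the DATA
 * step it acts as a proxy for the original request: it is initialized
 * from the original bio and inherits whatever FLUSH/FUA flags remain
 * set on the original request.
 */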
static struct request *queue_next_fseq(struct request_queue *q)
{
	struct request *orig_rq = q->orig_flush_rq;
	struct request *rq = &q->flush_rq;

	switch (blk_flush_cur_seq(q)) {
	case QUEUE_FSEQ_PREFLUSH:
		queue_flush(q, rq, pre_flush_end_io);
		break;

	case QUEUE_FSEQ_DATA:
		/* initialize proxy request, inherit FLUSH/FUA and queue it */
		blk_rq_init(q, rq);
		init_request_from_bio(rq, orig_rq->bio);
		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
		rq->end_io = flush_data_end_io;

		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
		break;

	case QUEUE_FSEQ_POSTFLUSH:
		queue_flush(q, rq, post_flush_end_io);
		break;

	default:
		BUG();
	}
	return rq;
}

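/*
 * blk_do_flush - decide how to process a FLUSH/FUA request
 *
 * Requests carrying data that need neither a pre- nor a post-flush on
 * this queue are handed back for direct dispatch.  Otherwise a flush
 * sequence is started (or @rq is parked on q->pending_flushes if one
 * is already running) and the request to dispatch next, if any, is
 * returned.
 *
 * For example, a REQ_FLUSH|REQ_FUA write on a queue that advertises
 * REQ_FLUSH but not REQ_FUA goes through PREFLUSH, DATA and POSTFLUSH,
 * with the post-flush standing in for the unsupported FUA.
 */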
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
	unsigned int fflags = q->flush_flags; /* may change, cache it */
	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
	unsigned skip = 0;

	/*
	 * Special case.  If there's data but no flush is necessary,
	 * the request can be issued directly.
	 *
	 * Flush w/o data should be able to be issued directly too, but
	 * currently some drivers assume that rq->bio contains
	 * non-zero data if it isn't NULL, and empty FLUSH requests
	 * getting here usually have bios without data.
	 */
	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
		rq->cmd_flags &= ~REQ_FLUSH;
		if (!has_fua)
			rq->cmd_flags &= ~REQ_FUA;
		return rq;
	}

	/*
	 * Sequenced flushes can't be processed in parallel.  If
	 * another one is already in progress, queue for later
	 * processing.
	 */
	if (q->flush_seq) {
		list_move_tail(&rq->queuelist, &q->pending_flushes);
		return NULL;
	}

	/*
	 * Start a new flush sequence.
	 */
	q->flush_err = 0;
	q->flush_seq |= QUEUE_FSEQ_STARTED;

	/* adjust FLUSH/FUA of the original request and stash it away */
	rq->cmd_flags &= ~REQ_FLUSH;
	if (!has_fua)
		rq->cmd_flags &= ~REQ_FUA;
	blk_dequeue_request(rq);
	q->orig_flush_rq = rq;

	/* skip unneeded sequences and return the first one */
	if (!do_preflush)
		skip |= QUEUE_FSEQ_PREFLUSH;
	if (!blk_rq_sectors(rq))
		skip |= QUEUE_FSEQ_DATA;
	if (!do_postflush)
		skip |= QUEUE_FSEQ_POSTFLUSH;
	return blk_flush_complete_seq(q, skip, 0);
}

static void bio_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}
	if (bio->bi_private)
		complete(bio->bi_private);
	bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @error_sector:	where to store the error sector (may be NULL)
 * @flags:	BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a flush for the block device in question.  The caller may
 *    supply room for storing the error sector of a failed flush.  If
 *    the BLKDEV_WAIT flag is not passed, the flush is only submitted;
 *    the caller cannot rely on it having completed when this function
 *    returns.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
		sector_t *error_sector, unsigned long flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret = 0;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	/*
	 * Some block devices may not have their queue correctly set up here
	 * (e.g. loop device without a backing file), and so issuing a flush
	 * here will panic.  Ensure there is a request function before issuing
	 * the barrier.
	 */
	if (!q->make_request_fn)
		return -ENXIO;

	bio = bio_alloc(gfp_mask, 0);
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_bdev = bdev;
	if (test_bit(BLKDEV_WAIT, &flags))
		bio->bi_private = &wait;

	bio_get(bio);
	submit_bio(WRITE_BARRIER, bio);
	if (test_bit(BLKDEV_WAIT, &flags)) {
		wait_for_completion(&wait);
		/*
		 * The driver must store the error location in ->bi_sector, if
		 * it supports it.  For non-stacked drivers, this should be
		 * copied from blk_rq_pos(rq).
		 */
		if (error_sector)
			*error_sector = bio->bi_sector;
	}

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
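
/*
 * Illustrative use (not part of this file): a hypothetical caller that
 * issues a synchronous flush and treats "flush not supported" as
 * success.  It assumes the BLKDEV_IFL_WAIT bit-mask form of BLKDEV_WAIT
 * declared in <linux/blkdev.h> in this kernel version.
 *
 *	static int example_sync_flush(struct block_device *bdev)
 *	{
 *		int ret;
 *
 *		ret = blkdev_issue_flush(bdev, GFP_KERNEL, NULL,
 *					 BLKDEV_IFL_WAIT);
 *		if (ret == -EOPNOTSUPP)
 *			ret = 0;
 *		return ret;
 *	}
 */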