18839a0e0STejun Heo /* 24fed947cSTejun Heo * Functions to sequence FLUSH and FUA writes. 38839a0e0STejun Heo */ 48839a0e0STejun Heo #include <linux/kernel.h> 58839a0e0STejun Heo #include <linux/module.h> 68839a0e0STejun Heo #include <linux/bio.h> 78839a0e0STejun Heo #include <linux/blkdev.h> 88839a0e0STejun Heo #include <linux/gfp.h> 98839a0e0STejun Heo 108839a0e0STejun Heo #include "blk.h" 118839a0e0STejun Heo 124fed947cSTejun Heo /* FLUSH/FUA sequences */ 134fed947cSTejun Heo enum { 144fed947cSTejun Heo QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ 154fed947cSTejun Heo QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ 164fed947cSTejun Heo QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ 174fed947cSTejun Heo QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ 184fed947cSTejun Heo QUEUE_FSEQ_DONE = (1 << 4), 194fed947cSTejun Heo }; 204fed947cSTejun Heo 21dd4c133fSTejun Heo static struct request *queue_next_fseq(struct request_queue *q); 228839a0e0STejun Heo 23dd4c133fSTejun Heo unsigned blk_flush_cur_seq(struct request_queue *q) 248839a0e0STejun Heo { 25dd4c133fSTejun Heo if (!q->flush_seq) 268839a0e0STejun Heo return 0; 27dd4c133fSTejun Heo return 1 << ffz(q->flush_seq); 288839a0e0STejun Heo } 298839a0e0STejun Heo 30dd4c133fSTejun Heo static struct request *blk_flush_complete_seq(struct request_queue *q, 318839a0e0STejun Heo unsigned seq, int error) 328839a0e0STejun Heo { 338839a0e0STejun Heo struct request *next_rq = NULL; 348839a0e0STejun Heo 35dd4c133fSTejun Heo if (error && !q->flush_err) 36dd4c133fSTejun Heo q->flush_err = error; 378839a0e0STejun Heo 38dd4c133fSTejun Heo BUG_ON(q->flush_seq & seq); 39dd4c133fSTejun Heo q->flush_seq |= seq; 408839a0e0STejun Heo 41dd4c133fSTejun Heo if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { 42dd4c133fSTejun Heo /* not complete yet, queue the next flush sequence */ 43dd4c133fSTejun Heo next_rq = queue_next_fseq(q); 448839a0e0STejun Heo } else { 45dd4c133fSTejun Heo /* complete this flush request */ 46dd4c133fSTejun Heo __blk_end_request_all(q->orig_flush_rq, q->flush_err); 47dd4c133fSTejun Heo q->orig_flush_rq = NULL; 48dd4c133fSTejun Heo q->flush_seq = 0; 498839a0e0STejun Heo 50dd4c133fSTejun Heo /* dispatch the next flush if there's one */ 51dd4c133fSTejun Heo if (!list_empty(&q->pending_flushes)) { 52dd4c133fSTejun Heo next_rq = list_entry_rq(q->pending_flushes.next); 538839a0e0STejun Heo list_move(&next_rq->queuelist, &q->queue_head); 548839a0e0STejun Heo } 558839a0e0STejun Heo } 568839a0e0STejun Heo return next_rq; 578839a0e0STejun Heo } 588839a0e0STejun Heo 5947f70d5aSTejun Heo static void blk_flush_complete_seq_end_io(struct request_queue *q, 6047f70d5aSTejun Heo unsigned seq, int error) 6147f70d5aSTejun Heo { 6247f70d5aSTejun Heo bool was_empty = elv_queue_empty(q); 6347f70d5aSTejun Heo struct request *next_rq; 6447f70d5aSTejun Heo 6547f70d5aSTejun Heo next_rq = blk_flush_complete_seq(q, seq, error); 6647f70d5aSTejun Heo 6747f70d5aSTejun Heo /* 6847f70d5aSTejun Heo * Moving a request silently to empty queue_head may stall the 6947f70d5aSTejun Heo * queue. Kick the queue in those cases. 7047f70d5aSTejun Heo */ 7147f70d5aSTejun Heo if (was_empty && next_rq) 7247f70d5aSTejun Heo __blk_run_queue(q); 7347f70d5aSTejun Heo } 7447f70d5aSTejun Heo 758839a0e0STejun Heo static void pre_flush_end_io(struct request *rq, int error) 768839a0e0STejun Heo { 778839a0e0STejun Heo elv_completed_request(rq->q, rq); 7847f70d5aSTejun Heo blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); 798839a0e0STejun Heo } 808839a0e0STejun Heo 81dd4c133fSTejun Heo static void flush_data_end_io(struct request *rq, int error) 828839a0e0STejun Heo { 838839a0e0STejun Heo elv_completed_request(rq->q, rq); 8447f70d5aSTejun Heo blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); 858839a0e0STejun Heo } 868839a0e0STejun Heo 878839a0e0STejun Heo static void post_flush_end_io(struct request *rq, int error) 888839a0e0STejun Heo { 898839a0e0STejun Heo elv_completed_request(rq->q, rq); 9047f70d5aSTejun Heo blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); 918839a0e0STejun Heo } 928839a0e0STejun Heo 93cde4c406SChristoph Hellwig static void init_flush_request(struct request *rq, struct gendisk *disk) 948839a0e0STejun Heo { 958839a0e0STejun Heo rq->cmd_type = REQ_TYPE_FS; 96337238beSTejun Heo rq->cmd_flags = WRITE_FLUSH; 97cde4c406SChristoph Hellwig rq->rq_disk = disk; 988839a0e0STejun Heo } 998839a0e0STejun Heo 100dd4c133fSTejun Heo static struct request *queue_next_fseq(struct request_queue *q) 1018839a0e0STejun Heo { 1024fed947cSTejun Heo struct request *orig_rq = q->orig_flush_rq; 103dd4c133fSTejun Heo struct request *rq = &q->flush_rq; 1048839a0e0STejun Heo 105cde4c406SChristoph Hellwig blk_rq_init(q, rq); 106cde4c406SChristoph Hellwig 107dd4c133fSTejun Heo switch (blk_flush_cur_seq(q)) { 108dd4c133fSTejun Heo case QUEUE_FSEQ_PREFLUSH: 109cde4c406SChristoph Hellwig init_flush_request(rq, orig_rq->rq_disk); 110cde4c406SChristoph Hellwig rq->end_io = pre_flush_end_io; 1118839a0e0STejun Heo break; 112dd4c133fSTejun Heo case QUEUE_FSEQ_DATA: 1134fed947cSTejun Heo init_request_from_bio(rq, orig_rq->bio); 114*09d60c70STejun Heo /* 115*09d60c70STejun Heo * orig_rq->rq_disk may be different from 116*09d60c70STejun Heo * bio->bi_bdev->bd_disk if orig_rq got here through 117*09d60c70STejun Heo * remapping drivers. Make sure rq->rq_disk points 118*09d60c70STejun Heo * to the same one as orig_rq. 119*09d60c70STejun Heo */ 120*09d60c70STejun Heo rq->rq_disk = orig_rq->rq_disk; 1214fed947cSTejun Heo rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); 1224fed947cSTejun Heo rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); 123dd4c133fSTejun Heo rq->end_io = flush_data_end_io; 1248839a0e0STejun Heo break; 125dd4c133fSTejun Heo case QUEUE_FSEQ_POSTFLUSH: 126cde4c406SChristoph Hellwig init_flush_request(rq, orig_rq->rq_disk); 127cde4c406SChristoph Hellwig rq->end_io = post_flush_end_io; 1288839a0e0STejun Heo break; 1298839a0e0STejun Heo default: 1308839a0e0STejun Heo BUG(); 1318839a0e0STejun Heo } 132cde4c406SChristoph Hellwig 133cde4c406SChristoph Hellwig elv_insert(q, rq, ELEVATOR_INSERT_FRONT); 1348839a0e0STejun Heo return rq; 1358839a0e0STejun Heo } 1368839a0e0STejun Heo 137dd4c133fSTejun Heo struct request *blk_do_flush(struct request_queue *q, struct request *rq) 1388839a0e0STejun Heo { 1394fed947cSTejun Heo unsigned int fflags = q->flush_flags; /* may change, cache it */ 1404fed947cSTejun Heo bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; 1414fed947cSTejun Heo bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); 1424fed947cSTejun Heo bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); 1438839a0e0STejun Heo unsigned skip = 0; 1448839a0e0STejun Heo 1458839a0e0STejun Heo /* 1464fed947cSTejun Heo * Special case. If there's data but flush is not necessary, 1474fed947cSTejun Heo * the request can be issued directly. 1484fed947cSTejun Heo * 1494fed947cSTejun Heo * Flush w/o data should be able to be issued directly too but 1504fed947cSTejun Heo * currently some drivers assume that rq->bio contains 1514fed947cSTejun Heo * non-zero data if it isn't NULL and empty FLUSH requests 1524fed947cSTejun Heo * getting here usually have bio's without data. 1538839a0e0STejun Heo */ 1544fed947cSTejun Heo if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { 1554fed947cSTejun Heo rq->cmd_flags &= ~REQ_FLUSH; 1564fed947cSTejun Heo if (!has_fua) 1574fed947cSTejun Heo rq->cmd_flags &= ~REQ_FUA; 1584fed947cSTejun Heo return rq; 1598839a0e0STejun Heo } 1608839a0e0STejun Heo 1618839a0e0STejun Heo /* 1624fed947cSTejun Heo * Sequenced flushes can't be processed in parallel. If 1634fed947cSTejun Heo * another one is already in progress, queue for later 1644fed947cSTejun Heo * processing. 1658839a0e0STejun Heo */ 1664fed947cSTejun Heo if (q->flush_seq) { 1674fed947cSTejun Heo list_move_tail(&rq->queuelist, &q->pending_flushes); 1688839a0e0STejun Heo return NULL; 1698839a0e0STejun Heo } 1708839a0e0STejun Heo 1718839a0e0STejun Heo /* 172dd4c133fSTejun Heo * Start a new flush sequence 1738839a0e0STejun Heo */ 174dd4c133fSTejun Heo q->flush_err = 0; 175dd4c133fSTejun Heo q->flush_seq |= QUEUE_FSEQ_STARTED; 1768839a0e0STejun Heo 1774fed947cSTejun Heo /* adjust FLUSH/FUA of the original request and stash it away */ 1784fed947cSTejun Heo rq->cmd_flags &= ~REQ_FLUSH; 1794fed947cSTejun Heo if (!has_fua) 1804fed947cSTejun Heo rq->cmd_flags &= ~REQ_FUA; 1818839a0e0STejun Heo blk_dequeue_request(rq); 182dd4c133fSTejun Heo q->orig_flush_rq = rq; 1838839a0e0STejun Heo 1844fed947cSTejun Heo /* skip unneded sequences and return the first one */ 1854fed947cSTejun Heo if (!do_preflush) 186dd4c133fSTejun Heo skip |= QUEUE_FSEQ_PREFLUSH; 1874fed947cSTejun Heo if (!blk_rq_sectors(rq)) 188dd4c133fSTejun Heo skip |= QUEUE_FSEQ_DATA; 1894fed947cSTejun Heo if (!do_postflush) 190dd4c133fSTejun Heo skip |= QUEUE_FSEQ_POSTFLUSH; 191dd4c133fSTejun Heo return blk_flush_complete_seq(q, skip, 0); 1928839a0e0STejun Heo } 1938839a0e0STejun Heo 1948839a0e0STejun Heo static void bio_end_empty_barrier(struct bio *bio, int err) 1958839a0e0STejun Heo { 1968839a0e0STejun Heo if (err) { 1978839a0e0STejun Heo if (err == -EOPNOTSUPP) 1988839a0e0STejun Heo set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); 1998839a0e0STejun Heo clear_bit(BIO_UPTODATE, &bio->bi_flags); 2008839a0e0STejun Heo } 2018839a0e0STejun Heo if (bio->bi_private) 2028839a0e0STejun Heo complete(bio->bi_private); 2038839a0e0STejun Heo bio_put(bio); 2048839a0e0STejun Heo } 2058839a0e0STejun Heo 2068839a0e0STejun Heo /** 2078839a0e0STejun Heo * blkdev_issue_flush - queue a flush 2088839a0e0STejun Heo * @bdev: blockdev to issue flush for 2098839a0e0STejun Heo * @gfp_mask: memory allocation flags (for bio_alloc) 2108839a0e0STejun Heo * @error_sector: error sector 2118839a0e0STejun Heo * @flags: BLKDEV_IFL_* flags to control behaviour 2128839a0e0STejun Heo * 2138839a0e0STejun Heo * Description: 2148839a0e0STejun Heo * Issue a flush for the block device in question. Caller can supply 2158839a0e0STejun Heo * room for storing the error offset in case of a flush error, if they 2168839a0e0STejun Heo * wish to. If WAIT flag is not passed then caller may check only what 2178839a0e0STejun Heo * request was pushed in some internal queue for later handling. 2188839a0e0STejun Heo */ 2198839a0e0STejun Heo int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, 2208839a0e0STejun Heo sector_t *error_sector, unsigned long flags) 2218839a0e0STejun Heo { 2228839a0e0STejun Heo DECLARE_COMPLETION_ONSTACK(wait); 2238839a0e0STejun Heo struct request_queue *q; 2248839a0e0STejun Heo struct bio *bio; 2258839a0e0STejun Heo int ret = 0; 2268839a0e0STejun Heo 2278839a0e0STejun Heo if (bdev->bd_disk == NULL) 2288839a0e0STejun Heo return -ENXIO; 2298839a0e0STejun Heo 2308839a0e0STejun Heo q = bdev_get_queue(bdev); 2318839a0e0STejun Heo if (!q) 2328839a0e0STejun Heo return -ENXIO; 2338839a0e0STejun Heo 2348839a0e0STejun Heo /* 2358839a0e0STejun Heo * some block devices may not have their queue correctly set up here 2368839a0e0STejun Heo * (e.g. loop device without a backing file) and so issuing a flush 2378839a0e0STejun Heo * here will panic. Ensure there is a request function before issuing 2388839a0e0STejun Heo * the barrier. 2398839a0e0STejun Heo */ 2408839a0e0STejun Heo if (!q->make_request_fn) 2418839a0e0STejun Heo return -ENXIO; 2428839a0e0STejun Heo 2438839a0e0STejun Heo bio = bio_alloc(gfp_mask, 0); 2448839a0e0STejun Heo bio->bi_end_io = bio_end_empty_barrier; 2458839a0e0STejun Heo bio->bi_bdev = bdev; 2468839a0e0STejun Heo if (test_bit(BLKDEV_WAIT, &flags)) 2478839a0e0STejun Heo bio->bi_private = &wait; 2488839a0e0STejun Heo 2498839a0e0STejun Heo bio_get(bio); 2508839a0e0STejun Heo submit_bio(WRITE_BARRIER, bio); 2518839a0e0STejun Heo if (test_bit(BLKDEV_WAIT, &flags)) { 2528839a0e0STejun Heo wait_for_completion(&wait); 2538839a0e0STejun Heo /* 2548839a0e0STejun Heo * The driver must store the error location in ->bi_sector, if 2558839a0e0STejun Heo * it supports it. For non-stacked drivers, this should be 2568839a0e0STejun Heo * copied from blk_rq_pos(rq). 2578839a0e0STejun Heo */ 2588839a0e0STejun Heo if (error_sector) 2598839a0e0STejun Heo *error_sector = bio->bi_sector; 2608839a0e0STejun Heo } 2618839a0e0STejun Heo 2628839a0e0STejun Heo if (bio_flagged(bio, BIO_EOPNOTSUPP)) 2638839a0e0STejun Heo ret = -EOPNOTSUPP; 2648839a0e0STejun Heo else if (!bio_flagged(bio, BIO_UPTODATE)) 2658839a0e0STejun Heo ret = -EIO; 2668839a0e0STejun Heo 2678839a0e0STejun Heo bio_put(bio); 2688839a0e0STejun Heo return ret; 2698839a0e0STejun Heo } 2708839a0e0STejun Heo EXPORT_SYMBOL(blkdev_issue_flush); 271