/*
 * Functions to sequence FLUSH and FUA writes.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>

#include "blk.h"

/* FLUSH/FUA sequences */
enum {
        QUEUE_FSEQ_STARTED      = (1 << 0), /* flushing in progress */
        QUEUE_FSEQ_PREFLUSH     = (1 << 1), /* pre-flushing in progress */
        QUEUE_FSEQ_DATA         = (1 << 2), /* data write in progress */
        QUEUE_FSEQ_POSTFLUSH    = (1 << 3), /* post-flushing in progress */
        QUEUE_FSEQ_DONE         = (1 << 4),
};

static struct request *queue_next_fseq(struct request_queue *q);

unsigned blk_flush_cur_seq(struct request_queue *q)
{
        if (!q->flush_seq)
                return 0;
        return 1 << ffz(q->flush_seq);
}
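/*
 * Completed (or skipped) steps accumulate as bits in q->flush_seq, so
 * the current step is always the lowest bit still clear.  E.g. with
 * PREFLUSH and POSTFLUSH pre-marked as skipped:
 *
 *      flush_seq = STARTED|PREFLUSH|POSTFLUSH  -> cur_seq = DATA
 *      flush_seq |= DATA                       -> cur_seq = DONE
 */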
static struct request *blk_flush_complete_seq(struct request_queue *q,
                                              unsigned seq, int error)
{
        struct request *next_rq = NULL;

        if (error && !q->flush_err)
                q->flush_err = error;

        BUG_ON(q->flush_seq & seq);
        q->flush_seq |= seq;

        if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
                /* not complete yet, queue the next flush sequence */
                next_rq = queue_next_fseq(q);
        } else {
                /* complete this flush request */
                __blk_end_request_all(q->orig_flush_rq, q->flush_err);
                q->orig_flush_rq = NULL;
                q->flush_seq = 0;

                /* dispatch the next flush if there's one */
                if (!list_empty(&q->pending_flushes)) {
                        next_rq = list_entry_rq(q->pending_flushes.next);
                        list_move(&next_rq->queuelist, &q->queue_head);
                }
        }
        return next_rq;
}

static void pre_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error);
}

static void flush_data_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
        elv_completed_request(rq->q, rq);
        blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, struct request *rq,
                        rq_end_io_fn *end_io)
{
        blk_rq_init(q, rq);
        rq->cmd_type = REQ_TYPE_FS;
        rq->cmd_flags = REQ_FLUSH;
        rq->rq_disk = q->orig_flush_rq->rq_disk;
        rq->end_io = end_io;

        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static struct request *queue_next_fseq(struct request_queue *q)
{
        struct request *orig_rq = q->orig_flush_rq;
        struct request *rq = &q->flush_rq;

        switch (blk_flush_cur_seq(q)) {
        case QUEUE_FSEQ_PREFLUSH:
                queue_flush(q, rq, pre_flush_end_io);
                break;

        case QUEUE_FSEQ_DATA:
                /* initialize proxy request, inherit FLUSH/FUA and queue it */
                blk_rq_init(q, rq);
                init_request_from_bio(rq, orig_rq->bio);
                rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
                rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
                rq->end_io = flush_data_end_io;

                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
                break;

        case QUEUE_FSEQ_POSTFLUSH:
                queue_flush(q, rq, post_flush_end_io);
                break;

        default:
                BUG();
        }
        return rq;
}
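/*
 * Illustrative summary (derived from the do_preflush/do_postflush tests
 * in blk_do_flush() below) of how a write with data is sequenced:
 *
 *      device supports         request has     resulting sequence
 *      FLUSH+FUA               FLUSH+FUA       PREFLUSH, DATA w/ FUA
 *      FLUSH only              FLUSH+FUA       PREFLUSH, DATA, POSTFLUSH
 *      FLUSH only              FUA             DATA, POSTFLUSH
 *      neither                 FLUSH/FUA       issued directly, flags cleared
 */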
struct request *blk_do_flush(struct request_queue *q, struct request *rq)
{
        unsigned int fflags = q->flush_flags; /* may change, cache it */
        bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
        bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
        bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
        unsigned skip = 0;

        /*
         * Special case.  If there's data but a flush is not necessary,
         * the request can be issued directly.
         *
         * A flush w/o data should be able to be issued directly too, but
         * currently some drivers assume that rq->bio contains non-zero
         * data if it isn't NULL, and empty FLUSH requests getting here
         * usually have bios without data.
         */
        if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
                rq->cmd_flags &= ~REQ_FLUSH;
                if (!has_fua)
                        rq->cmd_flags &= ~REQ_FUA;
                return rq;
        }

        /*
         * Sequenced flushes can't be processed in parallel.  If
         * another one is already in progress, queue for later
         * processing.
         */
        if (q->flush_seq) {
                list_move_tail(&rq->queuelist, &q->pending_flushes);
                return NULL;
        }

        /*
         * Start a new flush sequence
         */
        q->flush_err = 0;
        q->flush_seq |= QUEUE_FSEQ_STARTED;

        /* adjust FLUSH/FUA of the original request and stash it away */
        rq->cmd_flags &= ~REQ_FLUSH;
        if (!has_fua)
                rq->cmd_flags &= ~REQ_FUA;
        blk_dequeue_request(rq);
        q->orig_flush_rq = rq;

        /* skip unneeded sequences and return the first one */
        if (!do_preflush)
                skip |= QUEUE_FSEQ_PREFLUSH;
        if (!blk_rq_sectors(rq))
                skip |= QUEUE_FSEQ_DATA;
        if (!do_postflush)
                skip |= QUEUE_FSEQ_POSTFLUSH;
        return blk_flush_complete_seq(q, skip, 0);
}
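/*
 * Worked example of the skip bits above: an empty REQ_FLUSH request
 * (no data, no REQ_FUA) on a device advertising REQ_FLUSH gives
 * do_preflush = true, do_postflush = false and blk_rq_sectors() == 0,
 * so skip = QUEUE_FSEQ_DATA | QUEUE_FSEQ_POSTFLUSH.  Those steps are
 * marked done up front and queue_next_fseq() issues only the PREFLUSH;
 * its completion takes the sequence straight to DONE.
 */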
static void bio_end_empty_barrier(struct bio *bio, int err)
{
        if (err) {
                if (err == -EOPNOTSUPP)
                        set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        }
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:       blockdev to issue flush for
 * @gfp_mask:   memory allocation flags (for bio_alloc)
 * @error_sector:       where to store the error offset, if desired
 * @flags:      BLKDEV_IFL_* flags to control behaviour
 *
 * Description:
 *    Issue a flush for the block device in question.  The caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.  If the WAIT flag is not passed, the caller may only assume
 *    that the request was pushed onto some internal queue for later handling.
 */
int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
                sector_t *error_sector, unsigned long flags)
{
        DECLARE_COMPLETION_ONSTACK(wait);
        struct request_queue *q;
        struct bio *bio;
        int ret = 0;

        if (bdev->bd_disk == NULL)
                return -ENXIO;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        /*
         * Some block devices may not have their queue correctly set up here
         * (e.g. a loop device without a backing file), and so issuing a flush
         * here will panic.  Ensure there is a request function before issuing
         * the barrier.
         */
        if (!q->make_request_fn)
                return -ENXIO;

        bio = bio_alloc(gfp_mask, 0);
        bio->bi_end_io = bio_end_empty_barrier;
        bio->bi_bdev = bdev;
        if (test_bit(BLKDEV_WAIT, &flags))
                bio->bi_private = &wait;

        bio_get(bio);
        submit_bio(WRITE_BARRIER, bio);
        if (test_bit(BLKDEV_WAIT, &flags)) {
                wait_for_completion(&wait);
                /*
                 * The driver must store the error location in ->bi_sector, if
                 * it supports it.  For non-stacked drivers, this should be
                 * copied from blk_rq_pos(rq).
                 */
                if (error_sector)
                        *error_sector = bio->bi_sector;
        }

        if (bio_flagged(bio, BIO_EOPNOTSUPP))
                ret = -EOPNOTSUPP;
        else if (!bio_flagged(bio, BIO_UPTODATE))
                ret = -EIO;

        bio_put(bio);
        return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
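/*
 * Example usage (an illustrative sketch, not a caller in this file):
 * how an fsync-style path might drain the device cache with
 * blkdev_issue_flush().  BLKDEV_IFL_WAIT is assumed to be the
 * BLKDEV_IFL_* flag matching the BLKDEV_WAIT bit tested above.
 */
static int example_fsync_flush(struct block_device *bdev)
{
        int ret;

        /* block until the flush completes; no error offset is needed */
        ret = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT);
        if (ret == -EOPNOTSUPP)
                ret = 0;        /* device has no write cache to flush */
        return ret;
}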