/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct bio *blk_bio_discard_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs)
{
	unsigned int max_discard_sectors, granularity;
	int alignment;
	sector_t tmp;
	unsigned split_sectors;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);

	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors)) {
		/* XXX: warn */
		return NULL;
	}

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	alignment = (q->limits.discard_alignment >> 9) % granularity;

	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;
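
	/*
	 * Worked example (hypothetical numbers, not from a real device):
	 * with discard_granularity = 1MB (granularity = 2048 sectors),
	 * discard_alignment = 0 and split_sectors = 2048, a discard
	 * starting at sector 100 gives tmp = (100 + 2048 - 0) % 2048 = 100,
	 * so split_sectors is trimmed to 1948 and the split ends exactly
	 * on the granularity boundary at sector 2048.
	 */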

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
}

static struct bio *blk_bio_write_same_split(struct request_queue *q,
					    struct bio *bio,
					    struct bio_set *bs)
{
	if (!q->limits.max_write_same_sectors)
		return NULL;

	if (bio_sectors(bio) <= q->limits.max_write_same_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}

static struct bio *blk_bio_segment_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs)
{
	struct bio *split;
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned seg_size = 0, nsegs = 0, sectors = 0;

	bio_for_each_segment(bv, bio, iter) {
		sectors += bv.bv_len >> 9;

		if (sectors > queue_max_sectors(q))
			goto split;

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
			goto split;

		if (bvprvp && blk_queue_cluster(q)) {
			if (seg_size + bv.bv_len > queue_max_segment_size(q))
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
				goto new_segment;

			seg_size += bv.bv_len;
			bvprv = bv;
			/*
			 * bv is the loop variable and is overwritten on
			 * every iteration, so the previous bvec has to be
			 * tracked through the stable copy in bvprv; taking
			 * &bv here would make the checks above compare the
			 * current bvec against itself.
			 */
			bvprvp = &bvprv;
			continue;
		}
new_segment:
		if (nsegs == queue_max_segments(q))
			goto split;

		nsegs++;
		bvprv = bv;
		bvprvp = &bvprv;
		seg_size = bv.bv_len;
	}

	return NULL;
split:
	split = bio_clone_bioset(bio, GFP_NOIO, bs);

	split->bi_iter.bi_size -= iter.bi_size;
	bio->bi_iter = iter;

	if (bio_integrity(bio)) {
		bio_integrity_advance(bio, split->bi_iter.bi_size);
		bio_integrity_trim(split, 0, bio_sectors(split));
	}

	return split;
}

void blk_queue_split(struct request_queue *q, struct bio **bio,
		     struct bio_set *bs)
{
	struct bio *split;

	if ((*bio)->bi_rw & REQ_DISCARD)
		split = blk_bio_discard_split(q, *bio, bs);
	else if ((*bio)->bi_rw & REQ_WRITE_SAME)
		split = blk_bio_write_same_split(q, *bio, bs);
	else
		split = blk_bio_segment_split(q, *bio, q->bio_split);

	if (split) {
		bio_chain(split, *bio);
		generic_make_request(*bio);
		*bio = split;
	}
}
EXPORT_SYMBOL(blk_queue_split);
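
/*
 * Usage sketch (hypothetical driver, not part of this file): a driver that
 * supplies its own make_request function is expected to split over-sized
 * bios itself, roughly:
 *
 *	static void example_make_request(struct request_queue *q,
 *					 struct bio *bio)
 *	{
 *		blk_queue_split(q, &bio, q->bio_split);
 *
 *		(then queue *bio as usual; it now honours the queue limits,
 *		and any remainder has already been resubmitted through
 *		generic_make_request())
 *	}
 */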

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio,
					     bool no_sg_merge)
{
	struct bio_vec bv, bvprv = { NULL };
	int cluster, prev = 0;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;
	struct bvec_iter iter;

	if (!bio)
		return 0;

	/*
	 * This should probably be returning 0, but blk_add_request_payload()
	 * can still attach a single-bvec payload to the request later, so a
	 * discard has to be counted as one segment.
	 */
	if (bio->bi_rw & REQ_DISCARD)
		return 1;

	if (bio->bi_rw & REQ_WRITE_SAME)
		return 1;

	fbio = bio;
	cluster = blk_queue_cluster(q);
	seg_size = 0;
	nr_phys_segs = 0;
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, iter) {
			/*
			 * If SG merging is disabled, each bio vector is
			 * a segment
			 */
			if (no_sg_merge)
				goto new_segment;

			if (prev && cluster) {
				if (seg_size + bv.bv_len
				    > queue_max_segment_size(q))
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
					goto new_segment;

				seg_size += bv.bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			prev = 1;
			seg_size = bv.bv_len;
		}
		bbio = bio;
	}

	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
			&rq->q->queue_flags);

	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
			no_sg_merge);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	unsigned short seg_cnt;

	/* estimate segment number by bi_vcnt for non-cloned bio */
	if (bio_flagged(bio, BIO_CLONED))
		seg_cnt = bio_segments(bio);
	else
		seg_cnt = bio->bi_vcnt;

	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
	    (seg_cnt < queue_max_segments(q)))
		bio->bi_phys_segments = seg_cnt;
	else {
		struct bio *nxt = bio->bi_next;

		bio->bi_next = NULL;
		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
		bio->bi_next = nxt;
	}

	bio_set_flag(bio, BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	struct bio_vec end_bv = { NULL }, nxt_bv;
	struct bvec_iter iter;

	if (!blk_queue_cluster(q))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    queue_max_segment_size(q))
		return 0;

	if (!bio_has_data(bio))
		return 1;

	bio_for_each_segment(end_bv, bio, iter)
		if (end_bv.bv_len == iter.bi_size)
			break;

	nxt_bv = bio_iovec(nxt);

	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
		return 1;

	return 0;
}
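
/*
 * Illustration (hypothetical values): with clustering enabled and a 64KB
 * max_segment_size, two physically contiguous 4KB bvecs count as a single
 * 8KB physical segment; a gap between them, a seg_boundary_mask crossing,
 * or exceeding max_segment_size starts a new segment instead.
 */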

static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
		     struct scatterlist *sglist, struct bio_vec *bvprv,
		     struct scatterlist **sg, int *nsegs, int *cluster)
{
	int nbytes = bvec->bv_len;

	if (*sg && *cluster) {
		if ((*sg)->length + nbytes > queue_max_segment_size(q))
			goto new_segment;

		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
			goto new_segment;
		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
			goto new_segment;

		(*sg)->length += nbytes;
	} else {
new_segment:
		if (!*sg)
			*sg = sglist;
		else {
			/*
			 * If the driver previously mapped a shorter
			 * list, we could see a termination bit
			 * prematurely unless it fully inits the sg
			 * table on each mapping. We KNOW that there
			 * must be more entries here or the driver
			 * would be buggy, so force clear the
			 * termination bit to avoid doing a full
			 * sg_init_table() in drivers for each command.
			 */
			sg_unmark_end(*sg);
			*sg = sg_next(*sg);
		}

		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
		(*nsegs)++;
	}
	*bvprv = *bvec;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int nsegs, cluster;

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	if (bio->bi_rw & REQ_DISCARD) {
		/*
		 * This is a hack - drivers should be neither modifying the
		 * biovec, nor relying on bi_vcnt - but because of
		 * blk_add_request_payload(), a discard bio may or may not have
		 * a payload we need to set up here (thank you Christoph) and
		 * bi_vcnt is really the only way of telling if we need to.
		 */

		if (bio->bi_vcnt)
			goto single_segment;

		return 0;
	}

	if (bio->bi_rw & REQ_WRITE_SAME) {
single_segment:
		*sg = sglist;
		bvec = bio_iovec(bio);
		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		return 1;
	}

	for_each_bio(bio)
		bio_for_each_segment(bvec, bio, iter)
			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
					     &nsegs, &cluster);

	return nsegs;
}

/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	if (rq->bio)
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (rq->cmd_flags & REQ_WRITE)
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg_unmark_end(sg);
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
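
/*
 * Usage sketch (hypothetical driver code): the caller sizes a scatterlist
 * from rq->nr_phys_segments, maps the request and hands the result to the
 * DMA API, along the lines of:
 *
 *	sg_init_table(sgl, rq->nr_phys_segments);
 *	nents = blk_rq_map_sg(q, rq, sgl);
 *	count = dma_map_sg(dev, sgl, nents, dir);
 *
 * where sgl, dev and dir are the driver's own scatterlist, device and DMA
 * direction.
 */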

static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
		goto no_merge;

	if (blk_integrity_merge_bio(q, req, bio) == false)
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump the
	 * physical segment counter.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
	return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}

/*
 * blk-mq uses req->special to carry normal driver per-request payload, it
 * does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
	struct request_queue *q = req->q;

	return !q->mq_ops && req->special;
}

static int req_gap_to_prev(struct request *req, struct bio *next)
{
	struct bio *prev = req->biotail;

	return bvec_gap_to_prev(req->q, &prev->bi_io_vec[prev->bi_vcnt - 1],
				next->bi_io_vec[0].bv_offset);
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check if either of the requests are re-queued
	 * requests.  Can't merge them if they are.
	 */
	if (req_no_special_merge(req) || req_no_special_merge(next))
		return 0;

	if (req_gap_to_prev(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > queue_max_segments(q))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}
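
/*
 * Example (hypothetical sizes): merging a request whose tail segment is 4KB
 * with one whose head segment is a physically contiguous 4KB yields a single
 * 8KB boundary segment, so total_phys_segments drops by one; when either
 * request consists of a single segment, the combined 8KB size is recorded in
 * bi_seg_front_size or bi_seg_back_size accordingly.
 */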

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->cmd_flags & REQ_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
			     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
		bio->bi_rw |= ff;
	}
	rq->cmd_flags |= REQ_MIXED_MERGE;
}

static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_round_stats(cpu, part);
		part_dec_in_flight(part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}
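
/*
 * Illustration (hypothetical sectors): a request covering sectors [0, 8)
 * can only back-merge with a request starting at sector 8, since
 * blk_rq_pos(req) + blk_rq_sectors(req) must equal blk_rq_pos(next); a
 * request starting at sector 9 would leave a hole and is rejected below as
 * not contiguous.
 */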

/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			 struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
		return 0;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return 0;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || req_no_special_merge(next))
		return 0;

	if (req->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge
	 * or front merge. We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/* ownership of bio passed from next to req */
	next->bio = NULL;
	__blk_put_request(q, next);
	return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* must be same device and not a special request */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* must be using the same buffer */
	if (rq->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	/* Only check gaps if the bio carries data */
	if (bio_has_data(bio) && req_gap_to_prev(rq, bio))
		return false;

	return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}