1 /* 2 * Functions related to segment and merge handling 3 */ 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/bio.h> 7 #include <linux/blkdev.h> 8 #include <linux/scatterlist.h> 9 10 #include "blk.h" 11 12 static struct bio *blk_bio_discard_split(struct request_queue *q, 13 struct bio *bio, 14 struct bio_set *bs) 15 { 16 unsigned int max_discard_sectors, granularity; 17 int alignment; 18 sector_t tmp; 19 unsigned split_sectors; 20 21 /* Zero-sector (unknown) and one-sector granularities are the same. */ 22 granularity = max(q->limits.discard_granularity >> 9, 1U); 23 24 max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); 25 max_discard_sectors -= max_discard_sectors % granularity; 26 27 if (unlikely(!max_discard_sectors)) { 28 /* XXX: warn */ 29 return NULL; 30 } 31 32 if (bio_sectors(bio) <= max_discard_sectors) 33 return NULL; 34 35 split_sectors = max_discard_sectors; 36 37 /* 38 * If the next starting sector would be misaligned, stop the discard at 39 * the previous aligned sector. 40 */ 41 alignment = (q->limits.discard_alignment >> 9) % granularity; 42 43 tmp = bio->bi_iter.bi_sector + split_sectors - alignment; 44 tmp = sector_div(tmp, granularity); 45 46 if (split_sectors > tmp) 47 split_sectors -= tmp; 48 49 return bio_split(bio, split_sectors, GFP_NOIO, bs); 50 } 51 52 static struct bio *blk_bio_write_same_split(struct request_queue *q, 53 struct bio *bio, 54 struct bio_set *bs) 55 { 56 if (!q->limits.max_write_same_sectors) 57 return NULL; 58 59 if (bio_sectors(bio) <= q->limits.max_write_same_sectors) 60 return NULL; 61 62 return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs); 63 } 64 65 static struct bio *blk_bio_segment_split(struct request_queue *q, 66 struct bio *bio, 67 struct bio_set *bs) 68 { 69 struct bio *split; 70 struct bio_vec bv, bvprv; 71 struct bvec_iter iter; 72 unsigned seg_size = 0, nsegs = 0, sectors = 0; 73 int prev = 0; 74 75 bio_for_each_segment(bv, bio, iter) { 76 sectors += bv.bv_len >> 9; 77 78 if (sectors > queue_max_sectors(q)) 79 goto split; 80 81 /* 82 * If the queue doesn't support SG gaps and adding this 83 * offset would create a gap, disallow it. 84 */ 85 if (prev && bvec_gap_to_prev(q, &bvprv, bv.bv_offset)) 86 goto split; 87 88 if (prev && blk_queue_cluster(q)) { 89 if (seg_size + bv.bv_len > queue_max_segment_size(q)) 90 goto new_segment; 91 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) 92 goto new_segment; 93 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) 94 goto new_segment; 95 96 seg_size += bv.bv_len; 97 bvprv = bv; 98 prev = 1; 99 continue; 100 } 101 new_segment: 102 if (nsegs == queue_max_segments(q)) 103 goto split; 104 105 nsegs++; 106 bvprv = bv; 107 prev = 1; 108 seg_size = bv.bv_len; 109 } 110 111 return NULL; 112 split: 113 split = bio_clone_bioset(bio, GFP_NOIO, bs); 114 115 split->bi_iter.bi_size -= iter.bi_size; 116 bio->bi_iter = iter; 117 118 if (bio_integrity(bio)) { 119 bio_integrity_advance(bio, split->bi_iter.bi_size); 120 bio_integrity_trim(split, 0, bio_sectors(split)); 121 } 122 123 return split; 124 } 125 126 void blk_queue_split(struct request_queue *q, struct bio **bio, 127 struct bio_set *bs) 128 { 129 struct bio *split; 130 131 if ((*bio)->bi_rw & REQ_DISCARD) 132 split = blk_bio_discard_split(q, *bio, bs); 133 else if ((*bio)->bi_rw & REQ_WRITE_SAME) 134 split = blk_bio_write_same_split(q, *bio, bs); 135 else 136 split = blk_bio_segment_split(q, *bio, q->bio_split); 137 138 if (split) { 139 bio_chain(split, *bio); 140 generic_make_request(*bio); 141 *bio = split; 142 } 143 } 144 EXPORT_SYMBOL(blk_queue_split); 145 146 static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 147 struct bio *bio, 148 bool no_sg_merge) 149 { 150 struct bio_vec bv, bvprv = { NULL }; 151 int cluster, prev = 0; 152 unsigned int seg_size, nr_phys_segs; 153 struct bio *fbio, *bbio; 154 struct bvec_iter iter; 155 156 if (!bio) 157 return 0; 158 159 /* 160 * This should probably be returning 0, but blk_add_request_payload() 161 * (Christoph!!!!) 162 */ 163 if (bio->bi_rw & REQ_DISCARD) 164 return 1; 165 166 if (bio->bi_rw & REQ_WRITE_SAME) 167 return 1; 168 169 fbio = bio; 170 cluster = blk_queue_cluster(q); 171 seg_size = 0; 172 nr_phys_segs = 0; 173 for_each_bio(bio) { 174 bio_for_each_segment(bv, bio, iter) { 175 /* 176 * If SG merging is disabled, each bio vector is 177 * a segment 178 */ 179 if (no_sg_merge) 180 goto new_segment; 181 182 if (prev && cluster) { 183 if (seg_size + bv.bv_len 184 > queue_max_segment_size(q)) 185 goto new_segment; 186 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) 187 goto new_segment; 188 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) 189 goto new_segment; 190 191 seg_size += bv.bv_len; 192 bvprv = bv; 193 continue; 194 } 195 new_segment: 196 if (nr_phys_segs == 1 && seg_size > 197 fbio->bi_seg_front_size) 198 fbio->bi_seg_front_size = seg_size; 199 200 nr_phys_segs++; 201 bvprv = bv; 202 prev = 1; 203 seg_size = bv.bv_len; 204 } 205 bbio = bio; 206 } 207 208 if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size) 209 fbio->bi_seg_front_size = seg_size; 210 if (seg_size > bbio->bi_seg_back_size) 211 bbio->bi_seg_back_size = seg_size; 212 213 return nr_phys_segs; 214 } 215 216 void blk_recalc_rq_segments(struct request *rq) 217 { 218 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, 219 &rq->q->queue_flags); 220 221 rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, 222 no_sg_merge); 223 } 224 225 void blk_recount_segments(struct request_queue *q, struct bio *bio) 226 { 227 unsigned short seg_cnt; 228 229 /* estimate segment number by bi_vcnt for non-cloned bio */ 230 if (bio_flagged(bio, BIO_CLONED)) 231 seg_cnt = bio_segments(bio); 232 else 233 seg_cnt = bio->bi_vcnt; 234 235 if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && 236 (seg_cnt < queue_max_segments(q))) 237 bio->bi_phys_segments = seg_cnt; 238 else { 239 struct bio *nxt = bio->bi_next; 240 241 bio->bi_next = NULL; 242 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); 243 bio->bi_next = nxt; 244 } 245 246 bio_set_flag(bio, BIO_SEG_VALID); 247 } 248 EXPORT_SYMBOL(blk_recount_segments); 249 250 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 251 struct bio *nxt) 252 { 253 struct bio_vec end_bv = { NULL }, nxt_bv; 254 struct bvec_iter iter; 255 256 if (!blk_queue_cluster(q)) 257 return 0; 258 259 if (bio->bi_seg_back_size + nxt->bi_seg_front_size > 260 queue_max_segment_size(q)) 261 return 0; 262 263 if (!bio_has_data(bio)) 264 return 1; 265 266 bio_for_each_segment(end_bv, bio, iter) 267 if (end_bv.bv_len == iter.bi_size) 268 break; 269 270 nxt_bv = bio_iovec(nxt); 271 272 if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) 273 return 0; 274 275 /* 276 * bio and nxt are contiguous in memory; check if the queue allows 277 * these two to be merged into one 278 */ 279 if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) 280 return 1; 281 282 return 0; 283 } 284 285 static inline void 286 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, 287 struct scatterlist *sglist, struct bio_vec *bvprv, 288 struct scatterlist **sg, int *nsegs, int *cluster) 289 { 290 291 int nbytes = bvec->bv_len; 292 293 if (*sg && *cluster) { 294 if ((*sg)->length + nbytes > queue_max_segment_size(q)) 295 goto new_segment; 296 297 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 298 goto new_segment; 299 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 300 goto new_segment; 301 302 (*sg)->length += nbytes; 303 } else { 304 new_segment: 305 if (!*sg) 306 *sg = sglist; 307 else { 308 /* 309 * If the driver previously mapped a shorter 310 * list, we could see a termination bit 311 * prematurely unless it fully inits the sg 312 * table on each mapping. We KNOW that there 313 * must be more entries here or the driver 314 * would be buggy, so force clear the 315 * termination bit to avoid doing a full 316 * sg_init_table() in drivers for each command. 317 */ 318 sg_unmark_end(*sg); 319 *sg = sg_next(*sg); 320 } 321 322 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); 323 (*nsegs)++; 324 } 325 *bvprv = *bvec; 326 } 327 328 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, 329 struct scatterlist *sglist, 330 struct scatterlist **sg) 331 { 332 struct bio_vec bvec, bvprv = { NULL }; 333 struct bvec_iter iter; 334 int nsegs, cluster; 335 336 nsegs = 0; 337 cluster = blk_queue_cluster(q); 338 339 if (bio->bi_rw & REQ_DISCARD) { 340 /* 341 * This is a hack - drivers should be neither modifying the 342 * biovec, nor relying on bi_vcnt - but because of 343 * blk_add_request_payload(), a discard bio may or may not have 344 * a payload we need to set up here (thank you Christoph) and 345 * bi_vcnt is really the only way of telling if we need to. 346 */ 347 348 if (bio->bi_vcnt) 349 goto single_segment; 350 351 return 0; 352 } 353 354 if (bio->bi_rw & REQ_WRITE_SAME) { 355 single_segment: 356 *sg = sglist; 357 bvec = bio_iovec(bio); 358 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 359 return 1; 360 } 361 362 for_each_bio(bio) 363 bio_for_each_segment(bvec, bio, iter) 364 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, 365 &nsegs, &cluster); 366 367 return nsegs; 368 } 369 370 /* 371 * map a request to scatterlist, return number of sg entries setup. Caller 372 * must make sure sg can hold rq->nr_phys_segments entries 373 */ 374 int blk_rq_map_sg(struct request_queue *q, struct request *rq, 375 struct scatterlist *sglist) 376 { 377 struct scatterlist *sg = NULL; 378 int nsegs = 0; 379 380 if (rq->bio) 381 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); 382 383 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 384 (blk_rq_bytes(rq) & q->dma_pad_mask)) { 385 unsigned int pad_len = 386 (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; 387 388 sg->length += pad_len; 389 rq->extra_len += pad_len; 390 } 391 392 if (q->dma_drain_size && q->dma_drain_needed(rq)) { 393 if (rq->cmd_flags & REQ_WRITE) 394 memset(q->dma_drain_buffer, 0, q->dma_drain_size); 395 396 sg_unmark_end(sg); 397 sg = sg_next(sg); 398 sg_set_page(sg, virt_to_page(q->dma_drain_buffer), 399 q->dma_drain_size, 400 ((unsigned long)q->dma_drain_buffer) & 401 (PAGE_SIZE - 1)); 402 nsegs++; 403 rq->extra_len += q->dma_drain_size; 404 } 405 406 if (sg) 407 sg_mark_end(sg); 408 409 return nsegs; 410 } 411 EXPORT_SYMBOL(blk_rq_map_sg); 412 413 static inline int ll_new_hw_segment(struct request_queue *q, 414 struct request *req, 415 struct bio *bio) 416 { 417 int nr_phys_segs = bio_phys_segments(q, bio); 418 419 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) 420 goto no_merge; 421 422 if (blk_integrity_merge_bio(q, req, bio) == false) 423 goto no_merge; 424 425 /* 426 * This will form the start of a new hw segment. Bump both 427 * counters. 428 */ 429 req->nr_phys_segments += nr_phys_segs; 430 return 1; 431 432 no_merge: 433 req->cmd_flags |= REQ_NOMERGE; 434 if (req == q->last_merge) 435 q->last_merge = NULL; 436 return 0; 437 } 438 439 int ll_back_merge_fn(struct request_queue *q, struct request *req, 440 struct bio *bio) 441 { 442 if (blk_rq_sectors(req) + bio_sectors(bio) > 443 blk_rq_get_max_sectors(req)) { 444 req->cmd_flags |= REQ_NOMERGE; 445 if (req == q->last_merge) 446 q->last_merge = NULL; 447 return 0; 448 } 449 if (!bio_flagged(req->biotail, BIO_SEG_VALID)) 450 blk_recount_segments(q, req->biotail); 451 if (!bio_flagged(bio, BIO_SEG_VALID)) 452 blk_recount_segments(q, bio); 453 454 return ll_new_hw_segment(q, req, bio); 455 } 456 457 int ll_front_merge_fn(struct request_queue *q, struct request *req, 458 struct bio *bio) 459 { 460 if (blk_rq_sectors(req) + bio_sectors(bio) > 461 blk_rq_get_max_sectors(req)) { 462 req->cmd_flags |= REQ_NOMERGE; 463 if (req == q->last_merge) 464 q->last_merge = NULL; 465 return 0; 466 } 467 if (!bio_flagged(bio, BIO_SEG_VALID)) 468 blk_recount_segments(q, bio); 469 if (!bio_flagged(req->bio, BIO_SEG_VALID)) 470 blk_recount_segments(q, req->bio); 471 472 return ll_new_hw_segment(q, req, bio); 473 } 474 475 /* 476 * blk-mq uses req->special to carry normal driver per-request payload, it 477 * does not indicate a prepared command that we cannot merge with. 478 */ 479 static bool req_no_special_merge(struct request *req) 480 { 481 struct request_queue *q = req->q; 482 483 return !q->mq_ops && req->special; 484 } 485 486 static int req_gap_to_prev(struct request *req, struct bio *next) 487 { 488 struct bio *prev = req->biotail; 489 490 return bvec_gap_to_prev(req->q, &prev->bi_io_vec[prev->bi_vcnt - 1], 491 next->bi_io_vec[0].bv_offset); 492 } 493 494 static int ll_merge_requests_fn(struct request_queue *q, struct request *req, 495 struct request *next) 496 { 497 int total_phys_segments; 498 unsigned int seg_size = 499 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; 500 501 /* 502 * First check if the either of the requests are re-queued 503 * requests. Can't merge them if they are. 504 */ 505 if (req_no_special_merge(req) || req_no_special_merge(next)) 506 return 0; 507 508 if (req_gap_to_prev(req, next->bio)) 509 return 0; 510 511 /* 512 * Will it become too large? 513 */ 514 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > 515 blk_rq_get_max_sectors(req)) 516 return 0; 517 518 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 519 if (blk_phys_contig_segment(q, req->biotail, next->bio)) { 520 if (req->nr_phys_segments == 1) 521 req->bio->bi_seg_front_size = seg_size; 522 if (next->nr_phys_segments == 1) 523 next->biotail->bi_seg_back_size = seg_size; 524 total_phys_segments--; 525 } 526 527 if (total_phys_segments > queue_max_segments(q)) 528 return 0; 529 530 if (blk_integrity_merge_rq(q, req, next) == false) 531 return 0; 532 533 /* Merge is OK... */ 534 req->nr_phys_segments = total_phys_segments; 535 return 1; 536 } 537 538 /** 539 * blk_rq_set_mixed_merge - mark a request as mixed merge 540 * @rq: request to mark as mixed merge 541 * 542 * Description: 543 * @rq is about to be mixed merged. Make sure the attributes 544 * which can be mixed are set in each bio and mark @rq as mixed 545 * merged. 546 */ 547 void blk_rq_set_mixed_merge(struct request *rq) 548 { 549 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 550 struct bio *bio; 551 552 if (rq->cmd_flags & REQ_MIXED_MERGE) 553 return; 554 555 /* 556 * @rq will no longer represent mixable attributes for all the 557 * contained bios. It will just track those of the first one. 558 * Distributes the attributs to each bio. 559 */ 560 for (bio = rq->bio; bio; bio = bio->bi_next) { 561 WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) && 562 (bio->bi_rw & REQ_FAILFAST_MASK) != ff); 563 bio->bi_rw |= ff; 564 } 565 rq->cmd_flags |= REQ_MIXED_MERGE; 566 } 567 568 static void blk_account_io_merge(struct request *req) 569 { 570 if (blk_do_io_stat(req)) { 571 struct hd_struct *part; 572 int cpu; 573 574 cpu = part_stat_lock(); 575 part = req->part; 576 577 part_round_stats(cpu, part); 578 part_dec_in_flight(part, rq_data_dir(req)); 579 580 hd_struct_put(part); 581 part_stat_unlock(); 582 } 583 } 584 585 /* 586 * Has to be called with the request spinlock acquired 587 */ 588 static int attempt_merge(struct request_queue *q, struct request *req, 589 struct request *next) 590 { 591 if (!rq_mergeable(req) || !rq_mergeable(next)) 592 return 0; 593 594 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) 595 return 0; 596 597 /* 598 * not contiguous 599 */ 600 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) 601 return 0; 602 603 if (rq_data_dir(req) != rq_data_dir(next) 604 || req->rq_disk != next->rq_disk 605 || req_no_special_merge(next)) 606 return 0; 607 608 if (req->cmd_flags & REQ_WRITE_SAME && 609 !blk_write_same_mergeable(req->bio, next->bio)) 610 return 0; 611 612 /* 613 * If we are allowed to merge, then append bio list 614 * from next to rq and release next. merge_requests_fn 615 * will have updated segment counts, update sector 616 * counts here. 617 */ 618 if (!ll_merge_requests_fn(q, req, next)) 619 return 0; 620 621 /* 622 * If failfast settings disagree or any of the two is already 623 * a mixed merge, mark both as mixed before proceeding. This 624 * makes sure that all involved bios have mixable attributes 625 * set properly. 626 */ 627 if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || 628 (req->cmd_flags & REQ_FAILFAST_MASK) != 629 (next->cmd_flags & REQ_FAILFAST_MASK)) { 630 blk_rq_set_mixed_merge(req); 631 blk_rq_set_mixed_merge(next); 632 } 633 634 /* 635 * At this point we have either done a back merge 636 * or front merge. We need the smaller start_time of 637 * the merged requests to be the current request 638 * for accounting purposes. 639 */ 640 if (time_after(req->start_time, next->start_time)) 641 req->start_time = next->start_time; 642 643 req->biotail->bi_next = next->bio; 644 req->biotail = next->biotail; 645 646 req->__data_len += blk_rq_bytes(next); 647 648 elv_merge_requests(q, req, next); 649 650 /* 651 * 'next' is going away, so update stats accordingly 652 */ 653 blk_account_io_merge(next); 654 655 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 656 if (blk_rq_cpu_valid(next)) 657 req->cpu = next->cpu; 658 659 /* owner-ship of bio passed from next to req */ 660 next->bio = NULL; 661 __blk_put_request(q, next); 662 return 1; 663 } 664 665 int attempt_back_merge(struct request_queue *q, struct request *rq) 666 { 667 struct request *next = elv_latter_request(q, rq); 668 669 if (next) 670 return attempt_merge(q, rq, next); 671 672 return 0; 673 } 674 675 int attempt_front_merge(struct request_queue *q, struct request *rq) 676 { 677 struct request *prev = elv_former_request(q, rq); 678 679 if (prev) 680 return attempt_merge(q, prev, rq); 681 682 return 0; 683 } 684 685 int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 686 struct request *next) 687 { 688 return attempt_merge(q, rq, next); 689 } 690 691 bool blk_rq_merge_ok(struct request *rq, struct bio *bio) 692 { 693 if (!rq_mergeable(rq) || !bio_mergeable(bio)) 694 return false; 695 696 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) 697 return false; 698 699 /* different data direction or already started, don't merge */ 700 if (bio_data_dir(bio) != rq_data_dir(rq)) 701 return false; 702 703 /* must be same device and not a special request */ 704 if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) 705 return false; 706 707 /* only merge integrity protected bio into ditto rq */ 708 if (blk_integrity_merge_bio(rq->q, rq, bio) == false) 709 return false; 710 711 /* must be using the same buffer */ 712 if (rq->cmd_flags & REQ_WRITE_SAME && 713 !blk_write_same_mergeable(rq->bio, bio)) 714 return false; 715 716 /* Only check gaps if the bio carries data */ 717 if (bio_has_data(bio) && req_gap_to_prev(rq, bio)) 718 return false; 719 720 return true; 721 } 722 723 int blk_try_merge(struct request *rq, struct bio *bio) 724 { 725 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) 726 return ELEVATOR_BACK_MERGE; 727 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) 728 return ELEVATOR_FRONT_MERGE; 729 return ELEVATOR_NO_MERGE; 730 } 731