/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct bio *blk_bio_discard_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs,
					 unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	int alignment;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);

	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors)) {
		/* XXX: warn */
		return NULL;
	}

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	alignment = (q->limits.discard_alignment >> 9) % granularity;

	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
}

static struct bio *blk_bio_write_same_split(struct request_queue *q,
					    struct bio *bio,
					    struct bio_set *bs,
					    unsigned *nsegs)
{
	*nsegs = 1;

	if (!q->limits.max_write_same_sectors)
		return NULL;

	if (bio_sectors(bio) <= q->limits.max_write_same_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}

static struct bio *blk_bio_segment_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs,
					 unsigned *segs)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned seg_size = 0, nsegs = 0, sectors = 0;

	bio_for_each_segment(bv, bio, iter) {
		if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
			goto split;

		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
			goto split;

		if (bvprvp && blk_queue_cluster(q)) {
			if (seg_size + bv.bv_len > queue_max_segment_size(q))
				goto new_segment;
			if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv))
				goto new_segment;
			if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
				goto new_segment;

			seg_size += bv.bv_len;
			bvprv = bv;
			bvprvp = &bvprv;
			sectors += bv.bv_len >> 9;
			continue;
		}
new_segment:
		if (nsegs == queue_max_segments(q))
			goto split;

		nsegs++;
		bvprv = bv;
		bvprvp = &bvprv;
		seg_size = bv.bv_len;
		sectors += bv.bv_len >> 9;
	}

	*segs = nsegs;
	return NULL;
split:
	*segs = nsegs;
	return bio_split(bio, sectors, GFP_NOIO, bs);
}

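/*
 * Split *@bio so that it does not exceed the queue's limits (max sectors,
 * max segments, SG gaps, or the discard/write-same limits).  If a split is
 * needed, the remainder of the original bio is resubmitted with
 * generic_make_request() and *@bio is updated to point at the front piece,
 * which is marked REQ_NOMERGE.  The physical segment count of the piece the
 * caller keeps is recorded in bi_phys_segments as a side effect.
 */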
void blk_queue_split(struct request_queue *q, struct bio **bio,
		     struct bio_set *bs)
{
	struct bio *split, *res;
	unsigned nsegs;

	if ((*bio)->bi_rw & REQ_DISCARD)
		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
	else if ((*bio)->bi_rw & REQ_WRITE_SAME)
		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
	else
		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);

	/* physical segments can be figured out during splitting */
	res = split ? split : *bio;
	res->bi_phys_segments = nsegs;
	bio_set_flag(res, BIO_SEG_VALID);

	if (split) {
		/* there is no chance to merge the split bio */
		split->bi_rw |= REQ_NOMERGE;

		bio_chain(split, *bio);
		generic_make_request(*bio);
		*bio = split;
	}
}
EXPORT_SYMBOL(blk_queue_split);

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio,
					     bool no_sg_merge)
{
	struct bio_vec bv, bvprv = { NULL };
	int cluster, prev = 0;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;
	struct bvec_iter iter;

	if (!bio)
		return 0;

	/*
	 * This should probably be returning 0, but blk_add_request_payload()
	 * (Christoph!!!!)
	 */
	if (bio->bi_rw & REQ_DISCARD)
		return 1;

	if (bio->bi_rw & REQ_WRITE_SAME)
		return 1;

	fbio = bio;
	cluster = blk_queue_cluster(q);
	seg_size = 0;
	nr_phys_segs = 0;
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, iter) {
			/*
			 * If SG merging is disabled, each bio vector is
			 * a segment
			 */
			if (no_sg_merge)
				goto new_segment;

			if (prev && cluster) {
				if (seg_size + bv.bv_len
				    > queue_max_segment_size(q))
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
					goto new_segment;

				seg_size += bv.bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			prev = 1;
			seg_size = bv.bv_len;
		}
		bbio = bio;
	}

	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
			&rq->q->queue_flags);

	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
			no_sg_merge);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	unsigned short seg_cnt;

	/* estimate segment number by bi_vcnt for non-cloned bio */
	if (bio_flagged(bio, BIO_CLONED))
		seg_cnt = bio_segments(bio);
	else
		seg_cnt = bio->bi_vcnt;

	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
			(seg_cnt < queue_max_segments(q)))
		bio->bi_phys_segments = seg_cnt;
	else {
		struct bio *nxt = bio->bi_next;

		bio->bi_next = NULL;
		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
		bio->bi_next = nxt;
	}

	bio_set_flag(bio, BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

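/*
 * Return 1 if the last segment of @bio and the first segment of @nxt are
 * physically contiguous and small enough to be merged into a single segment
 * without violating the queue's segment size and boundary limits,
 * 0 otherwise.
 */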
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	struct bio_vec end_bv = { NULL }, nxt_bv;
	struct bvec_iter iter;

	if (!blk_queue_cluster(q))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    queue_max_segment_size(q))
		return 0;

	if (!bio_has_data(bio))
		return 1;

	bio_for_each_segment(end_bv, bio, iter)
		if (end_bv.bv_len == iter.bi_size)
			break;

	nxt_bv = bio_iovec(nxt);

	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
		return 1;

	return 0;
}

static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
		     struct scatterlist *sglist, struct bio_vec *bvprv,
		     struct scatterlist **sg, int *nsegs, int *cluster)
{
	int nbytes = bvec->bv_len;

	if (*sg && *cluster) {
		if ((*sg)->length + nbytes > queue_max_segment_size(q))
			goto new_segment;

		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
			goto new_segment;
		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
			goto new_segment;

		(*sg)->length += nbytes;
	} else {
new_segment:
		if (!*sg)
			*sg = sglist;
		else {
			/*
			 * If the driver previously mapped a shorter
			 * list, we could see a termination bit
			 * prematurely unless it fully inits the sg
			 * table on each mapping. We KNOW that there
			 * must be more entries here or the driver
			 * would be buggy, so force clear the
			 * termination bit to avoid doing a full
			 * sg_init_table() in drivers for each command.
			 */
			sg_unmark_end(*sg);
			*sg = sg_next(*sg);
		}

		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
		(*nsegs)++;
	}
	*bvprv = *bvec;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int nsegs, cluster;

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	if (bio->bi_rw & REQ_DISCARD) {
		/*
		 * This is a hack - drivers should be neither modifying the
		 * biovec, nor relying on bi_vcnt - but because of
		 * blk_add_request_payload(), a discard bio may or may not have
		 * a payload we need to set up here (thank you Christoph) and
		 * bi_vcnt is really the only way of telling if we need to.
		 */

		if (bio->bi_vcnt)
			goto single_segment;

		return 0;
	}

	if (bio->bi_rw & REQ_WRITE_SAME) {
single_segment:
		*sg = sglist;
		bvec = bio_iovec(bio);
		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		return 1;
	}

	for_each_bio(bio)
		bio_for_each_segment(bvec, bio, iter)
			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
					     &nsegs, &cluster);

	return nsegs;
}

/*
 * map a request to scatterlist, return number of sg entries set up. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	if (rq->bio)
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (rq->cmd_flags & REQ_WRITE)
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg_unmark_end(sg);
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);

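/*
 * Typical caller pattern (a sketch only; 'sgl' and 'nents' are illustrative
 * names, not from this file): a driver sizes its table from
 * rq->nr_phys_segments and then maps the request before programming DMA:
 *
 *	sg_init_table(sgl, rq->nr_phys_segments);
 *	nents = blk_rq_map_sg(rq->q, rq, sgl);
 *	... hand the first 'nents' entries of sgl to the DMA engine ...
 */
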
static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
		goto no_merge;

	if (blk_integrity_merge_bio(q, req, bio) == false)
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump the
	 * segment counter.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
	return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}

/*
 * blk-mq uses req->special to carry normal driver per-request payload; it
 * does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
	struct request_queue *q = req->q;

	return !q->mq_ops && req->special;
}

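/*
 * Check whether @req and @next may be combined into a single request
 * without exceeding the queue's size, segment and integrity limits.  On
 * success, req->nr_phys_segments is updated to the combined count and 1 is
 * returned; the bios themselves are not moved here (see attempt_merge()).
 */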
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check if either of the requests are re-queued
	 * requests.  Can't merge them if they are.
	 */
	if (req_no_special_merge(req) || req_no_special_merge(next))
		return 0;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > queue_max_segments(q))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->cmd_flags & REQ_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
			     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
		bio->bi_rw |= ff;
	}
	rq->cmd_flags |= REQ_MIXED_MERGE;
}

static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_round_stats(cpu, part);
		part_dec_in_flight(part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}

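/*
 * Try to coalesce @next into @req.  On success the bios of @next are
 * appended to @req, the accounting and I/O priority of the two requests
 * are combined, @next is freed, and 1 is returned; 0 means the requests
 * were left untouched.
 */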
/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			 struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
		return 0;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return 0;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || req_no_special_merge(next))
		return 0;

	if (req->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next.  merge_requests_fn
	 * will have updated the segment counts; update the sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or a front
	 * merge.  Use the smaller start_time of the two requests for
	 * the merged request, for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/* ownership of bio passed from next to req */
	next->bio = NULL;
	__blk_put_request(q, next);
	return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* must be same device and not a special request */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* must be using the same buffer */
	if (rq->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}