1 /* 2 * Functions related to segment and merge handling 3 */ 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/bio.h> 7 #include <linux/blkdev.h> 8 #include <linux/scatterlist.h> 9 10 #include "blk.h" 11 12 static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 13 struct bio *bio, 14 bool no_sg_merge) 15 { 16 struct bio_vec bv, bvprv = { NULL }; 17 int cluster, high, highprv = 1; 18 unsigned int seg_size, nr_phys_segs; 19 struct bio *fbio, *bbio; 20 struct bvec_iter iter; 21 22 if (!bio) 23 return 0; 24 25 /* 26 * This should probably be returning 0, but blk_add_request_payload() 27 * (Christoph!!!!) 28 */ 29 if (bio->bi_rw & REQ_DISCARD) 30 return 1; 31 32 if (bio->bi_rw & REQ_WRITE_SAME) 33 return 1; 34 35 fbio = bio; 36 cluster = blk_queue_cluster(q); 37 seg_size = 0; 38 nr_phys_segs = 0; 39 high = 0; 40 for_each_bio(bio) { 41 bio_for_each_segment(bv, bio, iter) { 42 /* 43 * If SG merging is disabled, each bio vector is 44 * a segment 45 */ 46 if (no_sg_merge) 47 goto new_segment; 48 49 /* 50 * the trick here is making sure that a high page is 51 * never considered part of another segment, since 52 * that might change with the bounce page. 53 */ 54 high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); 55 if (!high && !highprv && cluster) { 56 if (seg_size + bv.bv_len 57 > queue_max_segment_size(q)) 58 goto new_segment; 59 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) 60 goto new_segment; 61 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) 62 goto new_segment; 63 64 seg_size += bv.bv_len; 65 bvprv = bv; 66 continue; 67 } 68 new_segment: 69 if (nr_phys_segs == 1 && seg_size > 70 fbio->bi_seg_front_size) 71 fbio->bi_seg_front_size = seg_size; 72 73 nr_phys_segs++; 74 bvprv = bv; 75 seg_size = bv.bv_len; 76 highprv = high; 77 } 78 bbio = bio; 79 } 80 81 if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size) 82 fbio->bi_seg_front_size = seg_size; 83 if (seg_size > bbio->bi_seg_back_size) 84 bbio->bi_seg_back_size = seg_size; 85 86 return nr_phys_segs; 87 } 88 89 void blk_recalc_rq_segments(struct request *rq) 90 { 91 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, 92 &rq->q->queue_flags); 93 94 rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, 95 no_sg_merge); 96 } 97 98 void blk_recount_segments(struct request_queue *q, struct bio *bio) 99 { 100 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, 101 &q->queue_flags); 102 103 if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) && 104 bio->bi_vcnt < queue_max_segments(q)) 105 bio->bi_phys_segments = bio->bi_vcnt; 106 else { 107 struct bio *nxt = bio->bi_next; 108 109 bio->bi_next = NULL; 110 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, 111 no_sg_merge); 112 bio->bi_next = nxt; 113 } 114 115 bio->bi_flags |= (1 << BIO_SEG_VALID); 116 } 117 EXPORT_SYMBOL(blk_recount_segments); 118 119 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 120 struct bio *nxt) 121 { 122 struct bio_vec end_bv = { NULL }, nxt_bv; 123 struct bvec_iter iter; 124 125 if (!blk_queue_cluster(q)) 126 return 0; 127 128 if (bio->bi_seg_back_size + nxt->bi_seg_front_size > 129 queue_max_segment_size(q)) 130 return 0; 131 132 if (!bio_has_data(bio)) 133 return 1; 134 135 bio_for_each_segment(end_bv, bio, iter) 136 if (end_bv.bv_len == iter.bi_size) 137 break; 138 139 nxt_bv = bio_iovec(nxt); 140 141 if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) 142 return 0; 143 144 /* 145 * bio and nxt are contiguous in memory; check if the queue allows 146 * these two to be merged into one 147 */ 148 if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) 149 return 1; 150 151 return 0; 152 } 153 154 static inline void 155 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, 156 struct scatterlist *sglist, struct bio_vec *bvprv, 157 struct scatterlist **sg, int *nsegs, int *cluster) 158 { 159 160 int nbytes = bvec->bv_len; 161 162 if (*sg && *cluster) { 163 if ((*sg)->length + nbytes > queue_max_segment_size(q)) 164 goto new_segment; 165 166 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 167 goto new_segment; 168 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 169 goto new_segment; 170 171 (*sg)->length += nbytes; 172 } else { 173 new_segment: 174 if (!*sg) 175 *sg = sglist; 176 else { 177 /* 178 * If the driver previously mapped a shorter 179 * list, we could see a termination bit 180 * prematurely unless it fully inits the sg 181 * table on each mapping. We KNOW that there 182 * must be more entries here or the driver 183 * would be buggy, so force clear the 184 * termination bit to avoid doing a full 185 * sg_init_table() in drivers for each command. 186 */ 187 sg_unmark_end(*sg); 188 *sg = sg_next(*sg); 189 } 190 191 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); 192 (*nsegs)++; 193 } 194 *bvprv = *bvec; 195 } 196 197 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, 198 struct scatterlist *sglist, 199 struct scatterlist **sg) 200 { 201 struct bio_vec bvec, bvprv = { NULL }; 202 struct bvec_iter iter; 203 int nsegs, cluster; 204 205 nsegs = 0; 206 cluster = blk_queue_cluster(q); 207 208 if (bio->bi_rw & REQ_DISCARD) { 209 /* 210 * This is a hack - drivers should be neither modifying the 211 * biovec, nor relying on bi_vcnt - but because of 212 * blk_add_request_payload(), a discard bio may or may not have 213 * a payload we need to set up here (thank you Christoph) and 214 * bi_vcnt is really the only way of telling if we need to. 215 */ 216 217 if (bio->bi_vcnt) 218 goto single_segment; 219 220 return 0; 221 } 222 223 if (bio->bi_rw & REQ_WRITE_SAME) { 224 single_segment: 225 *sg = sglist; 226 bvec = bio_iovec(bio); 227 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 228 return 1; 229 } 230 231 for_each_bio(bio) 232 bio_for_each_segment(bvec, bio, iter) 233 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, 234 &nsegs, &cluster); 235 236 return nsegs; 237 } 238 239 /* 240 * map a request to scatterlist, return number of sg entries setup. Caller 241 * must make sure sg can hold rq->nr_phys_segments entries 242 */ 243 int blk_rq_map_sg(struct request_queue *q, struct request *rq, 244 struct scatterlist *sglist) 245 { 246 struct scatterlist *sg = NULL; 247 int nsegs = 0; 248 249 if (rq->bio) 250 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); 251 252 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 253 (blk_rq_bytes(rq) & q->dma_pad_mask)) { 254 unsigned int pad_len = 255 (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; 256 257 sg->length += pad_len; 258 rq->extra_len += pad_len; 259 } 260 261 if (q->dma_drain_size && q->dma_drain_needed(rq)) { 262 if (rq->cmd_flags & REQ_WRITE) 263 memset(q->dma_drain_buffer, 0, q->dma_drain_size); 264 265 sg->page_link &= ~0x02; 266 sg = sg_next(sg); 267 sg_set_page(sg, virt_to_page(q->dma_drain_buffer), 268 q->dma_drain_size, 269 ((unsigned long)q->dma_drain_buffer) & 270 (PAGE_SIZE - 1)); 271 nsegs++; 272 rq->extra_len += q->dma_drain_size; 273 } 274 275 if (sg) 276 sg_mark_end(sg); 277 278 return nsegs; 279 } 280 EXPORT_SYMBOL(blk_rq_map_sg); 281 282 /** 283 * blk_bio_map_sg - map a bio to a scatterlist 284 * @q: request_queue in question 285 * @bio: bio being mapped 286 * @sglist: scatterlist being mapped 287 * 288 * Note: 289 * Caller must make sure sg can hold bio->bi_phys_segments entries 290 * 291 * Will return the number of sg entries setup 292 */ 293 int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 294 struct scatterlist *sglist) 295 { 296 struct scatterlist *sg = NULL; 297 int nsegs; 298 struct bio *next = bio->bi_next; 299 bio->bi_next = NULL; 300 301 nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); 302 bio->bi_next = next; 303 if (sg) 304 sg_mark_end(sg); 305 306 BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments); 307 return nsegs; 308 } 309 EXPORT_SYMBOL(blk_bio_map_sg); 310 311 static inline int ll_new_hw_segment(struct request_queue *q, 312 struct request *req, 313 struct bio *bio) 314 { 315 int nr_phys_segs = bio_phys_segments(q, bio); 316 317 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) 318 goto no_merge; 319 320 if (blk_integrity_merge_bio(q, req, bio) == false) 321 goto no_merge; 322 323 /* 324 * This will form the start of a new hw segment. Bump both 325 * counters. 326 */ 327 req->nr_phys_segments += nr_phys_segs; 328 return 1; 329 330 no_merge: 331 req->cmd_flags |= REQ_NOMERGE; 332 if (req == q->last_merge) 333 q->last_merge = NULL; 334 return 0; 335 } 336 337 int ll_back_merge_fn(struct request_queue *q, struct request *req, 338 struct bio *bio) 339 { 340 if (blk_rq_sectors(req) + bio_sectors(bio) > 341 blk_rq_get_max_sectors(req)) { 342 req->cmd_flags |= REQ_NOMERGE; 343 if (req == q->last_merge) 344 q->last_merge = NULL; 345 return 0; 346 } 347 if (!bio_flagged(req->biotail, BIO_SEG_VALID)) 348 blk_recount_segments(q, req->biotail); 349 if (!bio_flagged(bio, BIO_SEG_VALID)) 350 blk_recount_segments(q, bio); 351 352 return ll_new_hw_segment(q, req, bio); 353 } 354 355 int ll_front_merge_fn(struct request_queue *q, struct request *req, 356 struct bio *bio) 357 { 358 if (blk_rq_sectors(req) + bio_sectors(bio) > 359 blk_rq_get_max_sectors(req)) { 360 req->cmd_flags |= REQ_NOMERGE; 361 if (req == q->last_merge) 362 q->last_merge = NULL; 363 return 0; 364 } 365 if (!bio_flagged(bio, BIO_SEG_VALID)) 366 blk_recount_segments(q, bio); 367 if (!bio_flagged(req->bio, BIO_SEG_VALID)) 368 blk_recount_segments(q, req->bio); 369 370 return ll_new_hw_segment(q, req, bio); 371 } 372 373 /* 374 * blk-mq uses req->special to carry normal driver per-request payload, it 375 * does not indicate a prepared command that we cannot merge with. 376 */ 377 static bool req_no_special_merge(struct request *req) 378 { 379 struct request_queue *q = req->q; 380 381 return !q->mq_ops && req->special; 382 } 383 384 static int ll_merge_requests_fn(struct request_queue *q, struct request *req, 385 struct request *next) 386 { 387 int total_phys_segments; 388 unsigned int seg_size = 389 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; 390 391 /* 392 * First check if the either of the requests are re-queued 393 * requests. Can't merge them if they are. 394 */ 395 if (req_no_special_merge(req) || req_no_special_merge(next)) 396 return 0; 397 398 /* 399 * Will it become too large? 400 */ 401 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > 402 blk_rq_get_max_sectors(req)) 403 return 0; 404 405 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 406 if (blk_phys_contig_segment(q, req->biotail, next->bio)) { 407 if (req->nr_phys_segments == 1) 408 req->bio->bi_seg_front_size = seg_size; 409 if (next->nr_phys_segments == 1) 410 next->biotail->bi_seg_back_size = seg_size; 411 total_phys_segments--; 412 } 413 414 if (total_phys_segments > queue_max_segments(q)) 415 return 0; 416 417 if (blk_integrity_merge_rq(q, req, next) == false) 418 return 0; 419 420 /* Merge is OK... */ 421 req->nr_phys_segments = total_phys_segments; 422 return 1; 423 } 424 425 /** 426 * blk_rq_set_mixed_merge - mark a request as mixed merge 427 * @rq: request to mark as mixed merge 428 * 429 * Description: 430 * @rq is about to be mixed merged. Make sure the attributes 431 * which can be mixed are set in each bio and mark @rq as mixed 432 * merged. 433 */ 434 void blk_rq_set_mixed_merge(struct request *rq) 435 { 436 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 437 struct bio *bio; 438 439 if (rq->cmd_flags & REQ_MIXED_MERGE) 440 return; 441 442 /* 443 * @rq will no longer represent mixable attributes for all the 444 * contained bios. It will just track those of the first one. 445 * Distributes the attributs to each bio. 446 */ 447 for (bio = rq->bio; bio; bio = bio->bi_next) { 448 WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) && 449 (bio->bi_rw & REQ_FAILFAST_MASK) != ff); 450 bio->bi_rw |= ff; 451 } 452 rq->cmd_flags |= REQ_MIXED_MERGE; 453 } 454 455 static void blk_account_io_merge(struct request *req) 456 { 457 if (blk_do_io_stat(req)) { 458 struct hd_struct *part; 459 int cpu; 460 461 cpu = part_stat_lock(); 462 part = req->part; 463 464 part_round_stats(cpu, part); 465 part_dec_in_flight(part, rq_data_dir(req)); 466 467 hd_struct_put(part); 468 part_stat_unlock(); 469 } 470 } 471 472 /* 473 * Has to be called with the request spinlock acquired 474 */ 475 static int attempt_merge(struct request_queue *q, struct request *req, 476 struct request *next) 477 { 478 if (!rq_mergeable(req) || !rq_mergeable(next)) 479 return 0; 480 481 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) 482 return 0; 483 484 /* 485 * not contiguous 486 */ 487 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) 488 return 0; 489 490 if (rq_data_dir(req) != rq_data_dir(next) 491 || req->rq_disk != next->rq_disk 492 || req_no_special_merge(next)) 493 return 0; 494 495 if (req->cmd_flags & REQ_WRITE_SAME && 496 !blk_write_same_mergeable(req->bio, next->bio)) 497 return 0; 498 499 /* 500 * If we are allowed to merge, then append bio list 501 * from next to rq and release next. merge_requests_fn 502 * will have updated segment counts, update sector 503 * counts here. 504 */ 505 if (!ll_merge_requests_fn(q, req, next)) 506 return 0; 507 508 /* 509 * If failfast settings disagree or any of the two is already 510 * a mixed merge, mark both as mixed before proceeding. This 511 * makes sure that all involved bios have mixable attributes 512 * set properly. 513 */ 514 if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || 515 (req->cmd_flags & REQ_FAILFAST_MASK) != 516 (next->cmd_flags & REQ_FAILFAST_MASK)) { 517 blk_rq_set_mixed_merge(req); 518 blk_rq_set_mixed_merge(next); 519 } 520 521 /* 522 * At this point we have either done a back merge 523 * or front merge. We need the smaller start_time of 524 * the merged requests to be the current request 525 * for accounting purposes. 526 */ 527 if (time_after(req->start_time, next->start_time)) 528 req->start_time = next->start_time; 529 530 req->biotail->bi_next = next->bio; 531 req->biotail = next->biotail; 532 533 req->__data_len += blk_rq_bytes(next); 534 535 elv_merge_requests(q, req, next); 536 537 /* 538 * 'next' is going away, so update stats accordingly 539 */ 540 blk_account_io_merge(next); 541 542 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 543 if (blk_rq_cpu_valid(next)) 544 req->cpu = next->cpu; 545 546 /* owner-ship of bio passed from next to req */ 547 next->bio = NULL; 548 __blk_put_request(q, next); 549 return 1; 550 } 551 552 int attempt_back_merge(struct request_queue *q, struct request *rq) 553 { 554 struct request *next = elv_latter_request(q, rq); 555 556 if (next) 557 return attempt_merge(q, rq, next); 558 559 return 0; 560 } 561 562 int attempt_front_merge(struct request_queue *q, struct request *rq) 563 { 564 struct request *prev = elv_former_request(q, rq); 565 566 if (prev) 567 return attempt_merge(q, prev, rq); 568 569 return 0; 570 } 571 572 int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 573 struct request *next) 574 { 575 return attempt_merge(q, rq, next); 576 } 577 578 bool blk_rq_merge_ok(struct request *rq, struct bio *bio) 579 { 580 struct request_queue *q = rq->q; 581 582 if (!rq_mergeable(rq) || !bio_mergeable(bio)) 583 return false; 584 585 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) 586 return false; 587 588 /* different data direction or already started, don't merge */ 589 if (bio_data_dir(bio) != rq_data_dir(rq)) 590 return false; 591 592 /* must be same device and not a special request */ 593 if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) 594 return false; 595 596 /* only merge integrity protected bio into ditto rq */ 597 if (blk_integrity_merge_bio(rq->q, rq, bio) == false) 598 return false; 599 600 /* must be using the same buffer */ 601 if (rq->cmd_flags & REQ_WRITE_SAME && 602 !blk_write_same_mergeable(rq->bio, bio)) 603 return false; 604 605 if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { 606 struct bio_vec *bprev; 607 608 bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; 609 if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) 610 return false; 611 } 612 613 return true; 614 } 615 616 int blk_try_merge(struct request *rq, struct bio *bio) 617 { 618 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) 619 return ELEVATOR_BACK_MERGE; 620 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) 621 return ELEVATOR_FRONT_MERGE; 622 return ELEVATOR_NO_MERGE; 623 } 624