/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio)
{
	struct bio_vec bv, bvprv = { NULL };
	int cluster, high, highprv = 1;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;
	struct bvec_iter iter;

	if (!bio)
		return 0;

	/*
	 * This should probably be returning 0, but blk_add_request_payload()
	 * (Christoph!!!!)
	 */
	if (bio->bi_rw & REQ_DISCARD)
		return 1;

	if (bio->bi_rw & REQ_WRITE_SAME)
		return 1;

	fbio = bio;
	cluster = blk_queue_cluster(q);
	seg_size = 0;
	nr_phys_segs = 0;
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, iter) {
			/*
			 * the trick here is making sure that a high page is
			 * never considered part of another segment, since that
			 * might change with the bounce page.
			 */
			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
			if (!high && !highprv && cluster) {
				if (seg_size + bv.bv_len
				    > queue_max_segment_size(q))
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
					goto new_segment;

				seg_size += bv.bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			seg_size = bv.bv_len;
			highprv = high;
		}
		bbio = bio;
	}

	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	struct bio *nxt = bio->bi_next;

	bio->bi_next = NULL;
	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
	bio->bi_next = nxt;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	struct bio_vec end_bv = { NULL }, nxt_bv;
	struct bvec_iter iter;

	if (!blk_queue_cluster(q))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    queue_max_segment_size(q))
		return 0;

	if (!bio_has_data(bio))
		return 1;

	bio_for_each_segment(end_bv, bio, iter)
		if (end_bv.bv_len == iter.bi_size)
			break;

	nxt_bv = bio_iovec(nxt);

	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
		return 1;

	return 0;
}
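
/*
 * Add a single bio_vec to the scatterlist being built: extend the current
 * sg entry when clustering is enabled and the queue's segment size and
 * boundary limits allow it, otherwise start a new sg entry.
 */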
static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
		     struct scatterlist *sglist, struct bio_vec *bvprv,
		     struct scatterlist **sg, int *nsegs, int *cluster)
{

	int nbytes = bvec->bv_len;

	if (*sg && *cluster) {
		if ((*sg)->length + nbytes > queue_max_segment_size(q))
			goto new_segment;

		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
			goto new_segment;
		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
			goto new_segment;

		(*sg)->length += nbytes;
	} else {
new_segment:
		if (!*sg)
			*sg = sglist;
		else {
			/*
			 * If the driver previously mapped a shorter
			 * list, we could see a termination bit
			 * prematurely unless it fully inits the sg
			 * table on each mapping. We KNOW that there
			 * must be more entries here or the driver
			 * would be buggy, so force clear the
			 * termination bit to avoid doing a full
			 * sg_init_table() in drivers for each command.
			 */
			sg_unmark_end(*sg);
			*sg = sg_next(*sg);
		}

		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
		(*nsegs)++;
	}
	*bvprv = *bvec;
}
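
/*
 * Build a scatterlist from every segment of every bio in the chain.
 * Discard and write-same bios carry at most a single payload segment and
 * are handled separately.
 */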
static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int nsegs, cluster;

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	if (bio->bi_rw & REQ_DISCARD) {
		/*
		 * This is a hack - drivers should be neither modifying the
		 * biovec, nor relying on bi_vcnt - but because of
		 * blk_add_request_payload(), a discard bio may or may not have
		 * a payload we need to set up here (thank you Christoph) and
		 * bi_vcnt is really the only way of telling if we need to.
		 */

		if (bio->bi_vcnt)
			goto single_segment;

		return 0;
	}

	if (bio->bi_rw & REQ_WRITE_SAME) {
single_segment:
		*sg = sglist;
		bvec = bio_iovec(bio);
		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
		return 1;
	}

	for_each_bio(bio)
		bio_for_each_segment(bvec, bio, iter)
			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
					     &nsegs, &cluster);

	return nsegs;
}

/*
 * Map a request to a scatterlist; returns the number of sg entries set up.
 * The caller must make sure sg can hold rq->nr_phys_segments entries.
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	if (rq->bio)
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (rq->cmd_flags & REQ_WRITE)
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg->page_link &= ~0x02;
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);

/**
 * blk_bio_map_sg - map a bio to a scatterlist
 * @q: request_queue in question
 * @bio: bio being mapped
 * @sglist: scatterlist being mapped
 *
 * Note:
 *    Caller must make sure sg can hold bio->bi_phys_segments entries
 *
 * Will return the number of sg entries set up
 */
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
		   struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs;
	struct bio *next = bio->bi_next;
	bio->bi_next = NULL;

	nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
	bio->bi_next = next;
	if (sg)
		sg_mark_end(sg);

	BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
	return nsegs;
}
EXPORT_SYMBOL(blk_bio_map_sg);
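
/*
 * Account for the physical segments of @bio being added to @req.  Fails
 * (and marks the request unmergeable) if the combined segment count would
 * exceed the queue limit or if the integrity payloads cannot be merged.
 */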
static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
		goto no_merge;

	if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump the
	 * physical segment counter.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
	return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}

/*
 * blk-mq uses req->special to carry normal driver per-request payload;
 * it does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
	struct request_queue *q = req->q;

	return !q->mq_ops && req->special;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check whether either of the requests has been re-queued;
	 * re-queued requests cannot be merged.
	 */
	if (req_no_special_merge(req) || req_no_special_merge(next))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > queue_max_segments(q))
		return 0;

	if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->cmd_flags & REQ_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
			     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
		bio->bi_rw |= ff;
	}
	rq->cmd_flags |= REQ_MIXED_MERGE;
}
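
/*
 * A request that disappears in a merge is no longer in flight on its
 * partition; update the partition statistics and drop it from the
 * in-flight count.
 */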
static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_round_stats(cpu, part);
		part_dec_in_flight(part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}

/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			 struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
		return 0;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return 0;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || req_no_special_merge(next))
		return 0;

	if (req->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return 0;

	/*
	 * If we are allowed to merge, then append the bio list from next
	 * to rq and release next.  ll_merge_requests_fn will have updated
	 * the segment counts; update the sector counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge
	 * or front merge. We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/* ownership of bio passed from next to req */
	next->bio = NULL;
	__blk_put_request(q, next);
	return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	return attempt_merge(q, rq, next);
}
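
/*
 * Check the basic preconditions for merging @bio into @rq: merge flags,
 * data direction, target disk, special payloads, integrity protection
 * and write-same buffers must all be compatible.
 */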
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* must be same device and not a special request */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (bio_integrity(bio) != blk_integrity_rq(rq))
		return false;

	/* must be using the same buffer */
	if (rq->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}