/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio)
{
	struct bio_vec bv, bvprv = { NULL };
	int cluster, high, highprv = 1;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;
	struct bvec_iter iter;

	if (!bio)
		return 0;

	fbio = bio;
	cluster = blk_queue_cluster(q);
	seg_size = 0;
	nr_phys_segs = 0;
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, iter) {
			/*
			 * the trick here is making sure that a high page is
			 * never considered part of another segment, since
			 * that might change with the bounce page.
			 */
			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
			if (!high && !highprv && cluster) {
				if (seg_size + bv.bv_len
				    > queue_max_segment_size(q))
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
					goto new_segment;

				seg_size += bv.bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			seg_size = bv.bv_len;
			highprv = high;
		}
		bbio = bio;
	}

	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	struct bio *nxt = bio->bi_next;

	bio->bi_next = NULL;
	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
	bio->bi_next = nxt;
	bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	struct bio_vec end_bv, nxt_bv;
	struct bvec_iter iter;

	uninitialized_var(end_bv);

	if (!blk_queue_cluster(q))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    queue_max_segment_size(q))
		return 0;

	if (!bio_has_data(bio))
		return 1;

	bio_for_each_segment(end_bv, bio, iter)
		if (end_bv.bv_len == iter.bi_size)
			break;

	nxt_bv = bio_iovec(nxt);

	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
		return 1;

	return 0;
}
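
/*
 * For reference, a rough sketch of what the two merge predicates used
 * above (and again in __blk_segment_map_sg() below) test in this kernel
 * generation.  The authoritative macros live in <linux/bio.h>; take this
 * as an illustration rather than the definition:
 *
 *	two bvecs are physically mergeable when the previous one ends
 *	exactly where the next one begins:
 *
 *		bvec_to_phys(&bvprv) + bvprv.bv_len == bvec_to_phys(&bv)
 *
 *	and they respect the queue's segment boundary when both ends fall
 *	inside the same boundary window:
 *
 *		mask = queue_segment_boundary(q);
 *		(bvec_to_phys(&bvprv) | mask) ==
 *			((bvec_to_phys(&bv) + bv.bv_len - 1) | mask)
 */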

static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
		     struct scatterlist *sglist, struct bio_vec *bvprv,
		     struct scatterlist **sg, int *nsegs, int *cluster)
{
	int nbytes = bvec->bv_len;

	if (*sg && *cluster) {
		if ((*sg)->length + nbytes > queue_max_segment_size(q))
			goto new_segment;

		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
			goto new_segment;
		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
			goto new_segment;

		(*sg)->length += nbytes;
	} else {
new_segment:
		if (!*sg)
			*sg = sglist;
		else {
			/*
			 * If the driver previously mapped a shorter
			 * list, we could see a termination bit
			 * prematurely unless it fully inits the sg
			 * table on each mapping. We KNOW that there
			 * must be more entries here or the driver
			 * would be buggy, so force clear the
			 * termination bit to avoid doing a full
			 * sg_init_table() in drivers for each command.
			 */
			sg_unmark_end(*sg);
			*sg = sg_next(*sg);
		}

		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
		(*nsegs)++;
	}
	*bvprv = *bvec;
}

/*
 * map a request to scatterlist, return number of sg entries set up. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct bio_vec bvec, bvprv;
	struct req_iterator iter;
	struct scatterlist *sg;
	int nsegs, cluster;

	uninitialized_var(bvprv);

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	/*
	 * for each bio in rq
	 */
	sg = NULL;
	rq_for_each_segment(bvec, rq, iter) {
		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
				     &nsegs, &cluster);
	} /* segments in rq */

	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (rq->cmd_flags & REQ_WRITE)
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg->page_link &= ~0x02;
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);

/**
 * blk_bio_map_sg - map a bio to a scatterlist
 * @q: request_queue in question
 * @bio: bio being mapped
 * @sglist: scatterlist being mapped
 *
 * Note:
 *    Caller must make sure sg can hold bio->bi_phys_segments entries
 *
 * Will return the number of sg entries set up
 */
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
		   struct scatterlist *sglist)
{
	struct bio_vec bvec, bvprv;
	struct scatterlist *sg;
	int nsegs, cluster;
	struct bvec_iter iter;

	uninitialized_var(bvprv);

	nsegs = 0;
	cluster = blk_queue_cluster(q);

	sg = NULL;
	bio_for_each_segment(bvec, bio, iter) {
		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
				     &nsegs, &cluster);
	} /* segments in bio */

	if (sg)
		sg_mark_end(sg);

	BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
	return nsegs;
}
EXPORT_SYMBOL(blk_bio_map_sg);
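
/*
 * Usage sketch (not part of this file): a typical block driver maps a
 * request for DMA along these lines, where "cmd" and "dev" are
 * hypothetical driver-private objects and cmd->sgl has room for
 * rq->nr_phys_segments entries (bounded by queue_max_segments()):
 *
 *	sg_init_table(cmd->sgl, queue_max_segments(q));
 *	nents = blk_rq_map_sg(q, rq, cmd->sgl);
 *	count = dma_map_sg(dev, cmd->sgl, nents,
 *			   rq_data_dir(rq) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 *
 * The driver then programs "count" DMA descriptors from cmd->sgl and, on
 * completion, unmaps with dma_unmap_sg().  Error handling is omitted.
 */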

static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
		goto no_merge;

	if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req->cmd_flags |= REQ_NOMERGE;
	if (req == q->last_merge)
		q->last_merge = NULL;
	return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req)) {
		req->cmd_flags |= REQ_NOMERGE;
		if (req == q->last_merge)
			q->last_merge = NULL;
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}

/*
 * blk-mq uses req->special to carry normal driver per-request payload;
 * it does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
	struct request_queue *q = req->q;

	return !q->mq_ops && req->special;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check if either of the requests are re-queued
	 * requests.  Can't merge them if they are.
	 */
	if (req_no_special_merge(req) || req_no_special_merge(next))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > queue_max_segments(q))
		return 0;

	if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}
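
/*
 * Worked example for the accounting above (numbers are hypothetical):
 * req ends in a 1K tail segment (biotail->bi_seg_back_size == 1K) and
 * next starts with a 3K head segment (bio->bi_seg_front_size == 3K).
 * If blk_phys_contig_segment() finds the two physically contiguous,
 * within the segment boundary, and 1K + 3K <= queue_max_segment_size(),
 * the tail and head collapse into one 4K segment, so the merged request
 * needs req->nr_phys_segments + next->nr_phys_segments - 1 segments.
 */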

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->cmd_flags & REQ_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
			     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
		bio->bi_rw |= ff;
	}
	rq->cmd_flags |= REQ_MIXED_MERGE;
}

static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_round_stats(cpu, part);
		part_dec_in_flight(part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}
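
/*
 * Mixed-merge example (illustrative only): suppose req was issued with
 * REQ_FAILFAST_TRANSPORT set and next with no failfast bits.  The two
 * can still be merged; attempt_merge() below calls
 * blk_rq_set_mixed_merge() on both requests first, so each bio keeps
 * carrying its own failfast bits and completion can honour them per bio
 * even though the merged request carries only one set of cmd_flags.
 */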

/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
			 struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return 0;

	if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
		return 0;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return 0;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || req_no_special_merge(next))
		return 0;

	if (req->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return 0;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next.  merge_requests_fn
	 * will have updated segment counts; update sector
	 * counts here.
	 */
	if (!ll_merge_requests_fn(q, req, next))
		return 0;

	/*
	 * If failfast settings disagree or either of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge
	 * or front merge.  We need the smaller start_time of
	 * the merged requests to be the current request
	 * for accounting purposes.
	 */
	if (time_after(req->start_time, next->start_time))
		req->start_time = next->start_time;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/* ownership of bio passed from next to req */
	next->bio = NULL;
	__blk_put_request(q, next);
	return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* must be same device and not a special request */
	if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (bio_integrity(bio) != blk_integrity_rq(rq))
		return false;

	/* must be using the same buffer */
	if (rq->cmd_flags & REQ_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}
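
/*
 * Usage sketch for the two helpers above (simplified from how the
 * elevator core of this era decides whether a bio can join a cached
 * candidate request; "rq" is the candidate, details omitted):
 *
 *	if (rq && blk_rq_merge_ok(rq, bio)) {
 *		switch (blk_try_merge(rq, bio)) {
 *		case ELEVATOR_BACK_MERGE:
 *			append bio behind rq (see ll_back_merge_fn());
 *			break;
 *		case ELEVATOR_FRONT_MERGE:
 *			place bio in front of rq (see ll_front_merge_fn());
 *			break;
 *		default:
 *			no merge, allocate a new request;
 *		}
 *	}
 */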