xref: /openbmc/linux/block/blk-merge.c (revision 67927d22)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to segment and merge handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/blk-integrity.h>
10 #include <linux/scatterlist.h>
11 #include <linux/part_stat.h>
12 #include <linux/blk-cgroup.h>
13 
14 #include <trace/events/block.h>
15 
16 #include "blk.h"
17 #include "blk-mq-sched.h"
18 #include "blk-rq-qos.h"
19 #include "blk-throttle.h"
20 
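/*
 * Fetch the (multi-page) bvec that @bio's iterator currently points at,
 * i.e. the first bvec that submission of @bio would start from.
 */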
21 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
22 {
23 	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
24 }
25 
26 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
27 {
28 	struct bvec_iter iter = bio->bi_iter;
29 	int idx;
30 
31 	bio_get_first_bvec(bio, bv);
32 	if (bv->bv_len == bio->bi_iter.bi_size)
33 		return;		/* this bio only has a single bvec */
34 
35 	bio_advance_iter(bio, &iter, iter.bi_size);
36 
37 	if (!iter.bi_bvec_done)
38 		idx = iter.bi_idx - 1;
39 	else	/* in the middle of bvec */
40 		idx = iter.bi_idx;
41 
42 	*bv = bio->bi_io_vec[idx];
43 
44 	/*
45 	 * iter.bi_bvec_done records the actual length of the last bvec
46 	 * if this bio ends in the middle of an io vector.
47 	 */
48 	if (iter.bi_bvec_done)
49 		bv->bv_len = iter.bi_bvec_done;
50 }
51 
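/*
 * Check whether appending @next after @prev would leave a gap with respect
 * to the queue's virt boundary mask, i.e. whether the two bios must not be
 * merged into one request.
 */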
52 static inline bool bio_will_gap(struct request_queue *q,
53 		struct request *prev_rq, struct bio *prev, struct bio *next)
54 {
55 	struct bio_vec pb, nb;
56 
57 	if (!bio_has_data(prev) || !queue_virt_boundary(q))
58 		return false;
59 
60 	/*
61 	 * Don't merge if the first bio starts at a non-zero offset, otherwise
62 	 * it is quite difficult to respect the sg gap limit.  We work hard
63 	 * to merge huge numbers of small bios, e.g. those issued by mkfs.
64 	 */
65 	if (prev_rq)
66 		bio_get_first_bvec(prev_rq->bio, &pb);
67 	else
68 		bio_get_first_bvec(prev, &pb);
69 	if (pb.bv_offset & queue_virt_boundary(q))
70 		return true;
71 
72 	/*
73 	 * We don't need to worry about the case where the merged segment
74 	 * ends at an unaligned virt boundary:
75 	 *
76 	 * - if 'pb' ends aligned, the merged segment ends aligned
77 	 * - if 'pb' ends unaligned, the next bio must include
78 	 *   the single bvec 'nb', otherwise 'nb' can't
79 	 *   merge with 'pb'
80 	 */
81 	bio_get_last_bvec(prev, &pb);
82 	bio_get_first_bvec(next, &nb);
83 	if (biovec_phys_mergeable(q, &pb, &nb))
84 		return false;
85 	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
86 }
87 
88 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
89 {
90 	return bio_will_gap(req->q, req, req->biotail, bio);
91 }
92 
93 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
94 {
95 	return bio_will_gap(req->q, NULL, bio, req->bio);
96 }
97 
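/*
 * Split a discard/secure-erase bio so that the first fragment does not
 * exceed max_discard_sectors and, where possible, ends on a
 * discard_granularity boundary.  Returns that first fragment (@bio is
 * advanced past it), or NULL when no split is performed.
 */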
98 static struct bio *blk_bio_discard_split(struct request_queue *q,
99 					 struct bio *bio,
100 					 struct bio_set *bs,
101 					 unsigned *nsegs)
102 {
103 	unsigned int max_discard_sectors, granularity;
104 	int alignment;
105 	sector_t tmp;
106 	unsigned split_sectors;
107 
108 	*nsegs = 1;
109 
110 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
111 	granularity = max(q->limits.discard_granularity >> 9, 1U);
112 
113 	max_discard_sectors = min(q->limits.max_discard_sectors,
114 			bio_allowed_max_sectors(q));
115 	max_discard_sectors -= max_discard_sectors % granularity;
116 
117 	if (unlikely(!max_discard_sectors)) {
118 		/* XXX: warn */
119 		return NULL;
120 	}
121 
122 	if (bio_sectors(bio) <= max_discard_sectors)
123 		return NULL;
124 
125 	split_sectors = max_discard_sectors;
126 
127 	/*
128 	 * If the next starting sector would be misaligned, stop the discard at
129 	 * the previous aligned sector.
130 	 */
131 	alignment = (q->limits.discard_alignment >> 9) % granularity;
132 
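	/*
	 * Illustrative example (made-up numbers): with granularity = 8
	 * sectors, alignment = 0, bi_sector = 3 and split_sectors = 64,
	 * tmp below becomes (3 + 64 - 0) % 8 = 3, so split_sectors is
	 * reduced to 61 and the remainder starts at sector 64, which is
	 * granularity aligned.
	 */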
133 	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
134 	tmp = sector_div(tmp, granularity);
135 
136 	if (split_sectors > tmp)
137 		split_sectors -= tmp;
138 
139 	return bio_split(bio, split_sectors, GFP_NOIO, bs);
140 }
141 
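/*
 * Split a REQ_OP_WRITE_ZEROES bio against the queue's
 * max_write_zeroes_sectors limit.  *@nsegs is set to 0 because a write
 * zeroes bio carries no data payload.
 */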
142 static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
143 		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
144 {
145 	*nsegs = 0;
146 
147 	if (!q->limits.max_write_zeroes_sectors)
148 		return NULL;
149 
150 	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
151 		return NULL;
152 
153 	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
154 }
155 
156 /*
157  * Return the maximum number of sectors from the start of a bio that may be
158  * submitted as a single request to a block device. If enough sectors remain,
159  * align the end to the physical block size. Otherwise align the end to the
160  * logical block size. This approach minimizes the number of non-aligned
161  * requests that are submitted to a block device if the start of a bio is not
162  * aligned to a physical block boundary.
163  */
164 static inline unsigned get_max_io_size(struct request_queue *q,
165 				       struct bio *bio)
166 {
167 	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0);
168 	unsigned max_sectors = sectors;
169 	unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT;
170 	unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT;
171 	unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1);
172 
173 	max_sectors += start_offset;
174 	max_sectors &= ~(pbs - 1);
175 	if (max_sectors > start_offset)
176 		return max_sectors - start_offset;
177 
178 	return sectors & ~(lbs - 1);
179 }
180 
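/*
 * Return the maximum number of bytes that can be added to a segment
 * starting at @offset into @start_page without crossing the queue's
 * segment boundary mask or exceeding its max segment size.
 */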
181 static inline unsigned get_max_segment_size(const struct request_queue *q,
182 					    struct page *start_page,
183 					    unsigned long offset)
184 {
185 	unsigned long mask = queue_segment_boundary(q);
186 
187 	offset = mask & (page_to_phys(start_page) + offset);
188 
189 	/*
190 	 * Overflow may be triggered when the page's physical address is zero
191 	 * on a 32-bit arch; use the queue's max segment size when that happens.
192 	 */
193 	return min_not_zero(mask - offset + 1,
194 			(unsigned long)queue_max_segment_size(q));
195 }
196 
197 /**
198  * bvec_split_segs - verify whether or not a bvec should be split in the middle
199  * @q:        [in] request queue associated with the bio associated with @bv
200  * @bv:       [in] bvec to examine
201  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
202  *            by the number of segments from @bv that may be appended to that
203  *            bio without exceeding @max_segs
204  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
205  *            by the number of bytes from @bv that may be appended to that
206  *            bio without exceeding @max_bytes
207  * @max_segs: [in] upper bound for *@nsegs
208  * @max_bytes: [in] upper bound for *@bytes
209  *
210  * When splitting a bio, it can happen that a bvec is encountered that is too
211  * big to fit in a single segment and hence that it has to be split in the
212  * middle. This function verifies whether or not that should happen. The value
213  * %true is returned if and only if appending the entire @bv to a bio with
214  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
215  * the block driver.
216  */
217 static bool bvec_split_segs(const struct request_queue *q,
218 			    const struct bio_vec *bv, unsigned *nsegs,
219 			    unsigned *bytes, unsigned max_segs,
220 			    unsigned max_bytes)
221 {
222 	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
223 	unsigned len = min(bv->bv_len, max_len);
224 	unsigned total_len = 0;
225 	unsigned seg_size = 0;
226 
227 	while (len && *nsegs < max_segs) {
228 		seg_size = get_max_segment_size(q, bv->bv_page,
229 						bv->bv_offset + total_len);
230 		seg_size = min(seg_size, len);
231 
232 		(*nsegs)++;
233 		total_len += seg_size;
234 		len -= seg_size;
235 
236 		if ((bv->bv_offset + total_len) & queue_virt_boundary(q))
237 			break;
238 	}
239 
240 	*bytes += total_len;
241 
242 	/* tell the caller to split the bvec if it is too big to fit */
243 	return len > 0 || bv->bv_len > max_len;
244 }
245 
246 /**
247  * blk_bio_segment_split - split a bio in two bios
248  * @q:    [in] request queue pointer
249  * @bio:  [in] bio to be split
250  * @bs:	  [in] bio set to allocate the clone from
251  * @segs: [out] number of segments in the bio with the first half of the sectors
252  *
253  * Clone @bio, update the bi_iter of the clone to represent the first sectors
254  * of @bio and update @bio->bi_iter to represent the remaining sectors. The
255  * following is guaranteed for the cloned bio:
256  * - That it has at most get_max_io_size(@q, @bio) sectors.
257  * - That it has at most queue_max_segments(@q) segments.
258  *
259  * Except for discard requests the cloned bio will point at the bi_io_vec of
260  * the original bio. It is the responsibility of the caller to ensure that the
261  * original bio is not freed before the cloned bio. The caller is also
262  * responsible for ensuring that @bs is only destroyed after processing of the
263  * split bio has finished.
264  */
265 static struct bio *blk_bio_segment_split(struct request_queue *q,
266 					 struct bio *bio,
267 					 struct bio_set *bs,
268 					 unsigned *segs)
269 {
270 	struct bio_vec bv, bvprv, *bvprvp = NULL;
271 	struct bvec_iter iter;
272 	unsigned nsegs = 0, bytes = 0;
273 	const unsigned max_bytes = get_max_io_size(q, bio) << 9;
274 	const unsigned max_segs = queue_max_segments(q);
275 
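	/*
	 * Walk the bvecs: one that fits in the remaining byte and segment
	 * budget and stays within a single page counts as one segment;
	 * anything else is handed to bvec_split_segs(), which decides
	 * whether the bio has to be split here.
	 */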
276 	bio_for_each_bvec(bv, bio, iter) {
277 		/*
278 		 * If the queue doesn't support SG gaps and adding this
279 		 * offset would create a gap, disallow it.
280 		 */
281 		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
282 			goto split;
283 
284 		if (nsegs < max_segs &&
285 		    bytes + bv.bv_len <= max_bytes &&
286 		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
287 			nsegs++;
288 			bytes += bv.bv_len;
289 		} else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
290 					   max_bytes)) {
291 			goto split;
292 		}
293 
294 		bvprv = bv;
295 		bvprvp = &bvprv;
296 	}
297 
298 	*segs = nsegs;
299 	return NULL;
300 split:
301 	*segs = nsegs;
302 
303 	/*
304 	 * Individual bvecs might not be logical block aligned. Round down the
305 	 * split size so that each bio is properly block size aligned, even if
306 	 * we do not use the full hardware limits.
307 	 */
308 	bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
309 
310 	/*
311 	 * Bio splitting may cause subtle trouble such as hangs when doing sync
312 	 * iopoll in the direct IO path. Since the performance gain of iopoll
313 	 * for big IO can be trivial, disable iopoll when a split is needed.
314 	 */
315 	bio_clear_polled(bio);
316 	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
317 }
318 
319 /**
320  * __blk_queue_split - split a bio and submit the second half
321  * @q:       [in] request_queue new bio is being queued at
322  * @bio:     [in, out] bio to be split
323  * @nr_segs: [out] number of segments in the first bio
324  *
325  * Split a bio into two bios, chain the two bios, submit the second half and
326  * store a pointer to the first half in *@bio. If the second bio is still too
327  * big it will be split by a recursive call to this function. Since this
328  * function may allocate a new bio from q->bio_split, it is the responsibility
329  * of the caller to ensure that q->bio_split is only released after processing
330  * of the split bio has finished.
331  */
332 void __blk_queue_split(struct request_queue *q, struct bio **bio,
333 		       unsigned int *nr_segs)
334 {
335 	struct bio *split = NULL;
336 
337 	switch (bio_op(*bio)) {
338 	case REQ_OP_DISCARD:
339 	case REQ_OP_SECURE_ERASE:
340 		split = blk_bio_discard_split(q, *bio, &q->bio_split, nr_segs);
341 		break;
342 	case REQ_OP_WRITE_ZEROES:
343 		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split,
344 				nr_segs);
345 		break;
346 	default:
347 		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
348 		break;
349 	}
350 
351 	if (split) {
352 		/* there is no chance to merge the split bio */
353 		split->bi_opf |= REQ_NOMERGE;
354 
355 		bio_chain(split, *bio);
356 		trace_block_split(split, (*bio)->bi_iter.bi_sector);
357 		submit_bio_noacct(*bio);
358 		*bio = split;
359 	}
360 }
361 
362 /**
363  * blk_queue_split - split a bio and submit the second half
364  * @bio: [in, out] bio to be split
365  *
366  * Split a bio into two bios, chain the two bios, submit the second half and
367  * store a pointer to the first half in *@bio. Since this function may allocate
368  * a new bio from q->bio_split, it is the responsibility of the caller to ensure
369  * that q->bio_split is only released after processing of the split bio has
370  * finished.
371  */
372 void blk_queue_split(struct bio **bio)
373 {
374 	struct request_queue *q = bdev_get_queue((*bio)->bi_bdev);
375 	unsigned int nr_segs;
376 
377 	if (blk_may_split(q, *bio))
378 		__blk_queue_split(q, bio, &nr_segs);
379 }
380 EXPORT_SYMBOL(blk_queue_split);
381 
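/*
 * Recount the physical segments of @rq by walking its bvecs.  Discard and
 * write zeroes requests are special-cased because they do not map to
 * regular data segments.
 */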
382 unsigned int blk_recalc_rq_segments(struct request *rq)
383 {
384 	unsigned int nr_phys_segs = 0;
385 	unsigned int bytes = 0;
386 	struct req_iterator iter;
387 	struct bio_vec bv;
388 
389 	if (!rq->bio)
390 		return 0;
391 
392 	switch (bio_op(rq->bio)) {
393 	case REQ_OP_DISCARD:
394 	case REQ_OP_SECURE_ERASE:
395 		if (queue_max_discard_segments(rq->q) > 1) {
396 			struct bio *bio = rq->bio;
397 
398 			for_each_bio(bio)
399 				nr_phys_segs++;
400 			return nr_phys_segs;
401 		}
402 		return 1;
403 	case REQ_OP_WRITE_ZEROES:
404 		return 0;
405 	}
406 
407 	rq_for_each_bvec(bv, rq, iter)
408 		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
409 				UINT_MAX, UINT_MAX);
410 	return nr_phys_segs;
411 }
412 
413 static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
414 		struct scatterlist *sglist)
415 {
416 	if (!*sg)
417 		return sglist;
418 
419 	/*
420 	 * If the driver previously mapped a shorter list, we could see a
421 	 * termination bit prematurely unless it fully inits the sg table
422 	 * on each mapping. We KNOW that there must be more entries here
423 	 * or the driver would be buggy, so force clear the termination bit
424 	 * to avoid doing a full sg_init_table() in drivers for each command.
425 	 */
426 	sg_unmark_end(*sg);
427 	return sg_next(*sg);
428 }
429 
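/*
 * Map a single (possibly multi-page) bvec to one or more scatterlist
 * entries, splitting on the queue's maximum segment size and keeping each
 * entry's offset below PAGE_SIZE for the benefit of drivers that cannot
 * handle larger offsets (see the comment in the loop below).
 */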
430 static unsigned blk_bvec_map_sg(struct request_queue *q,
431 		struct bio_vec *bvec, struct scatterlist *sglist,
432 		struct scatterlist **sg)
433 {
434 	unsigned nbytes = bvec->bv_len;
435 	unsigned nsegs = 0, total = 0;
436 
437 	while (nbytes > 0) {
438 		unsigned offset = bvec->bv_offset + total;
439 		unsigned len = min(get_max_segment_size(q, bvec->bv_page,
440 					offset), nbytes);
441 		struct page *page = bvec->bv_page;
442 
443 		/*
444 		 * Unfortunately a fair number of drivers barf on scatterlists
445 		 * that have an offset larger than PAGE_SIZE, despite other
446 		 * subsystems dealing with that invariant just fine.  For now
447 		 * stick to the legacy format where we never present those from
448 		 * the block layer, but the code below should be removed once
449 		 * these offenders (mostly MMC/SD drivers) are fixed.
450 		 */
451 		page += (offset >> PAGE_SHIFT);
452 		offset &= ~PAGE_MASK;
453 
454 		*sg = blk_next_sg(sg, sglist);
455 		sg_set_page(*sg, page, len, offset);
456 
457 		total += len;
458 		nbytes -= len;
459 		nsegs++;
460 	}
461 
462 	return nsegs;
463 }
464 
465 static inline int __blk_bvec_map_sg(struct bio_vec bv,
466 		struct scatterlist *sglist, struct scatterlist **sg)
467 {
468 	*sg = blk_next_sg(sg, sglist);
469 	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
470 	return 1;
471 }
472 
473 /* only try to merge bvecs into one sg if they are from two bios */
474 static inline bool
475 __blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
476 			   struct bio_vec *bvprv, struct scatterlist **sg)
477 {
478 
479 	int nbytes = bvec->bv_len;
480 
481 	if (!*sg)
482 		return false;
483 
484 	if ((*sg)->length + nbytes > queue_max_segment_size(q))
485 		return false;
486 
487 	if (!biovec_phys_mergeable(q, bvprv, bvec))
488 		return false;
489 
490 	(*sg)->length += nbytes;
491 
492 	return true;
493 }
494 
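/*
 * Map every bvec of every bio in the chain starting at @bio to scatterlist
 * entries, merging a bvec with the last entry of the previous bio when the
 * two are physically contiguous and the combined length stays within the
 * queue's segment size limit.
 */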
495 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
496 			     struct scatterlist *sglist,
497 			     struct scatterlist **sg)
498 {
499 	struct bio_vec bvec, bvprv = { NULL };
500 	struct bvec_iter iter;
501 	int nsegs = 0;
502 	bool new_bio = false;
503 
504 	for_each_bio(bio) {
505 		bio_for_each_bvec(bvec, bio, iter) {
506 			/*
507 			 * Only try to merge bvecs from two different bios,
508 			 * since bvecs within one bio were already merged
509 			 * when pages were added to that bio.
510 			 */
511 			if (new_bio &&
512 			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
513 				goto next_bvec;
514 
515 			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
516 				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
517 			else
518 				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
519  next_bvec:
520 			new_bio = false;
521 		}
522 		if (likely(bio->bi_iter.bi_size)) {
523 			bvprv = bvec;
524 			new_bio = true;
525 		}
526 	}
527 
528 	return nsegs;
529 }
530 
531 /*
532  * Map a request to a scatterlist and return the number of sg entries set
533  * up.  The caller must make sure sg can hold rq->nr_phys_segments entries.
534  */
535 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
536 		struct scatterlist *sglist, struct scatterlist **last_sg)
537 {
538 	int nsegs = 0;
539 
540 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
541 		nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
542 	else if (rq->bio)
543 		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
544 
545 	if (*last_sg)
546 		sg_mark_end(*last_sg);
547 
548 	/*
549 	 * Something must be wrong if the computed number of segments
550 	 * is bigger than the request's number of physical segments.
551 	 */
552 	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
553 
554 	return nsegs;
555 }
556 EXPORT_SYMBOL(__blk_rq_map_sg);
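/*
 * Illustrative use (a sketch, not taken from any particular driver): a
 * driver that has allocated a scatterlist with room for
 * rq->nr_phys_segments entries would typically do
 *
 *	struct scatterlist *last = NULL;
 *	int nents = __blk_rq_map_sg(q, rq, sglist, &last);
 *
 * and then hand the first 'nents' entries of 'sglist' to the DMA API.
 */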
557 
558 static inline unsigned int blk_rq_get_max_segments(struct request *rq)
559 {
560 	if (req_op(rq) == REQ_OP_DISCARD)
561 		return queue_max_discard_segments(rq->q);
562 	return queue_max_segments(rq->q);
563 }
564 
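/*
 * Upper bound on the number of sectors @rq may grow to by merging,
 * honouring the queue's chunk_sectors boundary at @offset when one is set
 * (passthrough, discard and secure erase requests ignore chunk_sectors).
 */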
565 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
566 						  sector_t offset)
567 {
568 	struct request_queue *q = rq->q;
569 
570 	if (blk_rq_is_passthrough(rq))
571 		return q->limits.max_hw_sectors;
572 
573 	if (!q->limits.chunk_sectors ||
574 	    req_op(rq) == REQ_OP_DISCARD ||
575 	    req_op(rq) == REQ_OP_SECURE_ERASE)
576 		return blk_queue_get_max_sectors(q, req_op(rq));
577 
578 	return min(blk_max_size_offset(q, offset, 0),
579 			blk_queue_get_max_sectors(q, req_op(rq)));
580 }
581 
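/*
 * Common checks for adding @nr_phys_segs new segments to @req as part of a
 * bio merge: cgroup and integrity compatibility plus the per-request
 * segment limit.  Returns 1 on success and 0, after marking @req
 * unmergeable, on failure.
 */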
582 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
583 		unsigned int nr_phys_segs)
584 {
585 	if (!blk_cgroup_mergeable(req, bio))
586 		goto no_merge;
587 
588 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
589 		goto no_merge;
590 
591 	/* discard request merge won't add new segment */
592 	if (req_op(req) == REQ_OP_DISCARD)
593 		return 1;
594 
595 	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
596 		goto no_merge;
597 
598 	/*
599 	 * This will form the start of a new hw segment.  Bump the
600 	 * segment counter.
601 	 */
602 	req->nr_phys_segments += nr_phys_segs;
603 	return 1;
604 
605 no_merge:
606 	req_set_nomerge(req->q, req);
607 	return 0;
608 }
609 
610 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
611 {
612 	if (req_gap_back_merge(req, bio))
613 		return 0;
614 	if (blk_integrity_rq(req) &&
615 	    integrity_req_gap_back_merge(req, bio))
616 		return 0;
617 	if (!bio_crypt_ctx_back_mergeable(req, bio))
618 		return 0;
619 	if (blk_rq_sectors(req) + bio_sectors(bio) >
620 	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
621 		req_set_nomerge(req->q, req);
622 		return 0;
623 	}
624 
625 	return ll_new_hw_segment(req, bio, nr_segs);
626 }
627 
628 static int ll_front_merge_fn(struct request *req, struct bio *bio,
629 		unsigned int nr_segs)
630 {
631 	if (req_gap_front_merge(req, bio))
632 		return 0;
633 	if (blk_integrity_rq(req) &&
634 	    integrity_req_gap_front_merge(req, bio))
635 		return 0;
636 	if (!bio_crypt_ctx_front_mergeable(req, bio))
637 		return 0;
638 	if (blk_rq_sectors(req) + bio_sectors(bio) >
639 	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
640 		req_set_nomerge(req->q, req);
641 		return 0;
642 	}
643 
644 	return ll_new_hw_segment(req, bio, nr_segs);
645 }
646 
647 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
648 		struct request *next)
649 {
650 	unsigned short segments = blk_rq_nr_discard_segments(req);
651 
652 	if (segments >= queue_max_discard_segments(q))
653 		goto no_merge;
654 	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
655 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
656 		goto no_merge;
657 
658 	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
659 	return true;
660 no_merge:
661 	req_set_nomerge(q, req);
662 	return false;
663 }
664 
665 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
666 				struct request *next)
667 {
668 	int total_phys_segments;
669 
670 	if (req_gap_back_merge(req, next->bio))
671 		return 0;
672 
673 	/*
674 	 * Will it become too large?
675 	 */
676 	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
677 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
678 		return 0;
679 
680 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
681 	if (total_phys_segments > blk_rq_get_max_segments(req))
682 		return 0;
683 
684 	if (!blk_cgroup_mergeable(req, next->bio))
685 		return 0;
686 
687 	if (blk_integrity_merge_rq(q, req, next) == false)
688 		return 0;
689 
690 	if (!bio_crypt_ctx_merge_rq(req, next))
691 		return 0;
692 
693 	/* Merge is OK... */
694 	req->nr_phys_segments = total_phys_segments;
695 	return 1;
696 }
697 
698 /**
699  * blk_rq_set_mixed_merge - mark a request as mixed merge
700  * @rq: request to mark as mixed merge
701  *
702  * Description:
703  *     @rq is about to be mixed merged.  Make sure the attributes
704  *     which can be mixed are set in each bio and mark @rq as mixed
705  *     merged.
706  */
707 void blk_rq_set_mixed_merge(struct request *rq)
708 {
709 	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
710 	struct bio *bio;
711 
712 	if (rq->rq_flags & RQF_MIXED_MERGE)
713 		return;
714 
715 	/*
716 	 * @rq will no longer represent mixable attributes for all the
717 	 * contained bios.  It will just track those of the first one.
718 	 * Distribute the attributes to each bio.
719 	 */
720 	for (bio = rq->bio; bio; bio = bio->bi_next) {
721 		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
722 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
723 		bio->bi_opf |= ff;
724 	}
725 	rq->rq_flags |= RQF_MIXED_MERGE;
726 }
727 
728 static void blk_account_io_merge_request(struct request *req)
729 {
730 	if (blk_do_io_stat(req)) {
731 		part_stat_lock();
732 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
733 		part_stat_unlock();
734 	}
735 }
736 
737 static enum elv_merge blk_try_req_merge(struct request *req,
738 					struct request *next)
739 {
740 	if (blk_discard_mergable(req))
741 		return ELEVATOR_DISCARD_MERGE;
742 	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
743 		return ELEVATOR_BACK_MERGE;
744 
745 	return ELEVATOR_NO_MERGE;
746 }
747 
748 /*
749  * For non-mq, this has to be called with the request spinlock acquired.
750  * For mq with scheduling, the appropriate queue wide lock should be held.
751  */
752 static struct request *attempt_merge(struct request_queue *q,
753 				     struct request *req, struct request *next)
754 {
755 	if (!rq_mergeable(req) || !rq_mergeable(next))
756 		return NULL;
757 
758 	if (req_op(req) != req_op(next))
759 		return NULL;
760 
761 	if (rq_data_dir(req) != rq_data_dir(next))
762 		return NULL;
763 
764 	if (req->ioprio != next->ioprio)
765 		return NULL;
766 
767 	/*
768 	 * If we are allowed to merge, then append the bio list
769 	 * from next to rq and release next.  merge_requests_fn
770 	 * will have updated the segment counts; update the sector
771 	 * counts here.  Handle DISCARDs separately, as they
772 	 * have separate settings.
773 	 */
774 
775 	switch (blk_try_req_merge(req, next)) {
776 	case ELEVATOR_DISCARD_MERGE:
777 		if (!req_attempt_discard_merge(q, req, next))
778 			return NULL;
779 		break;
780 	case ELEVATOR_BACK_MERGE:
781 		if (!ll_merge_requests_fn(q, req, next))
782 			return NULL;
783 		break;
784 	default:
785 		return NULL;
786 	}
787 
788 	/*
789 	 * If failfast settings disagree or any of the two is already
790 	 * a mixed merge, mark both as mixed before proceeding.  This
791 	 * makes sure that all involved bios have mixable attributes
792 	 * set properly.
793 	 */
794 	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
795 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
796 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
797 		blk_rq_set_mixed_merge(req);
798 		blk_rq_set_mixed_merge(next);
799 	}
800 
801 	/*
802 	 * At this point we have either done a back merge or front merge. We
803 	 * need the smaller start_time_ns of the merged requests to be the
804 	 * current request for accounting purposes.
805 	 */
806 	if (next->start_time_ns < req->start_time_ns)
807 		req->start_time_ns = next->start_time_ns;
808 
809 	req->biotail->bi_next = next->bio;
810 	req->biotail = next->biotail;
811 
812 	req->__data_len += blk_rq_bytes(next);
813 
814 	if (!blk_discard_mergable(req))
815 		elv_merge_requests(q, req, next);
816 
817 	/*
818 	 * 'next' is going away, so update stats accordingly
819 	 */
820 	blk_account_io_merge_request(next);
821 
822 	trace_block_rq_merge(next);
823 
824 	/*
825 	 * ownership of bio passed from next to req, return 'next' for
826 	 * the caller to free
827 	 */
828 	next->bio = NULL;
829 	return next;
830 }
831 
832 static struct request *attempt_back_merge(struct request_queue *q,
833 		struct request *rq)
834 {
835 	struct request *next = elv_latter_request(q, rq);
836 
837 	if (next)
838 		return attempt_merge(q, rq, next);
839 
840 	return NULL;
841 }
842 
843 static struct request *attempt_front_merge(struct request_queue *q,
844 		struct request *rq)
845 {
846 	struct request *prev = elv_former_request(q, rq);
847 
848 	if (prev)
849 		return attempt_merge(q, prev, rq);
850 
851 	return NULL;
852 }
853 
854 /*
855  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
856  * otherwise. The caller is responsible for freeing 'next' if the merge
857  * happened.
858  */
859 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
860 			   struct request *next)
861 {
862 	return attempt_merge(q, rq, next);
863 }
864 
865 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
866 {
867 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
868 		return false;
869 
870 	if (req_op(rq) != bio_op(bio))
871 		return false;
872 
873 	/* different data direction or already started, don't merge */
874 	if (bio_data_dir(bio) != rq_data_dir(rq))
875 		return false;
876 
877 	/* don't merge across cgroup boundaries */
878 	if (!blk_cgroup_mergeable(rq, bio))
879 		return false;
880 
881 	/* only merge an integrity-protected bio into an integrity-protected rq */
882 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
883 		return false;
884 
885 	/* Only merge if the crypt contexts are compatible */
886 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
887 		return false;
888 
889 	if (rq->ioprio != bio_prio(bio))
890 		return false;
891 
892 	return true;
893 }
894 
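/*
 * Classify how @bio could be merged into @rq: as another discard range, at
 * the back, or at the front, based purely on sector positions.
 */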
895 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
896 {
897 	if (blk_discard_mergable(rq))
898 		return ELEVATOR_DISCARD_MERGE;
899 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
900 		return ELEVATOR_BACK_MERGE;
901 	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
902 		return ELEVATOR_FRONT_MERGE;
903 	return ELEVATOR_NO_MERGE;
904 }
905 
906 static void blk_account_io_merge_bio(struct request *req)
907 {
908 	if (!blk_do_io_stat(req))
909 		return;
910 
911 	part_stat_lock();
912 	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
913 	part_stat_unlock();
914 }
915 
916 enum bio_merge_status {
917 	BIO_MERGE_OK,
918 	BIO_MERGE_NONE,
919 	BIO_MERGE_FAILED,
920 };
921 
922 static enum bio_merge_status bio_attempt_back_merge(struct request *req,
923 		struct bio *bio, unsigned int nr_segs)
924 {
925 	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
926 
927 	if (!ll_back_merge_fn(req, bio, nr_segs))
928 		return BIO_MERGE_FAILED;
929 
930 	trace_block_bio_backmerge(bio);
931 	rq_qos_merge(req->q, req, bio);
932 
933 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
934 		blk_rq_set_mixed_merge(req);
935 
936 	req->biotail->bi_next = bio;
937 	req->biotail = bio;
938 	req->__data_len += bio->bi_iter.bi_size;
939 
940 	bio_crypt_free_ctx(bio);
941 
942 	blk_account_io_merge_bio(req);
943 	return BIO_MERGE_OK;
944 }
945 
946 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
947 		struct bio *bio, unsigned int nr_segs)
948 {
949 	const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
950 
951 	if (!ll_front_merge_fn(req, bio, nr_segs))
952 		return BIO_MERGE_FAILED;
953 
954 	trace_block_bio_frontmerge(bio);
955 	rq_qos_merge(req->q, req, bio);
956 
957 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
958 		blk_rq_set_mixed_merge(req);
959 
960 	bio->bi_next = req->bio;
961 	req->bio = bio;
962 
963 	req->__sector = bio->bi_iter.bi_sector;
964 	req->__data_len += bio->bi_iter.bi_size;
965 
966 	bio_crypt_do_front_merge(req, bio);
967 
968 	blk_account_io_merge_bio(req);
969 	return BIO_MERGE_OK;
970 }
971 
972 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
973 		struct request *req, struct bio *bio)
974 {
975 	unsigned short segments = blk_rq_nr_discard_segments(req);
976 
977 	if (segments >= queue_max_discard_segments(q))
978 		goto no_merge;
979 	if (blk_rq_sectors(req) + bio_sectors(bio) >
980 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
981 		goto no_merge;
982 
983 	rq_qos_merge(q, req, bio);
984 
985 	req->biotail->bi_next = bio;
986 	req->biotail = bio;
987 	req->__data_len += bio->bi_iter.bi_size;
988 	req->nr_phys_segments = segments + 1;
989 
990 	blk_account_io_merge_bio(req);
991 	return BIO_MERGE_OK;
992 no_merge:
993 	req_set_nomerge(q, req);
994 	return BIO_MERGE_FAILED;
995 }
996 
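/*
 * Try to merge @bio into @rq in whichever direction blk_try_merge()
 * suggests.  When @sched_allow_merge is set, the I/O scheduler is asked
 * for permission before a back or front merge is attempted.
 */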
997 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
998 						   struct request *rq,
999 						   struct bio *bio,
1000 						   unsigned int nr_segs,
1001 						   bool sched_allow_merge)
1002 {
1003 	if (!blk_rq_merge_ok(rq, bio))
1004 		return BIO_MERGE_NONE;
1005 
1006 	switch (blk_try_merge(rq, bio)) {
1007 	case ELEVATOR_BACK_MERGE:
1008 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1009 			return bio_attempt_back_merge(rq, bio, nr_segs);
1010 		break;
1011 	case ELEVATOR_FRONT_MERGE:
1012 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1013 			return bio_attempt_front_merge(rq, bio, nr_segs);
1014 		break;
1015 	case ELEVATOR_DISCARD_MERGE:
1016 		return bio_attempt_discard_merge(q, rq, bio);
1017 	default:
1018 		return BIO_MERGE_NONE;
1019 	}
1020 
1021 	return BIO_MERGE_FAILED;
1022 }
1023 
1024 /**
1025  * blk_attempt_plug_merge - try to merge with %current's plugged list
1026  * @q: request_queue new bio is being queued at
1027  * @bio: new bio being queued
1028  * @nr_segs: number of segments in @bio
1030  *
1031  * Determine whether @bio being queued on @q can be merged with the previous
1032  * request on %current's plugged list.  Returns %true if merge was successful,
1033  * otherwise %false.
1034  *
1035  * Plugging coalesces IOs from the same issuer for the same purpose without
1036  * going through @q->queue_lock.  As such it's more of an issuing mechanism
1037  * than scheduling, and the request, while it may have elvpriv data, is
1038  * not added to the elevator at this point.  In addition, we don't have
1039  * reliable access to the elevator outside queue lock.  Only check basic
1040  * merging parameters without querying the elevator.
1041  *
1042  * Caller must ensure !blk_queue_nomerges(q) beforehand.
1043  */
1044 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1045 		unsigned int nr_segs)
1046 {
1047 	struct blk_plug *plug;
1048 	struct request *rq;
1049 
1050 	plug = blk_mq_plug(q, bio);
1051 	if (!plug || rq_list_empty(plug->mq_list))
1052 		return false;
1053 
1054 	rq_list_for_each(&plug->mq_list, rq) {
1055 		if (rq->q == q) {
1056 			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1057 			    BIO_MERGE_OK)
1058 				return true;
1059 			break;
1060 		}
1061 
1062 		/*
1063 		 * Only keep iterating plug list for merges if we have multiple
1064 		 * queues
1065 		 */
1066 		if (!plug->multiple_queues)
1067 			break;
1068 	}
1069 	return false;
1070 }
1071 
1072 /*
1073  * Iterate list of requests and see if we can merge this bio with any
1074  * of them.
1075  */
1076 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1077 			struct bio *bio, unsigned int nr_segs)
1078 {
1079 	struct request *rq;
1080 	int checked = 8;
1081 
1082 	list_for_each_entry_reverse(rq, list, queuelist) {
1083 		if (!checked--)
1084 			break;
1085 
1086 		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1087 		case BIO_MERGE_NONE:
1088 			continue;
1089 		case BIO_MERGE_OK:
1090 			return true;
1091 		case BIO_MERGE_FAILED:
1092 			return false;
1093 		}
1094 
1095 	}
1096 
1097 	return false;
1098 }
1099 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1100 
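/*
 * Ask the elevator for a merge candidate for @bio and try the merge.  On a
 * successful back or front merge, also try to merge the grown request with
 * its neighbour; if that succeeds, the request that was merged away is
 * returned via @merged_request so the caller can free it.
 */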
1101 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1102 		unsigned int nr_segs, struct request **merged_request)
1103 {
1104 	struct request *rq;
1105 
1106 	switch (elv_merge(q, &rq, bio)) {
1107 	case ELEVATOR_BACK_MERGE:
1108 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1109 			return false;
1110 		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1111 			return false;
1112 		*merged_request = attempt_back_merge(q, rq);
1113 		if (!*merged_request)
1114 			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1115 		return true;
1116 	case ELEVATOR_FRONT_MERGE:
1117 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1118 			return false;
1119 		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1120 			return false;
1121 		*merged_request = attempt_front_merge(q, rq);
1122 		if (!*merged_request)
1123 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1124 		return true;
1125 	case ELEVATOR_DISCARD_MERGE:
1126 		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1127 	default:
1128 		return false;
1129 	}
1130 }
1131 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1132