xref: /openbmc/linux/block/bounce.c (revision 5bf9a1f3)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
2719c555fSJens Axboe /* bounce buffer handling for block devices
3719c555fSJens Axboe  *
4719c555fSJens Axboe  * - Split from highmem.c
5719c555fSJens Axboe  */
6719c555fSJens Axboe 
7b1de0d13SMitchel Humpherys #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8b1de0d13SMitchel Humpherys 
9719c555fSJens Axboe #include <linux/mm.h>
10719c555fSJens Axboe #include <linux/export.h>
11719c555fSJens Axboe #include <linux/swap.h>
12719c555fSJens Axboe #include <linux/gfp.h>
13719c555fSJens Axboe #include <linux/bio.h>
14719c555fSJens Axboe #include <linux/pagemap.h>
15719c555fSJens Axboe #include <linux/mempool.h>
16719c555fSJens Axboe #include <linux/blkdev.h>
1766114cadSTejun Heo #include <linux/backing-dev.h>
18719c555fSJens Axboe #include <linux/init.h>
19719c555fSJens Axboe #include <linux/hash.h>
20719c555fSJens Axboe #include <linux/highmem.h>
21719c555fSJens Axboe #include <linux/bootmem.h>
22b1de0d13SMitchel Humpherys #include <linux/printk.h>
23719c555fSJens Axboe #include <asm/tlbflush.h>
24719c555fSJens Axboe 
25719c555fSJens Axboe #include <trace/events/block.h>
263bce016aSChristoph Hellwig #include "blk.h"
27719c555fSJens Axboe 
28719c555fSJens Axboe #define POOL_SIZE	64
29719c555fSJens Axboe #define ISA_POOL_SIZE	16
30719c555fSJens Axboe 
31338aa96dSKent Overstreet static struct bio_set bounce_bio_set, bounce_bio_split;
32338aa96dSKent Overstreet static mempool_t page_pool, isa_page_pool;
33719c555fSJens Axboe 
#if defined(CONFIG_HIGHMEM)
/*
 * Boot-time setup of the regular bounce resources: the page reserve
 * (page_pool) and the two bio sets used for bounce clones and splits.
 * Every failure is fatal (BUG_ON). Always returns 0.
 */
static __init int init_emergency_pool(void)
{
	int err;

#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
	/* max_pfn does not exceed max_low_pfn: skip the setup entirely */
	if (max_pfn <= max_low_pfn)
		return 0;
#endif

	err = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
	BUG_ON(err);
	pr_info("pool size: %d pages\n", POOL_SIZE);

	/* bio set for the bounce clones, including integrity payloads */
	err = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
	BUG_ON(err);
	err = bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE);
	BUG_ON(err);

	/* bio set for the front halves produced by bio_split() */
	err = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
	BUG_ON(err);

	return 0;
}

__initcall(init_emergency_pool);
#endif
60719c555fSJens Axboe 
61719c555fSJens Axboe #ifdef CONFIG_HIGHMEM
62719c555fSJens Axboe /*
63719c555fSJens Axboe  * highmem version, map in to vec
64719c555fSJens Axboe  */
65719c555fSJens Axboe static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
66719c555fSJens Axboe {
67719c555fSJens Axboe 	unsigned char *vto;
68719c555fSJens Axboe 
69719c555fSJens Axboe 	vto = kmap_atomic(to->bv_page);
70719c555fSJens Axboe 	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
71719c555fSJens Axboe 	kunmap_atomic(vto);
72719c555fSJens Axboe }
73719c555fSJens Axboe 
74719c555fSJens Axboe #else /* CONFIG_HIGHMEM */
75719c555fSJens Axboe 
76719c555fSJens Axboe #define bounce_copy_vec(to, vfrom)	\
77719c555fSJens Axboe 	memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
78719c555fSJens Axboe 
79719c555fSJens Axboe #endif /* CONFIG_HIGHMEM */
80719c555fSJens Axboe 
81719c555fSJens Axboe /*
82719c555fSJens Axboe  * allocate pages in the DMA region for the ISA pool
83719c555fSJens Axboe  */
84719c555fSJens Axboe static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
85719c555fSJens Axboe {
86719c555fSJens Axboe 	return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
87719c555fSJens Axboe }
88719c555fSJens Axboe 
89719c555fSJens Axboe /*
90719c555fSJens Axboe  * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
91719c555fSJens Axboe  * as the max address, so check if the pool has already been created.
92719c555fSJens Axboe  */
93719c555fSJens Axboe int init_emergency_isa_pool(void)
94719c555fSJens Axboe {
95338aa96dSKent Overstreet 	int ret;
96338aa96dSKent Overstreet 
97338aa96dSKent Overstreet 	if (mempool_initialized(&isa_page_pool))
98719c555fSJens Axboe 		return 0;
99719c555fSJens Axboe 
100338aa96dSKent Overstreet 	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
101719c555fSJens Axboe 			   mempool_free_pages, (void *) 0);
102338aa96dSKent Overstreet 	BUG_ON(ret);
103719c555fSJens Axboe 
104b1de0d13SMitchel Humpherys 	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
105719c555fSJens Axboe 	return 0;
106719c555fSJens Axboe }
107719c555fSJens Axboe 
108719c555fSJens Axboe /*
109719c555fSJens Axboe  * Simple bounce buffer support for highmem pages. Depending on the
110719c555fSJens Axboe  * queue gfp mask set, *to may or may not be a highmem page. kmap it
111719c555fSJens Axboe  * always, it will do the Right Thing
112719c555fSJens Axboe  */
113719c555fSJens Axboe static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
114719c555fSJens Axboe {
115719c555fSJens Axboe 	unsigned char *vfrom;
1163c892a09SMing Lei 	struct bio_vec tovec, fromvec;
117719c555fSJens Axboe 	struct bvec_iter iter;
1183c892a09SMing Lei 	/*
1193c892a09SMing Lei 	 * The bio of @from is created by bounce, so we can iterate
1203c892a09SMing Lei 	 * its bvec from start to end, but the @from->bi_iter can't be
1213c892a09SMing Lei 	 * trusted because it might be changed by splitting.
1223c892a09SMing Lei 	 */
1233c892a09SMing Lei 	struct bvec_iter from_iter = BVEC_ITER_ALL_INIT;
124719c555fSJens Axboe 
125719c555fSJens Axboe 	bio_for_each_segment(tovec, to, iter) {
1263c892a09SMing Lei 		fromvec = bio_iter_iovec(from, from_iter);
1273c892a09SMing Lei 		if (tovec.bv_page != fromvec.bv_page) {
128719c555fSJens Axboe 			/*
129719c555fSJens Axboe 			 * fromvec->bv_offset and fromvec->bv_len might have
130719c555fSJens Axboe 			 * been modified by the block layer, so use the original
131719c555fSJens Axboe 			 * copy, bounce_copy_vec already uses tovec->bv_len
132719c555fSJens Axboe 			 */
1333c892a09SMing Lei 			vfrom = page_address(fromvec.bv_page) +
134719c555fSJens Axboe 				tovec.bv_offset;
135719c555fSJens Axboe 
136719c555fSJens Axboe 			bounce_copy_vec(&tovec, vfrom);
137719c555fSJens Axboe 			flush_dcache_page(tovec.bv_page);
138719c555fSJens Axboe 		}
1393c892a09SMing Lei 		bio_advance_iter(from, &from_iter, tovec.bv_len);
140719c555fSJens Axboe 	}
141719c555fSJens Axboe }
142719c555fSJens Axboe 
1434246a0b6SChristoph Hellwig static void bounce_end_io(struct bio *bio, mempool_t *pool)
144719c555fSJens Axboe {
145719c555fSJens Axboe 	struct bio *bio_orig = bio->bi_private;
1467891f05cSMing Lei 	struct bio_vec *bvec, orig_vec;
147719c555fSJens Axboe 	int i;
1487891f05cSMing Lei 	struct bvec_iter orig_iter = bio_orig->bi_iter;
149719c555fSJens Axboe 
150719c555fSJens Axboe 	/*
151719c555fSJens Axboe 	 * free up bounce indirect pages used
152719c555fSJens Axboe 	 */
153719c555fSJens Axboe 	bio_for_each_segment_all(bvec, bio, i) {
1547891f05cSMing Lei 		orig_vec = bio_iter_iovec(bio_orig, orig_iter);
1557891f05cSMing Lei 		if (bvec->bv_page != orig_vec.bv_page) {
156719c555fSJens Axboe 			dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
157719c555fSJens Axboe 			mempool_free(bvec->bv_page, pool);
158719c555fSJens Axboe 		}
1597891f05cSMing Lei 		bio_advance_iter(bio_orig, &orig_iter, orig_vec.bv_len);
1607891f05cSMing Lei 	}
161719c555fSJens Axboe 
1624e4cbee9SChristoph Hellwig 	bio_orig->bi_status = bio->bi_status;
1634246a0b6SChristoph Hellwig 	bio_endio(bio_orig);
164719c555fSJens Axboe 	bio_put(bio);
165719c555fSJens Axboe }
166719c555fSJens Axboe 
1674246a0b6SChristoph Hellwig static void bounce_end_io_write(struct bio *bio)
168719c555fSJens Axboe {
169338aa96dSKent Overstreet 	bounce_end_io(bio, &page_pool);
170719c555fSJens Axboe }
171719c555fSJens Axboe 
1724246a0b6SChristoph Hellwig static void bounce_end_io_write_isa(struct bio *bio)
173719c555fSJens Axboe {
174719c555fSJens Axboe 
175338aa96dSKent Overstreet 	bounce_end_io(bio, &isa_page_pool);
176719c555fSJens Axboe }
177719c555fSJens Axboe 
1784246a0b6SChristoph Hellwig static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
179719c555fSJens Axboe {
180719c555fSJens Axboe 	struct bio *bio_orig = bio->bi_private;
181719c555fSJens Axboe 
1824e4cbee9SChristoph Hellwig 	if (!bio->bi_status)
183719c555fSJens Axboe 		copy_to_high_bio_irq(bio_orig, bio);
184719c555fSJens Axboe 
1854246a0b6SChristoph Hellwig 	bounce_end_io(bio, pool);
186719c555fSJens Axboe }
187719c555fSJens Axboe 
1884246a0b6SChristoph Hellwig static void bounce_end_io_read(struct bio *bio)
189719c555fSJens Axboe {
190338aa96dSKent Overstreet 	__bounce_end_io_read(bio, &page_pool);
191719c555fSJens Axboe }
192719c555fSJens Axboe 
1934246a0b6SChristoph Hellwig static void bounce_end_io_read_isa(struct bio *bio)
194719c555fSJens Axboe {
195338aa96dSKent Overstreet 	__bounce_end_io_read(bio, &isa_page_pool);
196719c555fSJens Axboe }
197719c555fSJens Axboe 
198c55183c9SChristoph Hellwig static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
199c55183c9SChristoph Hellwig 		struct bio_set *bs)
200c55183c9SChristoph Hellwig {
201c55183c9SChristoph Hellwig 	struct bvec_iter iter;
202c55183c9SChristoph Hellwig 	struct bio_vec bv;
203c55183c9SChristoph Hellwig 	struct bio *bio;
204c55183c9SChristoph Hellwig 
205c55183c9SChristoph Hellwig 	/*
206c55183c9SChristoph Hellwig 	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
207c55183c9SChristoph Hellwig 	 * bio_src->bi_io_vec to bio->bi_io_vec.
208c55183c9SChristoph Hellwig 	 *
209c55183c9SChristoph Hellwig 	 * We can't do that anymore, because:
210c55183c9SChristoph Hellwig 	 *
211c55183c9SChristoph Hellwig 	 *  - The point of cloning the biovec is to produce a bio with a biovec
212c55183c9SChristoph Hellwig 	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
213c55183c9SChristoph Hellwig 	 *
214c55183c9SChristoph Hellwig 	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
215c55183c9SChristoph Hellwig 	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
216c55183c9SChristoph Hellwig 	 *    But the clone should succeed as long as the number of biovecs we
217c55183c9SChristoph Hellwig 	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
218c55183c9SChristoph Hellwig 	 *
219c55183c9SChristoph Hellwig 	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
220c55183c9SChristoph Hellwig 	 *    that does not own the bio - reason being drivers don't use it for
221c55183c9SChristoph Hellwig 	 *    iterating over the biovec anymore, so expecting it to be kept up
222c55183c9SChristoph Hellwig 	 *    to date (i.e. for clones that share the parent biovec) is just
223c55183c9SChristoph Hellwig 	 *    asking for trouble and would force extra work on
224c55183c9SChristoph Hellwig 	 *    __bio_clone_fast() anyways.
225c55183c9SChristoph Hellwig 	 */
226c55183c9SChristoph Hellwig 
227c55183c9SChristoph Hellwig 	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
228c55183c9SChristoph Hellwig 	if (!bio)
229c55183c9SChristoph Hellwig 		return NULL;
230c55183c9SChristoph Hellwig 	bio->bi_disk		= bio_src->bi_disk;
231c55183c9SChristoph Hellwig 	bio->bi_opf		= bio_src->bi_opf;
232c55183c9SChristoph Hellwig 	bio->bi_write_hint	= bio_src->bi_write_hint;
233c55183c9SChristoph Hellwig 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
234c55183c9SChristoph Hellwig 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
235c55183c9SChristoph Hellwig 
236c55183c9SChristoph Hellwig 	switch (bio_op(bio)) {
237c55183c9SChristoph Hellwig 	case REQ_OP_DISCARD:
238c55183c9SChristoph Hellwig 	case REQ_OP_SECURE_ERASE:
239c55183c9SChristoph Hellwig 	case REQ_OP_WRITE_ZEROES:
240c55183c9SChristoph Hellwig 		break;
241c55183c9SChristoph Hellwig 	case REQ_OP_WRITE_SAME:
242c55183c9SChristoph Hellwig 		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
243c55183c9SChristoph Hellwig 		break;
244c55183c9SChristoph Hellwig 	default:
245c55183c9SChristoph Hellwig 		bio_for_each_segment(bv, bio_src, iter)
246c55183c9SChristoph Hellwig 			bio->bi_io_vec[bio->bi_vcnt++] = bv;
247c55183c9SChristoph Hellwig 		break;
248c55183c9SChristoph Hellwig 	}
249c55183c9SChristoph Hellwig 
250c55183c9SChristoph Hellwig 	if (bio_integrity(bio_src)) {
251c55183c9SChristoph Hellwig 		int ret;
252c55183c9SChristoph Hellwig 
253c55183c9SChristoph Hellwig 		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
254c55183c9SChristoph Hellwig 		if (ret < 0) {
255c55183c9SChristoph Hellwig 			bio_put(bio);
256c55183c9SChristoph Hellwig 			return NULL;
257c55183c9SChristoph Hellwig 		}
258c55183c9SChristoph Hellwig 	}
259c55183c9SChristoph Hellwig 
260c55183c9SChristoph Hellwig 	bio_clone_blkcg_association(bio, bio_src);
261c55183c9SChristoph Hellwig 
2625bf9a1f3SDennis Zhou (Facebook) 	blkcg_bio_issue_init(bio);
2635bf9a1f3SDennis Zhou (Facebook) 
264c55183c9SChristoph Hellwig 	return bio;
265c55183c9SChristoph Hellwig }
266c55183c9SChristoph Hellwig 
267719c555fSJens Axboe static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
268a3ad0a9dSJan Kara 			       mempool_t *pool)
269719c555fSJens Axboe {
270719c555fSJens Axboe 	struct bio *bio;
271719c555fSJens Axboe 	int rw = bio_data_dir(*bio_orig);
272719c555fSJens Axboe 	struct bio_vec *to, from;
273719c555fSJens Axboe 	struct bvec_iter iter;
274a8821f3fSNeilBrown 	unsigned i = 0;
275a8821f3fSNeilBrown 	bool bounce = false;
276a8821f3fSNeilBrown 	int sectors = 0;
27714cb0dc6SMing Lei 	bool passthrough = bio_is_passthrough(*bio_orig);
278719c555fSJens Axboe 
279a8821f3fSNeilBrown 	bio_for_each_segment(from, *bio_orig, iter) {
280a8821f3fSNeilBrown 		if (i++ < BIO_MAX_PAGES)
281a8821f3fSNeilBrown 			sectors += from.bv_len >> 9;
2821c4bc3abSChristoph Hellwig 		if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
283a8821f3fSNeilBrown 			bounce = true;
284a8821f3fSNeilBrown 	}
285a8821f3fSNeilBrown 	if (!bounce)
286719c555fSJens Axboe 		return;
287a8821f3fSNeilBrown 
28814cb0dc6SMing Lei 	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
289338aa96dSKent Overstreet 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
290a8821f3fSNeilBrown 		bio_chain(bio, *bio_orig);
291a8821f3fSNeilBrown 		generic_make_request(*bio_orig);
292a8821f3fSNeilBrown 		*bio_orig = bio;
293a8821f3fSNeilBrown 	}
294c55183c9SChristoph Hellwig 	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
295338aa96dSKent Overstreet 			&bounce_bio_set);
296719c555fSJens Axboe 
297719c555fSJens Axboe 	bio_for_each_segment_all(to, bio, i) {
298719c555fSJens Axboe 		struct page *page = to->bv_page;
299719c555fSJens Axboe 
3001c4bc3abSChristoph Hellwig 		if (page_to_pfn(page) <= q->limits.bounce_pfn)
301719c555fSJens Axboe 			continue;
302719c555fSJens Axboe 
303719c555fSJens Axboe 		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
304393a3397SWang YanQing 		inc_zone_page_state(to->bv_page, NR_BOUNCE);
305719c555fSJens Axboe 
306719c555fSJens Axboe 		if (rw == WRITE) {
307719c555fSJens Axboe 			char *vto, *vfrom;
308719c555fSJens Axboe 
309719c555fSJens Axboe 			flush_dcache_page(page);
310719c555fSJens Axboe 
311719c555fSJens Axboe 			vto = page_address(to->bv_page) + to->bv_offset;
312719c555fSJens Axboe 			vfrom = kmap_atomic(page) + to->bv_offset;
313719c555fSJens Axboe 			memcpy(vto, vfrom, to->bv_len);
314719c555fSJens Axboe 			kunmap_atomic(vfrom);
315719c555fSJens Axboe 		}
316719c555fSJens Axboe 	}
317719c555fSJens Axboe 
318719c555fSJens Axboe 	trace_block_bio_bounce(q, *bio_orig);
319719c555fSJens Axboe 
320719c555fSJens Axboe 	bio->bi_flags |= (1 << BIO_BOUNCED);
321719c555fSJens Axboe 
322338aa96dSKent Overstreet 	if (pool == &page_pool) {
323719c555fSJens Axboe 		bio->bi_end_io = bounce_end_io_write;
324719c555fSJens Axboe 		if (rw == READ)
325719c555fSJens Axboe 			bio->bi_end_io = bounce_end_io_read;
326719c555fSJens Axboe 	} else {
327719c555fSJens Axboe 		bio->bi_end_io = bounce_end_io_write_isa;
328719c555fSJens Axboe 		if (rw == READ)
329719c555fSJens Axboe 			bio->bi_end_io = bounce_end_io_read_isa;
330719c555fSJens Axboe 	}
331719c555fSJens Axboe 
332719c555fSJens Axboe 	bio->bi_private = *bio_orig;
333719c555fSJens Axboe 	*bio_orig = bio;
334719c555fSJens Axboe }
335719c555fSJens Axboe 
336719c555fSJens Axboe void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
337719c555fSJens Axboe {
338719c555fSJens Axboe 	mempool_t *pool;
339719c555fSJens Axboe 
340719c555fSJens Axboe 	/*
341719c555fSJens Axboe 	 * Data-less bio, nothing to bounce
342719c555fSJens Axboe 	 */
343719c555fSJens Axboe 	if (!bio_has_data(*bio_orig))
344719c555fSJens Axboe 		return;
345719c555fSJens Axboe 
346719c555fSJens Axboe 	/*
347719c555fSJens Axboe 	 * for non-isa bounce case, just check if the bounce pfn is equal
348719c555fSJens Axboe 	 * to or bigger than the highest pfn in the system -- in that case,
349719c555fSJens Axboe 	 * don't waste time iterating over bio segments
350719c555fSJens Axboe 	 */
351719c555fSJens Axboe 	if (!(q->bounce_gfp & GFP_DMA)) {
3521c4bc3abSChristoph Hellwig 		if (q->limits.bounce_pfn >= blk_max_pfn)
353719c555fSJens Axboe 			return;
354338aa96dSKent Overstreet 		pool = &page_pool;
355719c555fSJens Axboe 	} else {
356338aa96dSKent Overstreet 		BUG_ON(!mempool_initialized(&isa_page_pool));
357338aa96dSKent Overstreet 		pool = &isa_page_pool;
358719c555fSJens Axboe 	}
359719c555fSJens Axboe 
360719c555fSJens Axboe 	/*
361719c555fSJens Axboe 	 * slow path
362719c555fSJens Axboe 	 */
363a3ad0a9dSJan Kara 	__blk_queue_bounce(q, bio_orig, pool);
364719c555fSJens Axboe }
365