1 /*
2 * DMA helper functions
3 *
4 * Copyright (c) 2009,2020 Red Hat
5 *
6 * This work is licensed under the terms of the GNU General Public License
7 * (GNU GPL), version 2 or later.
8 */
9
10 #include "qemu/osdep.h"
11 #include "sysemu/block-backend.h"
12 #include "sysemu/dma.h"
13 #include "trace.h"
14 #include "qemu/thread.h"
15 #include "qemu/main-loop.h"
16 #include "sysemu/cpu-timers.h"
17 #include "qemu/range.h"
18
19 /* #define DEBUG_IOMMU */
20
dma_memory_set(AddressSpace * as,dma_addr_t addr,uint8_t c,dma_addr_t len,MemTxAttrs attrs)21 MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
22 uint8_t c, dma_addr_t len, MemTxAttrs attrs)
23 {
24 dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
25
26 return address_space_set(as, addr, c, len, attrs);
27 }
28
/*
 * Prepare @qsg as an empty scatter/gather list.
 *
 * @qsg:        list to initialise
 * @dev:        owning device; a reference is taken here and dropped again
 *              by qemu_sglist_destroy()
 * @alloc_hint: number of entries to pre-allocate
 * @as:         address space recorded in the list
 */
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
                      AddressSpace *as)
{
    /* Bookkeeping: who owns the list and where its addresses live. */
    object_ref(OBJECT(dev));
    qsg->dev = dev;
    qsg->as = as;

    /* Storage: room for alloc_hint entries, none of them used yet. */
    qsg->nalloc = alloc_hint;
    qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
    qsg->nsg = 0;
    qsg->size = 0;
}
40
/*
 * Append the region [@base, @base + @len) to @qsg and add @len to the
 * list's total size.
 */
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
{
    ScatterGatherEntry *slot;

    /* Array full: grow to 2n + 1 entries (the +1 covers n == 0). */
    if (qsg->nsg == qsg->nalloc) {
        qsg->nalloc = 2 * qsg->nalloc + 1;
        qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
    }

    slot = &qsg->sg[qsg->nsg];
    slot->base = base;
    slot->len = len;

    qsg->nsg += 1;
    qsg->size += len;
}
52
/*
 * Tear down @qsg: drop the device reference taken by qemu_sglist_init(),
 * free the entry array and zero the whole structure.
 */
void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));

    g_free(qsg->sg);

    /* Leave no stale pointers or counters behind. */
    memset(qsg, 0, sizeof(QEMUSGList));
}
59
60 typedef struct {
61 BlockAIOCB common;
62 AioContext *ctx;
63 BlockAIOCB *acb;
64 QEMUSGList *sg;
65 uint32_t align;
66 uint64_t offset;
67 DMADirection dir;
68 int sg_cur_index;
69 dma_addr_t sg_cur_byte;
70 QEMUIOVector iov;
71 QEMUBH *bh;
72 DMAIOFunc *io_func;
73 void *io_func_opaque;
74 } DMAAIOCB;
75
76 static void dma_blk_cb(void *opaque, int ret);
77
reschedule_dma(void * opaque)78 static void reschedule_dma(void *opaque)
79 {
80 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
81
82 assert(!dbs->acb && dbs->bh);
83 qemu_bh_delete(dbs->bh);
84 dbs->bh = NULL;
85 dma_blk_cb(dbs, 0);
86 }
87
/*
 * Unmap every region currently held in dbs->iov (each with its full
 * length as the access length) and reset the iovec to empty.
 */
static void dma_blk_unmap(DMAAIOCB *dbs)
{
    int idx = 0;

    while (idx < dbs->iov.niov) {
        void *host = dbs->iov.iov[idx].iov_base;
        dma_addr_t nbytes = dbs->iov.iov[idx].iov_len;

        dma_memory_unmap(dbs->sg->as, host, nbytes, dbs->dir, nbytes);
        idx++;
    }

    qemu_iovec_reset(&dbs->iov);
}
99
/*
 * Finish the request @dbs with result @ret.
 *
 * Unmaps whatever is still mapped, invokes the caller's completion
 * callback (if one was supplied), then destroys the iovec and drops a
 * reference on the AIOCB.  Must not be called while an I/O is in flight
 * or a bottom half is pending.
 */
static void dma_complete(DMAAIOCB *dbs, int ret)
{
    trace_dma_complete(dbs, ret, dbs->common.cb);

    assert(!dbs->bh && !dbs->acb);

    dma_blk_unmap(dbs);

    if (dbs->common.cb != NULL) {
        dbs->common.cb(dbs->common.opaque, ret);
    }

    /* Release resources only after the callback has run. */
    qemu_iovec_destroy(&dbs->iov);
    qemu_aio_unref(dbs);
}
112
dma_blk_cb(void * opaque,int ret)113 static void dma_blk_cb(void *opaque, int ret)
114 {
115 DMAAIOCB *dbs = (DMAAIOCB *)opaque;
116 AioContext *ctx = dbs->ctx;
117 dma_addr_t cur_addr, cur_len;
118 void *mem;
119
120 trace_dma_blk_cb(dbs, ret);
121
122 /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */
123 assert(ctx == qemu_get_current_aio_context());
124
125 dbs->acb = NULL;
126 dbs->offset += dbs->iov.size;
127
128 if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
129 dma_complete(dbs, ret);
130 return;
131 }
132 dma_blk_unmap(dbs);
133
134 while (dbs->sg_cur_index < dbs->sg->nsg) {
135 cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
136 cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
137 mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
138 MEMTXATTRS_UNSPECIFIED);
139 /*
140 * Make reads deterministic in icount mode. Windows sometimes issues
141 * disk read requests with overlapping SGs. It leads
142 * to non-determinism, because resulting buffer contents may be mixed
143 * from several sectors. This code splits all SGs into several
144 * groups. SGs in every group do not overlap.
145 */
146 if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
147 int i;
148 for (i = 0 ; i < dbs->iov.niov ; ++i) {
149 if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
150 dbs->iov.iov[i].iov_len, (intptr_t)mem,
151 cur_len)) {
152 dma_memory_unmap(dbs->sg->as, mem, cur_len,
153 dbs->dir, cur_len);
154 mem = NULL;
155 break;
156 }
157 }
158 }
159 if (!mem)
160 break;
161 qemu_iovec_add(&dbs->iov, mem, cur_len);
162 dbs->sg_cur_byte += cur_len;
163 if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
164 dbs->sg_cur_byte = 0;
165 ++dbs->sg_cur_index;
166 }
167 }
168
169 if (dbs->iov.size == 0) {
170 trace_dma_map_wait(dbs);
171 dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
172 address_space_register_map_client(dbs->sg->as, dbs->bh);
173 return;
174 }
175
176 if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
177 qemu_iovec_discard_back(&dbs->iov,
178 QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
179 }
180
181 dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
182 dma_blk_cb, dbs, dbs->io_func_opaque);
183 assert(dbs->acb);
184 }
185
/*
 * cancel_async hook for DMA requests (see dma_aiocb_info).
 *
 * If an I/O is in flight it is cancelled asynchronously.  Otherwise any
 * pending map-client bottom half is unregistered and deleted, and the
 * caller's callback (if any) is invoked with -ECANCELED.
 *
 * NOTE(review): unlike dma_complete(), the second path neither destroys
 * dbs->iov nor calls qemu_aio_unref() -- confirm this is intentional.
 */
static void dma_aio_cancel(BlockAIOCB *acb)
{
    DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);

    trace_dma_aio_cancel(dbs);

    /* "I/O in flight" and "waiting for a mapping" are mutually exclusive. */
    assert(!dbs->acb || !dbs->bh);

    if (dbs->acb) {
        /* This will invoke dma_blk_cb. */
        blk_aio_cancel_async(dbs->acb);
    } else {
        if (dbs->bh) {
            address_space_unregister_map_client(dbs->sg->as, dbs->bh);
            qemu_bh_delete(dbs->bh);
            dbs->bh = NULL;
        }
        if (dbs->common.cb) {
            dbs->common.cb(dbs->common.opaque, -ECANCELED);
        }
    }
}
208
209 static const AIOCBInfo dma_aiocb_info = {
210 .aiocb_size = sizeof(DMAAIOCB),
211 .cancel_async = dma_aio_cancel,
212 };
213
dma_blk_io(AioContext * ctx,QEMUSGList * sg,uint64_t offset,uint32_t align,DMAIOFunc * io_func,void * io_func_opaque,BlockCompletionFunc * cb,void * opaque,DMADirection dir)214 BlockAIOCB *dma_blk_io(AioContext *ctx,
215 QEMUSGList *sg, uint64_t offset, uint32_t align,
216 DMAIOFunc *io_func, void *io_func_opaque,
217 BlockCompletionFunc *cb,
218 void *opaque, DMADirection dir)
219 {
220 DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
221
222 trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
223
224 dbs->acb = NULL;
225 dbs->sg = sg;
226 dbs->ctx = ctx;
227 dbs->offset = offset;
228 dbs->align = align;
229 dbs->sg_cur_index = 0;
230 dbs->sg_cur_byte = 0;
231 dbs->dir = dir;
232 dbs->io_func = io_func;
233 dbs->io_func_opaque = io_func_opaque;
234 dbs->bh = NULL;
235 qemu_iovec_init(&dbs->iov, sg->nsg);
236 dma_blk_cb(dbs, 0);
237 return &dbs->common;
238 }
239
240
241 static
dma_blk_read_io_func(int64_t offset,QEMUIOVector * iov,BlockCompletionFunc * cb,void * cb_opaque,void * opaque)242 BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
243 BlockCompletionFunc *cb, void *cb_opaque,
244 void *opaque)
245 {
246 BlockBackend *blk = opaque;
247 return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
248 }
249
dma_blk_read(BlockBackend * blk,QEMUSGList * sg,uint64_t offset,uint32_t align,void (* cb)(void * opaque,int ret),void * opaque)250 BlockAIOCB *dma_blk_read(BlockBackend *blk,
251 QEMUSGList *sg, uint64_t offset, uint32_t align,
252 void (*cb)(void *opaque, int ret), void *opaque)
253 {
254 return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
255 dma_blk_read_io_func, blk, cb, opaque,
256 DMA_DIRECTION_FROM_DEVICE);
257 }
258
259 static
dma_blk_write_io_func(int64_t offset,QEMUIOVector * iov,BlockCompletionFunc * cb,void * cb_opaque,void * opaque)260 BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
261 BlockCompletionFunc *cb, void *cb_opaque,
262 void *opaque)
263 {
264 BlockBackend *blk = opaque;
265 return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
266 }
267
dma_blk_write(BlockBackend * blk,QEMUSGList * sg,uint64_t offset,uint32_t align,void (* cb)(void * opaque,int ret),void * opaque)268 BlockAIOCB *dma_blk_write(BlockBackend *blk,
269 QEMUSGList *sg, uint64_t offset, uint32_t align,
270 void (*cb)(void *opaque, int ret), void *opaque)
271 {
272 return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
273 dma_blk_write_io_func, blk, cb, opaque,
274 DMA_DIRECTION_TO_DEVICE);
275 }
276
277
dma_buf_rw(void * buf,dma_addr_t len,dma_addr_t * residual,QEMUSGList * sg,DMADirection dir,MemTxAttrs attrs)278 static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
279 QEMUSGList *sg, DMADirection dir,
280 MemTxAttrs attrs)
281 {
282 uint8_t *ptr = buf;
283 dma_addr_t xresidual;
284 int sg_cur_index;
285 MemTxResult res = MEMTX_OK;
286
287 xresidual = sg->size;
288 sg_cur_index = 0;
289 len = MIN(len, xresidual);
290 while (len > 0) {
291 ScatterGatherEntry entry = sg->sg[sg_cur_index++];
292 dma_addr_t xfer = MIN(len, entry.len);
293 res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
294 ptr += xfer;
295 len -= xfer;
296 xresidual -= xfer;
297 }
298
299 if (residual) {
300 *residual = xresidual;
301 }
302 return res;
303 }
304
dma_buf_read(void * ptr,dma_addr_t len,dma_addr_t * residual,QEMUSGList * sg,MemTxAttrs attrs)305 MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
306 QEMUSGList *sg, MemTxAttrs attrs)
307 {
308 return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
309 }
310
dma_buf_write(void * ptr,dma_addr_t len,dma_addr_t * residual,QEMUSGList * sg,MemTxAttrs attrs)311 MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
312 QEMUSGList *sg, MemTxAttrs attrs)
313 {
314 return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
315 }
316
/*
 * Begin block accounting for a transfer of @sg: forwards @blk's stats,
 * @cookie, the list's total size and @type to block_acct_start().
 */
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
                    QEMUSGList *sg, enum BlockAcctType type)
{
    block_acct_start(blk_get_stats(blk),
                     cookie,
                     sg->size,
                     type);
}
322
/*
 * Compute the mask (2^k - 1) of the largest naturally aligned power-of-two
 * block that starts at @start, does not extend past @end and does not
 * exceed an address width of @max_addr_bits.
 *
 * @start:         first address of the range
 * @end:           last address of the range (inclusive)
 * @max_addr_bits: address width limiting the block size; values >= 64 mean
 *                 "no limit", values <= 0 yield an empty limit (mask 0)
 *
 * Returns the block size minus one.
 */
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
{
    uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
    uint64_t alignment_mask, size_mask;

    /*
     * Shifting a 64-bit value by a negative count or by >= 64 is undefined,
     * so only shift for widths strictly inside (0, 64).
     */
    if (max_addr_bits <= 0) {
        max_mask = 0;
    } else if (max_addr_bits < 64) {
        max_mask = (1ULL << max_addr_bits) - 1;
    }

    /* start & -start isolates the lowest set bit, i.e. start's alignment. */
    alignment_mask = start ? (start & -start) - 1 : max_mask;
    alignment_mask = MIN(alignment_mask, max_mask);
    size_mask = MIN(addr_mask, max_mask);

    if (alignment_mask <= size_mask) {
        /* Increase the alignment of start */
        return alignment_mask;
    } else {
        /* Find the largest page mask from size */
        if (addr_mask == UINT64_MAX) {
            /* addr_mask + 1 would wrap to 0; the whole space is the block. */
            return UINT64_MAX;
        }
        /* Largest power of two not exceeding the range size, minus one. */
        return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
    }
}
347
348