/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#endif

struct vring_desc_state {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};
struct vring_virtqueue {
	struct virtqueue vq;

	/* Actual memory layout for this queue */
	struct vring vring;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen. */
	u16 last_used_idx;

	/* Last written value to avail->flags */
	u16 avail_flags_shadow;

	/* Last written value to avail->idx in guest byte order */
	u16 avail_idx_shadow;

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;
	size_t queue_size_in_bytes;
	dma_addr_t queue_dma_addr;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif

	/* Per-descriptor state. */
	struct vring_desc_state desc_state[];
};


/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_iommu_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}
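
/*
 * For reference (a note, not part of the original file):
 * virtio_has_iommu_quirk() comes from <linux/virtio_config.h> and has
 * reverse polarity relative to the feature bit it checks, roughly:
 *
 *	return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
 *
 * so vring_use_dma_api() above returns true exactly when the device
 * negotiated VIRTIO_F_IOMMU_PLATFORM (or we are running under Xen) and
 * must therefore go through the DMA API.
 */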

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vring_use_dma_api(vq->vq.vdev))
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vring_use_dma_api(vq->vq.vdev))
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vring_use_dma_api(vq->vq.vdev))
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}
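
/*
 * Illustrative pattern (a sketch drawn from the callers below, not new
 * code): every mapping made with the helpers above is checked and, on
 * failure, unwound:
 *
 *	dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
 *	if (vring_mapping_error(vq, addr))
 *		goto unmap_release;
 *
 * The unwind path then walks the partially built descriptor chain with
 * vring_unmap_one_split(), defined next.
 */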


/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split(const struct vring_virtqueue *vq,
				  struct vring_desc *desc)
{
	u16 flags;

	if (!vring_use_dma_api(vq->vq.vdev))
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
				 virtio32_to_cpu(vq->vq.vdev, desc->len),
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
			       virtio32_to_cpu(vq->vq.vdev, desc->len),
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}
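
/*
 * Illustrative sketch (this mirrors virtqueue_add_split() below, it is
 * not new behaviour): when the table above is used, the request takes a
 * single ring slot whose head descriptor points at the table:
 *
 *	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
 *	vq->vring.desc[head].addr  = <DMA address of the table>;
 *	vq->vring.desc[head].len   = total_sg * sizeof(struct vring_desc);
 *
 * so a request with many scatterlist entries still costs only one entry
 * of vq->vq.num_free.
 */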

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

#ifdef DEBUG
	{
		ktime_t now = ktime_get();

		/* No kick or get, with .1 second between?  Warn. */
		if (vq->last_add_time_valid)
			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
					    > 100);
		vq->last_add_time = now;
		vq->last_add_time_valid = true;
	}
#endif

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	/* If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold */
	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (vq->vq.num_free < descs_used) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
			prev = i;
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);

		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->desc_state[head].data = data;
	if (indirect)
		vq->desc_state[head].indir_desc = desc;
	else
		vq->desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->avail_idx_shadow++;
	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(_vq);

	return 0;

unmap_release:
	err_idx = i;
	i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		vring_unmap_one_split(vq, &desc[i]);
		i = virtio16_to_cpu(_vq->vdev, vq->vring.desc[i].next);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -EIO;
}

static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->avail_idx_shadow - vq->num_added;
	new = vq->avail_idx_shadow;
	vq->num_added = 0;

#ifdef DEBUG
	if (vq->last_add_time_valid) {
		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
					      vq->last_add_time)) > 100);
	}
	vq->last_add_time_valid = false;
#endif

	if (vq->event) {
		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, vring_avail_event(&vq->vring)),
					      new, old);
	} else {
		needs_kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}
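
/*
 * Worked example (a note, not code from this file): with event index
 * negotiated, vring_need_event() from <uapi/linux/virtio_ring.h> decides
 * whether the event index the device asked for fell inside the window of
 * entries we just published:
 *
 *	vring_need_event(event_idx, new, old)
 *		== (u16)(new - event_idx - 1) < (u16)(new - old)
 *
 * e.g. old = 2, new = 5, event_idx = 3: (u16)1 < (u16)3, so kick; with
 * event_idx = 7 the left side wraps to 65533 and no kick is sent.  The
 * unsigned 16-bit arithmetic keeps the comparison correct across index
 * wrap-around.
 */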

static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, &vq->vring.desc[i]);
		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, &vq->vring.desc[i]);
	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);

		BUG_ON(!(vq->vring.desc[head].flags &
			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[last_used].len);

	if (unlikely(i >= vq->vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

#ifdef DEBUG
	vq->last_add_time_valid = false;
#endif

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
	}
}

static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
	}
	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->vring.used->idx);
}

static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always update the event index to keep code simple. */
	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->vring.num; i++) {
		if (!vq->desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->avail_idx_shadow--;
		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->vring.num);

	END_USE(vq);
	return NULL;
}


/*
 * Generic functions and exported symbols.
 */

static inline int virtqueue_add(struct virtqueue *_vq,
				struct scatterlist *sgs[],
				unsigned int total_sg,
				unsigned int out_sgs,
				unsigned int in_sgs,
				void *data,
				void *ctx,
				gfp_t gfp)
{
	return virtqueue_add_split(_vq, sgs, total_sg,
				   out_sgs, in_sgs, data, ctx, gfp);
}

/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int i, total_sg = 0;

	/* Count them first. */
	for (i = 0; i < out_sgs + in_sgs; i++) {
		struct scatterlist *sg;

		for (sg = sgs[i]; sg; sg = sg_next(sg))
			total_sg++;
	}
	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs,
			     data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);

/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);

/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);

/**
 * virtqueue_add_inbuf_ctx - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @ctx: extra context for the token
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    struct scatterlist *sg, unsigned int num,
			    void *data,
			    void *ctx,
			    gfp_t gfp)
{
	return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx);
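
/*
 * Illustrative usage (a sketch, not part of this file; "req" and its
 * status field are hypothetical driver state): queueing one
 * device-readable header and one device-writable status byte in a
 * single call:
 *
 *	struct scatterlist hdr, status, *sgs[2];
 *
 *	sg_init_one(&hdr, req, sizeof(*req));
 *	sg_init_one(&status, &req->status, 1);
 *	sgs[0] = &hdr;
 *	sgs[1] = &status;
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);
 *	if (!err)
 *		virtqueue_kick(vq);
 *
 * The token (here "req") is what virtqueue_get_buf() later returns.
 */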

/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * Instead of virtqueue_kick(), you can do:
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 *
 * This is sometimes useful because the virtqueue_kick_prepare() needs
 * to be serialized, but the actual virtqueue_notify() call does not.
 */
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
	return virtqueue_kick_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);

/**
 * virtqueue_notify - second half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * This does not need to be serialized.
 *
 * Returns false if host notify failed or queue is broken, otherwise true.
 */
bool virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	/* Prod other side to tell it about changes. */
	if (!vq->notify(_vq)) {
		vq->broken = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_notify);

/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_* calls, invoke this to kick
 * the other side.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns false if kick failed, otherwise true.
 */
bool virtqueue_kick(struct virtqueue *vq)
{
	if (virtqueue_kick_prepare(vq))
		return virtqueue_notify(vq);
	return true;
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
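
/*
 * Illustrative sketch (an assumption about typical callers, not code
 * from this file): drivers that want the expensive notify outside their
 * queue lock use the two halves like this:
 *
 *	spin_lock_irqsave(&lock, flags);
 *	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
 *	kick = virtqueue_kick_prepare(vq);
 *	spin_unlock_irqrestore(&lock, flags);
 *	if (kick)
 *		virtqueue_notify(vq);
 *
 * Only the prepare step needs to be serialized with other virtqueue
 * operations; the notify itself can happen after the lock is dropped.
 */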

/**
 * virtqueue_get_buf_ctx - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: the length written into the buffer
 * @ctx: extra context for the token
 *
 * If the device wrote data into the buffer, @len will be set to the
 * amount written.  This means you don't need to clear the buffer
 * beforehand to ensure there's no data leakage in the case of short
 * writes.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns NULL if there are no used buffers, or the "data" token
 * handed to virtqueue_add_*().
 */
void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
			    void **ctx)
{
	return virtqueue_get_buf_ctx_split(_vq, len, ctx);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);

void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
	return virtqueue_get_buf_ctx(_vq, len, NULL);
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);

/**
 * virtqueue_disable_cb - disable callbacks
 * @_vq: the struct virtqueue we're talking about.
 *
 * Note that this is not necessarily synchronous, hence unreliable and only
 * useful as an optimization.
 *
 * Unlike other operations, this need not be serialized.
 */
void virtqueue_disable_cb(struct virtqueue *_vq)
{
	virtqueue_disable_cb_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);

/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns current queue state
 * in an opaque unsigned value. This value should be later tested by
 * virtqueue_poll, to detect a possible race between the driver checking for
 * more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	return virtqueue_enable_cb_prepare_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);

/**
 * virtqueue_poll - query pending used buffers
 * @_vq: the struct virtqueue we're talking about.
 * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
 *
 * Returns "true" if there are pending used buffers in the queue.
 *
 * This does not need to be serialized.
 */
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	virtio_mb(vq->weak_barriers);
	return virtqueue_poll_split(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);

/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns "false" if there are pending
 * buffers in the queue, to detect a possible race between the driver
 * checking for more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);

	return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);

/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks but hints to the other side to delay
 * interrupts until most of the available buffers have been processed;
 * it returns "false" if there are many pending buffers in the queue,
 * to detect a possible race between the driver checking for more work,
 * and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	return virtqueue_enable_cb_delayed_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
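
/*
 * Illustrative sketch (an assumption about typical callers, not code
 * from this file; process() is a hypothetical driver function): the
 * canonical race-free consumption loop built from the halves above:
 *
 *	do {
 *		virtqueue_disable_cb(vq);
 *		while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
 *			process(buf, len);
 *	} while (!virtqueue_enable_cb(vq));
 *
 * If a buffer became used between the last get_buf and enable_cb,
 * virtqueue_enable_cb() returns false and the loop drains it rather
 * than relying on an interrupt that may already have been suppressed.
 */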

/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	return virtqueue_detach_unused_buf_split(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);

static inline bool more_used(const struct vring_virtqueue *vq)
{
	return more_used_split(vq);
}

irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (unlikely(vq->broken))
		return IRQ_HANDLED;

	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
	if (vq->vq.callback)
		vq->vq.callback(&vq->vq);

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
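
/*
 * Illustrative sketch (an assumption about transports, not code from
 * this file): vring_interrupt() matches irq_handler_t, so a transport
 * typically wires it up directly, passing the virtqueue as the cookie:
 *
 *	err = request_irq(irq, vring_interrupt, IRQF_SHARED,
 *			  dev_name(&vdev->dev), vq);
 *
 * It returns IRQ_NONE when the used ring is empty, which is what lets
 * shared interrupt lines work.
 */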

struct virtqueue *__vring_new_virtqueue(unsigned int index,
					struct vring vring,
					struct virtio_device *vdev,
					bool weak_barriers,
					bool context,
					bool (*notify)(struct virtqueue *),
					void (*callback)(struct virtqueue *),
					const char *name)
{
	unsigned int i;
	struct vring_virtqueue *vq;

	vq = kmalloc(sizeof(*vq) + vring.num * sizeof(struct vring_desc_state),
		     GFP_KERNEL);
	if (!vq)
		return NULL;

	vq->vring = vring;
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = vring.num;
	vq->vq.index = index;
	vq->we_own_ring = false;
	vq->queue_dma_addr = 0;
	vq->queue_size_in_bytes = 0;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->avail_flags_shadow = 0;
	vq->avail_idx_shadow = 0;
	vq->num_added = 0;
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
		!context;
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
	}

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < vring.num-1; i++)
		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
	memset(vq->desc_state, 0, vring.num * sizeof(struct vring_desc_state));

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
10922a2d1382SAndy Lutomirski static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
10932a2d1382SAndy Lutomirski dma_addr_t *dma_handle, gfp_t flag)
10942a2d1382SAndy Lutomirski {
10952a2d1382SAndy Lutomirski if (vring_use_dma_api(vdev)) {
10962a2d1382SAndy Lutomirski return dma_alloc_coherent(vdev->dev.parent, size,
10972a2d1382SAndy Lutomirski dma_handle, flag);
10982a2d1382SAndy Lutomirski } else {
10992a2d1382SAndy Lutomirski void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
11002a2d1382SAndy Lutomirski if (queue) {
11012a2d1382SAndy Lutomirski phys_addr_t phys_addr = virt_to_phys(queue);
11022a2d1382SAndy Lutomirski *dma_handle = (dma_addr_t)phys_addr;
11032a2d1382SAndy Lutomirski
11042a2d1382SAndy Lutomirski /*
11052a2d1382SAndy Lutomirski * Sanity check: make sure we didn't truncate
11062a2d1382SAndy Lutomirski * the address. The only arches I can find that
11072a2d1382SAndy Lutomirski * have 64-bit phys_addr_t but 32-bit dma_addr_t
11082a2d1382SAndy Lutomirski * are certain non-highmem MIPS and x86
11092a2d1382SAndy Lutomirski * configurations, but these configurations
11102a2d1382SAndy Lutomirski * should never allocate physical pages above 32
11112a2d1382SAndy Lutomirski * bits, so this is fine. Just in case, throw a
11122a2d1382SAndy Lutomirski * warning and abort if we end up with an
11132a2d1382SAndy Lutomirski * unrepresentable address.
11142a2d1382SAndy Lutomirski */
11152a2d1382SAndy Lutomirski if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
11162a2d1382SAndy Lutomirski free_pages_exact(queue, PAGE_ALIGN(size));
11172a2d1382SAndy Lutomirski return NULL;
11182a2d1382SAndy Lutomirski }
11192a2d1382SAndy Lutomirski }
11202a2d1382SAndy Lutomirski return queue;
11212a2d1382SAndy Lutomirski }
11222a2d1382SAndy Lutomirski }
11232a2d1382SAndy Lutomirski
11242a2d1382SAndy Lutomirski static void vring_free_queue(struct virtio_device *vdev, size_t size,
11252a2d1382SAndy Lutomirski void *queue, dma_addr_t dma_handle)
11262a2d1382SAndy Lutomirski {
11272a2d1382SAndy Lutomirski if (vring_use_dma_api(vdev)) {
11282a2d1382SAndy Lutomirski dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
11292a2d1382SAndy Lutomirski } else {
11302a2d1382SAndy Lutomirski free_pages_exact(queue, PAGE_ALIGN(size));
11312a2d1382SAndy Lutomirski }
11322a2d1382SAndy Lutomirski }
11332a2d1382SAndy Lutomirski
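/*
 * Worked example (illustrative, not part of the original file): the
 * layout that vring_size() accounts for, assuming 16-byte descriptors,
 * 2-byte __virtio16 fields and 8-byte used-ring elements. For
 * num = 256 and vring_align = 4096:
 *
 *   descriptor table: 16 * 256              = 4096 bytes
 *   avail ring:       2 * (3 + 256)         =  518 bytes
 *   align used ring up to 4096              -> 8192 bytes so far
 *   used ring:        2 * 3 + 8 * 256       = 2054 bytes
 *   total:            vring_size(256, 4096) = 10246 bytes
 *
 * Because that exceeds PAGE_SIZE on most systems, the allocation loop
 * in vring_create_virtqueue() below may halve num several times before
 * a contiguous allocation succeeds or the ring fits in a single page.
 */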
11342a2d1382SAndy Lutomirski struct virtqueue *vring_create_virtqueue(
11352a2d1382SAndy Lutomirski unsigned int index,
11362a2d1382SAndy Lutomirski unsigned int num,
11372a2d1382SAndy Lutomirski unsigned int vring_align,
11382a2d1382SAndy Lutomirski struct virtio_device *vdev,
11392a2d1382SAndy Lutomirski bool weak_barriers,
11402a2d1382SAndy Lutomirski bool may_reduce_num,
1141f94682ddSMichael S. Tsirkin bool context,
11422a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *),
11432a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *),
11442a2d1382SAndy Lutomirski const char *name)
11452a2d1382SAndy Lutomirski {
11462a2d1382SAndy Lutomirski struct virtqueue *vq;
1147e00f7bd2SDan Carpenter void *queue = NULL;
11482a2d1382SAndy Lutomirski dma_addr_t dma_addr;
11492a2d1382SAndy Lutomirski size_t queue_size_in_bytes;
11502a2d1382SAndy Lutomirski struct vring vring;
11512a2d1382SAndy Lutomirski
11522a2d1382SAndy Lutomirski /* We assume num is a power of 2. */
11532a2d1382SAndy Lutomirski if (num & (num - 1)) {
11542a2d1382SAndy Lutomirski dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
11552a2d1382SAndy Lutomirski return NULL;
11562a2d1382SAndy Lutomirski }
11572a2d1382SAndy Lutomirski
11582a2d1382SAndy Lutomirski /* TODO: allocate each queue chunk individually */
11592a2d1382SAndy Lutomirski for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
11602a2d1382SAndy Lutomirski queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
11612a2d1382SAndy Lutomirski &dma_addr,
11622a2d1382SAndy Lutomirski GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
11632a2d1382SAndy Lutomirski if (queue)
11642a2d1382SAndy Lutomirski break;
11652a2d1382SAndy Lutomirski }
11662a2d1382SAndy Lutomirski
11672a2d1382SAndy Lutomirski if (!num)
11682a2d1382SAndy Lutomirski return NULL;
11692a2d1382SAndy Lutomirski
11702a2d1382SAndy Lutomirski if (!queue) {
11712a2d1382SAndy Lutomirski /* Try to get a single page. You are my only hope! */
11722a2d1382SAndy Lutomirski queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
11732a2d1382SAndy Lutomirski &dma_addr, GFP_KERNEL|__GFP_ZERO);
11742a2d1382SAndy Lutomirski }
11752a2d1382SAndy Lutomirski if (!queue)
11762a2d1382SAndy Lutomirski return NULL;
11772a2d1382SAndy Lutomirski
11782a2d1382SAndy Lutomirski queue_size_in_bytes = vring_size(num, vring_align);
11792a2d1382SAndy Lutomirski vring_init(&vring, num, queue, vring_align);
11802a2d1382SAndy Lutomirski
1181f94682ddSMichael S. Tsirkin vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
11822a2d1382SAndy Lutomirski notify, callback, name);
11832a2d1382SAndy Lutomirski if (!vq) {
11842a2d1382SAndy Lutomirski vring_free_queue(vdev, queue_size_in_bytes, queue,
11852a2d1382SAndy Lutomirski dma_addr);
11862a2d1382SAndy Lutomirski return NULL;
11872a2d1382SAndy Lutomirski }
11882a2d1382SAndy Lutomirski
11892a2d1382SAndy Lutomirski to_vvq(vq)->queue_dma_addr = dma_addr;
11902a2d1382SAndy Lutomirski to_vvq(vq)->queue_size_in_bytes = queue_size_in_bytes;
11912a2d1382SAndy Lutomirski to_vvq(vq)->we_own_ring = true;
11922a2d1382SAndy Lutomirski
11932a2d1382SAndy Lutomirski return vq;
11942a2d1382SAndy Lutomirski }
11952a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(vring_create_virtqueue);
11962a2d1382SAndy Lutomirski
11972a2d1382SAndy Lutomirski struct virtqueue *vring_new_virtqueue(unsigned int index,
11982a2d1382SAndy Lutomirski unsigned int num,
11992a2d1382SAndy Lutomirski unsigned int vring_align,
12002a2d1382SAndy Lutomirski struct virtio_device *vdev,
12012a2d1382SAndy Lutomirski bool weak_barriers,
1202f94682ddSMichael S. Tsirkin bool context,
12032a2d1382SAndy Lutomirski void *pages,
12042a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *vq),
12052a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *vq),
12062a2d1382SAndy Lutomirski const char *name)
12072a2d1382SAndy Lutomirski {
12082a2d1382SAndy Lutomirski struct vring vring;
12092a2d1382SAndy Lutomirski vring_init(&vring, num, pages, vring_align);
1210f94682ddSMichael S. Tsirkin return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
12112a2d1382SAndy Lutomirski notify, callback, name);
12122a2d1382SAndy Lutomirski }
1213c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue);
12140a8a69ddSRusty Russell
12152a2d1382SAndy Lutomirski void vring_del_virtqueue(struct virtqueue *_vq)
12160a8a69ddSRusty Russell {
12172a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq);
12182a2d1382SAndy Lutomirski
12192a2d1382SAndy Lutomirski if (vq->we_own_ring) {
12202a2d1382SAndy Lutomirski vring_free_queue(vq->vq.vdev, vq->queue_size_in_bytes,
12212a2d1382SAndy Lutomirski vq->vring.desc, vq->queue_dma_addr);
12222a2d1382SAndy Lutomirski }
12232a2d1382SAndy Lutomirski list_del(&_vq->list);
12242a2d1382SAndy Lutomirski kfree(vq);
12250a8a69ddSRusty Russell }
1226c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue);
12270a8a69ddSRusty Russell
1228e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */
1229e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev)
1230e34f8725SRusty Russell {
1231e34f8725SRusty Russell unsigned int i;
1232e34f8725SRusty Russell
1233e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
1234e34f8725SRusty Russell switch (i) {
12359fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC:
12369fa29b9dSMark McLoughlin break;
1237a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX:
1238a5c262c5SMichael S. Tsirkin break;
1239747ae34aSMichael S. Tsirkin case VIRTIO_F_VERSION_1:
1240747ae34aSMichael S. Tsirkin break;
12411a937693SMichael S. Tsirkin case VIRTIO_F_IOMMU_PLATFORM:
12421a937693SMichael S. Tsirkin break;
1243e34f8725SRusty Russell default:
1244e34f8725SRusty Russell /* We don't understand this bit. */
1245e16e12beSMichael S. Tsirkin __virtio_clear_bit(vdev, i);
1246e34f8725SRusty Russell }
1247e34f8725SRusty Russell }
1248e34f8725SRusty Russell }
1249e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features);
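/*
 * Example (illustrative sketch, not part of the original file): a
 * transport's finalize_features hook typically calls
 * vring_transport_features() so that only ring features the core
 * understands survive negotiation. The function name is hypothetical;
 * real transports do the equivalent in their virtio_config_ops.
 */
static int example_finalize_features(struct virtio_device *vdev)
{
	/* Drop any transport feature bits this ring code doesn't know. */
	vring_transport_features(vdev);
	return 0;
}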
1250e34f8725SRusty Russell
12515dfc1762SRusty Russell /**
12525dfc1762SRusty Russell * virtqueue_get_vring_size - return the size of the virtqueue's vring
12535dfc1762SRusty Russell * @vq: the struct virtqueue containing the vring of interest.
12545dfc1762SRusty Russell *
12555dfc1762SRusty Russell * Returns the size of the vring. This is mainly used for boasting to
12565dfc1762SRusty Russell * userspace. Unlike other operations, this need not be serialized.
12575dfc1762SRusty Russell */
12588f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
12598f9f4668SRick Jones {
12608f9f4668SRick Jones
12618f9f4668SRick Jones struct vring_virtqueue *vq = to_vvq(_vq);
12628f9f4668SRick Jones
12638f9f4668SRick Jones return vq->vring.num;
12648f9f4668SRick Jones }
12658f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
12668f9f4668SRick Jones
1267b3b32c94SHeinz Graalfs bool virtqueue_is_broken(struct virtqueue *_vq)
1268b3b32c94SHeinz Graalfs {
1269b3b32c94SHeinz Graalfs struct vring_virtqueue *vq = to_vvq(_vq);
1270b3b32c94SHeinz Graalfs
1271b3b32c94SHeinz Graalfs return vq->broken;
1272b3b32c94SHeinz Graalfs }
1273b3b32c94SHeinz Graalfs EXPORT_SYMBOL_GPL(virtqueue_is_broken);
1274b3b32c94SHeinz Graalfs
1275e2dcdfe9SRusty Russell /*
1276e2dcdfe9SRusty Russell * This should prevent the device from being used, allowing drivers to
1277e2dcdfe9SRusty Russell * recover. You may need to grab appropriate locks to flush.
1278e2dcdfe9SRusty Russell */
1279e2dcdfe9SRusty Russell void virtio_break_device(struct virtio_device *dev)
1280e2dcdfe9SRusty Russell {
1281e2dcdfe9SRusty Russell struct virtqueue *_vq;
1282e2dcdfe9SRusty Russell
1283e2dcdfe9SRusty Russell list_for_each_entry(_vq, &dev->vqs, list) {
1284e2dcdfe9SRusty Russell struct vring_virtqueue *vq = to_vvq(_vq);
1285e2dcdfe9SRusty Russell vq->broken = true;
1286e2dcdfe9SRusty Russell }
1287e2dcdfe9SRusty Russell }
1288e2dcdfe9SRusty Russell EXPORT_SYMBOL_GPL(virtio_break_device);
1289e2dcdfe9SRusty Russell
12902a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
129189062652SCornelia Huck {
129189062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq);
129389062652SCornelia Huck
12942a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring);
129589062652SCornelia Huck
12962a2d1382SAndy Lutomirski return vq->queue_dma_addr;
12972a2d1382SAndy Lutomirski }
12982a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
12992a2d1382SAndy Lutomirski
13002a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
130189062652SCornelia Huck {
130189062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq);
130389062652SCornelia Huck
13042a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring);
13052a2d1382SAndy Lutomirski
13062a2d1382SAndy Lutomirski return vq->queue_dma_addr +
13072a2d1382SAndy Lutomirski ((char *)vq->vring.avail - (char *)vq->vring.desc);
130889062652SCornelia Huck }
13092a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
13102a2d1382SAndy Lutomirski
13112a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
13122a2d1382SAndy Lutomirski {
13132a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq);
13142a2d1382SAndy Lutomirski
13152a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring);
13162a2d1382SAndy Lutomirski
13172a2d1382SAndy Lutomirski return vq->queue_dma_addr +
13182a2d1382SAndy Lutomirski ((char *)vq->vring.used - (char *)vq->vring.desc);
13192a2d1382SAndy Lutomirski }
13202a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
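/*
 * Example (illustrative sketch, not part of the original file): a
 * transport uses the three getters above to tell the device where the
 * ring lives. Both functions and the register offsets below are
 * hypothetical stand-ins; virtio_pci_modern does the equivalent through
 * its common-config registers.
 */
static void example_write64(unsigned int reg, u64 val)
{
	/* A real transport would write low/high halves to device registers. */
	(void)reg;
	(void)val;
}

static void example_program_queue(struct virtqueue *vq)
{
	/* Hypothetical register offsets for this sketch only. */
	enum { EX_Q_DESC = 0x20, EX_Q_AVAIL = 0x28, EX_Q_USED = 0x30 };

	example_write64(EX_Q_DESC, virtqueue_get_desc_addr(vq));
	example_write64(EX_Q_AVAIL, virtqueue_get_avail_addr(vq));
	example_write64(EX_Q_USED, virtqueue_get_used_addr(vq));
}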
13212a2d1382SAndy Lutomirski
13222a2d1382SAndy Lutomirski const struct vring *virtqueue_get_vring(struct virtqueue *vq)
13232a2d1382SAndy Lutomirski {
13242a2d1382SAndy Lutomirski return &to_vvq(vq)->vring;
13252a2d1382SAndy Lutomirski }
13262a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_vring);
132789062652SCornelia Huck
1328c6fd4701SRusty Russell MODULE_LICENSE("GPL");
1329
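/*
 * Example (illustrative sketch, not part of the original file): a driver
 * might guard a wait loop with virtqueue_is_broken() so it can give up
 * cleanly after virtio_break_device() has marked the rings dead. The
 * function name is hypothetical.
 */
static bool example_vq_still_sane(struct virtqueue *vq)
{
	if (virtqueue_is_broken(vq)) {
		pr_err("virtqueue %s (%u entries) is broken, giving up\n",
		       vq->name, virtqueue_get_vring_size(vq));
		return false;
	}
	return true;
}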