10a8a69ddSRusty Russell /* Virtio ring implementation. 20a8a69ddSRusty Russell * 30a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 40a8a69ddSRusty Russell * 50a8a69ddSRusty Russell * This program is free software; you can redistribute it and/or modify 60a8a69ddSRusty Russell * it under the terms of the GNU General Public License as published by 70a8a69ddSRusty Russell * the Free Software Foundation; either version 2 of the License, or 80a8a69ddSRusty Russell * (at your option) any later version. 90a8a69ddSRusty Russell * 100a8a69ddSRusty Russell * This program is distributed in the hope that it will be useful, 110a8a69ddSRusty Russell * but WITHOUT ANY WARRANTY; without even the implied warranty of 120a8a69ddSRusty Russell * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 130a8a69ddSRusty Russell * GNU General Public License for more details. 140a8a69ddSRusty Russell * 150a8a69ddSRusty Russell * You should have received a copy of the GNU General Public License 160a8a69ddSRusty Russell * along with this program; if not, write to the Free Software 170a8a69ddSRusty Russell * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 180a8a69ddSRusty Russell */ 190a8a69ddSRusty Russell #include <linux/virtio.h> 200a8a69ddSRusty Russell #include <linux/virtio_ring.h> 21e34f8725SRusty Russell #include <linux/virtio_config.h> 220a8a69ddSRusty Russell #include <linux/device.h> 235a0e3ad6STejun Heo #include <linux/slab.h> 24b5a2c4f1SPaul Gortmaker #include <linux/module.h> 25e93300b1SRusty Russell #include <linux/hrtimer.h> 26780bc790SAndy Lutomirski #include <linux/dma-mapping.h> 2778fe3987SAndy Lutomirski #include <xen/xen.h> 280a8a69ddSRusty Russell 290a8a69ddSRusty Russell #ifdef DEBUG 300a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 319499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 329499f5e7SRusty Russell do { \ 339499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 349499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 359499f5e7SRusty Russell BUG(); \ 369499f5e7SRusty Russell } while (0) 37c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 383a35ce7dSRoel Kluin #define START_USE(_vq) \ 39c5f841f1SRusty Russell do { \ 40c5f841f1SRusty Russell if ((_vq)->in_use) \ 419499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 429499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 43c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 44c5f841f1SRusty Russell } while (0) 453a35ce7dSRoel Kluin #define END_USE(_vq) \ 4697a545abSRusty Russell do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 474d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(_vq) \ 484d6a105eSTiwei Bie do { \ 494d6a105eSTiwei Bie ktime_t now = ktime_get(); \ 504d6a105eSTiwei Bie \ 514d6a105eSTiwei Bie /* No kick or get, with .1 second between? Warn. 
*/ \ 524d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) \ 534d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(now, \ 544d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 554d6a105eSTiwei Bie (_vq)->last_add_time = now; \ 564d6a105eSTiwei Bie (_vq)->last_add_time_valid = true; \ 574d6a105eSTiwei Bie } while (0) 584d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(_vq) \ 594d6a105eSTiwei Bie do { \ 604d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) { \ 614d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 624d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 634d6a105eSTiwei Bie } \ 644d6a105eSTiwei Bie } while (0) 654d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(_vq) \ 664d6a105eSTiwei Bie ((_vq)->last_add_time_valid = false) 670a8a69ddSRusty Russell #else 689499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 699499f5e7SRusty Russell do { \ 709499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 719499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 729499f5e7SRusty Russell (_vq)->broken = true; \ 739499f5e7SRusty Russell } while (0) 740a8a69ddSRusty Russell #define START_USE(vq) 750a8a69ddSRusty Russell #define END_USE(vq) 764d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(vq) 774d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(vq) 784d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(vq) 790a8a69ddSRusty Russell #endif 800a8a69ddSRusty Russell 81cbeedb72STiwei Bie struct vring_desc_state_split { 82780bc790SAndy Lutomirski void *data; /* Data for callback. */ 83780bc790SAndy Lutomirski struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84780bc790SAndy Lutomirski }; 85780bc790SAndy Lutomirski 8643b4f721SMichael S. Tsirkin struct vring_virtqueue { 870a8a69ddSRusty Russell struct virtqueue vq; 880a8a69ddSRusty Russell 89*fb3fba6bSTiwei Bie /* Is DMA API used? */ 90*fb3fba6bSTiwei Bie bool use_dma_api; 91*fb3fba6bSTiwei Bie 927b21e34fSRusty Russell /* Can we use weak barriers? */ 937b21e34fSRusty Russell bool weak_barriers; 947b21e34fSRusty Russell 950a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 960a8a69ddSRusty Russell bool broken; 970a8a69ddSRusty Russell 989fa29b9dSMark McLoughlin /* Host supports indirect buffers */ 999fa29b9dSMark McLoughlin bool indirect; 1009fa29b9dSMark McLoughlin 101a5c262c5SMichael S. Tsirkin /* Host publishes avail event idx */ 102a5c262c5SMichael S. Tsirkin bool event; 103a5c262c5SMichael S. Tsirkin 1040a8a69ddSRusty Russell /* Head of free buffer list. */ 1050a8a69ddSRusty Russell unsigned int free_head; 1060a8a69ddSRusty Russell /* Number we've added since last sync. */ 1070a8a69ddSRusty Russell unsigned int num_added; 1080a8a69ddSRusty Russell 1090a8a69ddSRusty Russell /* Last used index we've seen. */ 1101bc4953eSAnthony Liguori u16 last_used_idx; 1110a8a69ddSRusty Russell 112e593bf97STiwei Bie struct { 113e593bf97STiwei Bie /* Actual memory layout for this queue */ 114e593bf97STiwei Bie struct vring vring; 115e593bf97STiwei Bie 116f277ec42SVenkatesh Srinivas /* Last written value to avail->flags */ 117f277ec42SVenkatesh Srinivas u16 avail_flags_shadow; 118f277ec42SVenkatesh Srinivas 119f277ec42SVenkatesh Srinivas /* Last written value to avail->idx in guest byte order */ 120f277ec42SVenkatesh Srinivas u16 avail_idx_shadow; 121cbeedb72STiwei Bie 122cbeedb72STiwei Bie /* Per-descriptor state. 
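 * (Indexed by descriptor head: ->data is the caller's token returned by
 * virtqueue_get_buf(), ->indir_desc is the indirect descriptor table, or
 * the per-buffer ctx, so detach_buf_split() can unmap/free or return it.)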
*/ 123cbeedb72STiwei Bie struct vring_desc_state_split *desc_state; 124d79dca75STiwei Bie 125d79dca75STiwei Bie /* DMA, allocation, and size information */ 126d79dca75STiwei Bie size_t queue_size_in_bytes; 127d79dca75STiwei Bie dma_addr_t queue_dma_addr; 128e593bf97STiwei Bie } split; 129f277ec42SVenkatesh Srinivas 1300a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 13146f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *vq); 1320a8a69ddSRusty Russell 1332a2d1382SAndy Lutomirski /* DMA, allocation, and size information */ 1342a2d1382SAndy Lutomirski bool we_own_ring; 1352a2d1382SAndy Lutomirski 1360a8a69ddSRusty Russell #ifdef DEBUG 1370a8a69ddSRusty Russell /* They're supposed to lock for us. */ 1380a8a69ddSRusty Russell unsigned int in_use; 139e93300b1SRusty Russell 140e93300b1SRusty Russell /* Figure out if their kicks are too delayed. */ 141e93300b1SRusty Russell bool last_add_time_valid; 142e93300b1SRusty Russell ktime_t last_add_time; 1430a8a69ddSRusty Russell #endif 1440a8a69ddSRusty Russell }; 1450a8a69ddSRusty Russell 146e6f633e5STiwei Bie 147e6f633e5STiwei Bie /* 148e6f633e5STiwei Bie * Helpers. 149e6f633e5STiwei Bie */ 150e6f633e5STiwei Bie 1510a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 1520a8a69ddSRusty Russell 1532f18c2d1STiwei Bie static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 1542f18c2d1STiwei Bie unsigned int total_sg) 1552f18c2d1STiwei Bie { 1562f18c2d1STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1572f18c2d1STiwei Bie 1582f18c2d1STiwei Bie /* 1592f18c2d1STiwei Bie * If the host supports indirect descriptor tables, and we have multiple 1602f18c2d1STiwei Bie * buffers, then go indirect. FIXME: tune this threshold 1612f18c2d1STiwei Bie */ 1622f18c2d1STiwei Bie return (vq->indirect && total_sg > 1 && vq->vq.num_free); 1632f18c2d1STiwei Bie } 1642f18c2d1STiwei Bie 165d26c96c8SAndy Lutomirski /* 1661a937693SMichael S. Tsirkin * Modern virtio devices have feature bits to specify whether they need a 1671a937693SMichael S. Tsirkin * quirk and bypass the IOMMU. If not there, just use the DMA API. 1681a937693SMichael S. Tsirkin * 1691a937693SMichael S. Tsirkin * If there, the interaction between virtio and DMA API is messy. 170d26c96c8SAndy Lutomirski * 171d26c96c8SAndy Lutomirski * On most systems with virtio, physical addresses match bus addresses, 172d26c96c8SAndy Lutomirski * and it doesn't particularly matter whether we use the DMA API. 173d26c96c8SAndy Lutomirski * 174d26c96c8SAndy Lutomirski * On some systems, including Xen and any system with a physical device 175d26c96c8SAndy Lutomirski * that speaks virtio behind a physical IOMMU, we must use the DMA API 176d26c96c8SAndy Lutomirski * for virtio DMA to work at all. 177d26c96c8SAndy Lutomirski * 178d26c96c8SAndy Lutomirski * On other systems, including SPARC and PPC64, virtio-pci devices are 179d26c96c8SAndy Lutomirski * enumerated as though they are behind an IOMMU, but the virtio host 180d26c96c8SAndy Lutomirski * ignores the IOMMU, so we must either pretend that the IOMMU isn't 181d26c96c8SAndy Lutomirski * there or somehow map everything as the identity. 182d26c96c8SAndy Lutomirski * 183d26c96c8SAndy Lutomirski * For the time being, we preserve historic behavior and bypass the DMA 184d26c96c8SAndy Lutomirski * API. 1851a937693SMichael S. Tsirkin * 1861a937693SMichael S. Tsirkin * TODO: install a per-device DMA ops structure that does the right thing 1871a937693SMichael S. 
Tsirkin * taking into account all the above quirks, and use the DMA API
1881a937693SMichael S. Tsirkin * unconditionally on data path.
189d26c96c8SAndy Lutomirski */
190d26c96c8SAndy Lutomirski
191d26c96c8SAndy Lutomirski static bool vring_use_dma_api(struct virtio_device *vdev)
192d26c96c8SAndy Lutomirski {
1931a937693SMichael S. Tsirkin if (!virtio_has_iommu_quirk(vdev))
1941a937693SMichael S. Tsirkin return true;
1951a937693SMichael S. Tsirkin
1961a937693SMichael S. Tsirkin /* Otherwise, we are left to guess. */
19778fe3987SAndy Lutomirski /*
19878fe3987SAndy Lutomirski * In theory, it's possible to have a buggy QEMU-supplied
19978fe3987SAndy Lutomirski * emulated Q35 IOMMU and Xen enabled at the same time. On
20078fe3987SAndy Lutomirski * such a configuration, virtio has never worked and will
20178fe3987SAndy Lutomirski * not work without an even larger kludge. Instead, enable
20278fe3987SAndy Lutomirski * the DMA API if we're a Xen guest, which at least allows
20378fe3987SAndy Lutomirski * all of the sensible Xen configurations to work correctly.
20478fe3987SAndy Lutomirski */
20578fe3987SAndy Lutomirski if (xen_domain())
20678fe3987SAndy Lutomirski return true;
20778fe3987SAndy Lutomirski
208d26c96c8SAndy Lutomirski return false;
209d26c96c8SAndy Lutomirski }
210d26c96c8SAndy Lutomirski
211d79dca75STiwei Bie static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
212d79dca75STiwei Bie dma_addr_t *dma_handle, gfp_t flag)
213d79dca75STiwei Bie {
214d79dca75STiwei Bie if (vring_use_dma_api(vdev)) {
215d79dca75STiwei Bie return dma_alloc_coherent(vdev->dev.parent, size,
216d79dca75STiwei Bie dma_handle, flag);
217d79dca75STiwei Bie } else {
218d79dca75STiwei Bie void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);
219d79dca75STiwei Bie
220d79dca75STiwei Bie if (queue) {
221d79dca75STiwei Bie phys_addr_t phys_addr = virt_to_phys(queue);
222d79dca75STiwei Bie *dma_handle = (dma_addr_t)phys_addr;
223d79dca75STiwei Bie
224d79dca75STiwei Bie /*
225d79dca75STiwei Bie * Sanity check: make sure we didn't truncate
226d79dca75STiwei Bie * the address. The only arches I can find that
227d79dca75STiwei Bie * have 64-bit phys_addr_t but 32-bit dma_addr_t
228d79dca75STiwei Bie * are certain non-highmem MIPS and x86
229d79dca75STiwei Bie * configurations, but these configurations
230d79dca75STiwei Bie * should never allocate physical pages above 32
231d79dca75STiwei Bie * bits, so this is fine. Just in case, throw a
232d79dca75STiwei Bie * warning and abort if we end up with an
233d79dca75STiwei Bie * unrepresentable address.
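 * For example, a phys_addr_t of 0x100000000 stored in a 32-bit
 * dma_addr_t reads back as 0, which the WARN_ON_ONCE() below catches.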
234d79dca75STiwei Bie */ 235d79dca75STiwei Bie if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 236d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 237d79dca75STiwei Bie return NULL; 238d79dca75STiwei Bie } 239d79dca75STiwei Bie } 240d79dca75STiwei Bie return queue; 241d79dca75STiwei Bie } 242d79dca75STiwei Bie } 243d79dca75STiwei Bie 244d79dca75STiwei Bie static void vring_free_queue(struct virtio_device *vdev, size_t size, 245d79dca75STiwei Bie void *queue, dma_addr_t dma_handle) 246d79dca75STiwei Bie { 247d79dca75STiwei Bie if (vring_use_dma_api(vdev)) 248d79dca75STiwei Bie dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 249d79dca75STiwei Bie else 250d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 251d79dca75STiwei Bie } 252d79dca75STiwei Bie 253780bc790SAndy Lutomirski /* 254780bc790SAndy Lutomirski * The DMA ops on various arches are rather gnarly right now, and 255780bc790SAndy Lutomirski * making all of the arch DMA ops work on the vring device itself 256780bc790SAndy Lutomirski * is a mess. For now, we use the parent device for DMA ops. 257780bc790SAndy Lutomirski */ 25875bfa81bSMichael S. Tsirkin static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 259780bc790SAndy Lutomirski { 260780bc790SAndy Lutomirski return vq->vq.vdev->dev.parent; 261780bc790SAndy Lutomirski } 262780bc790SAndy Lutomirski 263780bc790SAndy Lutomirski /* Map one sg entry. */ 264780bc790SAndy Lutomirski static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 265780bc790SAndy Lutomirski struct scatterlist *sg, 266780bc790SAndy Lutomirski enum dma_data_direction direction) 267780bc790SAndy Lutomirski { 268*fb3fba6bSTiwei Bie if (!vq->use_dma_api) 269780bc790SAndy Lutomirski return (dma_addr_t)sg_phys(sg); 270780bc790SAndy Lutomirski 271780bc790SAndy Lutomirski /* 272780bc790SAndy Lutomirski * We can't use dma_map_sg, because we don't use scatterlists in 273780bc790SAndy Lutomirski * the way it expects (we don't guarantee that the scatterlist 274780bc790SAndy Lutomirski * will exist for the lifetime of the mapping). 275780bc790SAndy Lutomirski */ 276780bc790SAndy Lutomirski return dma_map_page(vring_dma_dev(vq), 277780bc790SAndy Lutomirski sg_page(sg), sg->offset, sg->length, 278780bc790SAndy Lutomirski direction); 279780bc790SAndy Lutomirski } 280780bc790SAndy Lutomirski 281780bc790SAndy Lutomirski static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 282780bc790SAndy Lutomirski void *cpu_addr, size_t size, 283780bc790SAndy Lutomirski enum dma_data_direction direction) 284780bc790SAndy Lutomirski { 285*fb3fba6bSTiwei Bie if (!vq->use_dma_api) 286780bc790SAndy Lutomirski return (dma_addr_t)virt_to_phys(cpu_addr); 287780bc790SAndy Lutomirski 288780bc790SAndy Lutomirski return dma_map_single(vring_dma_dev(vq), 289780bc790SAndy Lutomirski cpu_addr, size, direction); 290780bc790SAndy Lutomirski } 291780bc790SAndy Lutomirski 292e6f633e5STiwei Bie static int vring_mapping_error(const struct vring_virtqueue *vq, 293e6f633e5STiwei Bie dma_addr_t addr) 294e6f633e5STiwei Bie { 295*fb3fba6bSTiwei Bie if (!vq->use_dma_api) 296e6f633e5STiwei Bie return 0; 297e6f633e5STiwei Bie 298e6f633e5STiwei Bie return dma_mapping_error(vring_dma_dev(vq), addr); 299e6f633e5STiwei Bie } 300e6f633e5STiwei Bie 301e6f633e5STiwei Bie 302e6f633e5STiwei Bie /* 303e6f633e5STiwei Bie * Split ring specific functions - *_split(). 
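 *
 * In this layout free descriptors are chained through desc[i].next
 * starting at free_head; virtqueue_add_split() publishes buffer heads
 * in the avail ring, and virtqueue_get_buf_ctx_split() reaps completed
 * heads (and the length written) from the used ring.
 * avail_flags_shadow/avail_idx_shadow cache the values last written to
 * the shared avail ring.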
304e6f633e5STiwei Bie */ 305e6f633e5STiwei Bie 306138fd251STiwei Bie static void vring_unmap_one_split(const struct vring_virtqueue *vq, 307780bc790SAndy Lutomirski struct vring_desc *desc) 308780bc790SAndy Lutomirski { 309780bc790SAndy Lutomirski u16 flags; 310780bc790SAndy Lutomirski 311*fb3fba6bSTiwei Bie if (!vq->use_dma_api) 312780bc790SAndy Lutomirski return; 313780bc790SAndy Lutomirski 314780bc790SAndy Lutomirski flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 315780bc790SAndy Lutomirski 316780bc790SAndy Lutomirski if (flags & VRING_DESC_F_INDIRECT) { 317780bc790SAndy Lutomirski dma_unmap_single(vring_dma_dev(vq), 318780bc790SAndy Lutomirski virtio64_to_cpu(vq->vq.vdev, desc->addr), 319780bc790SAndy Lutomirski virtio32_to_cpu(vq->vq.vdev, desc->len), 320780bc790SAndy Lutomirski (flags & VRING_DESC_F_WRITE) ? 321780bc790SAndy Lutomirski DMA_FROM_DEVICE : DMA_TO_DEVICE); 322780bc790SAndy Lutomirski } else { 323780bc790SAndy Lutomirski dma_unmap_page(vring_dma_dev(vq), 324780bc790SAndy Lutomirski virtio64_to_cpu(vq->vq.vdev, desc->addr), 325780bc790SAndy Lutomirski virtio32_to_cpu(vq->vq.vdev, desc->len), 326780bc790SAndy Lutomirski (flags & VRING_DESC_F_WRITE) ? 327780bc790SAndy Lutomirski DMA_FROM_DEVICE : DMA_TO_DEVICE); 328780bc790SAndy Lutomirski } 329780bc790SAndy Lutomirski } 330780bc790SAndy Lutomirski 331138fd251STiwei Bie static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 332138fd251STiwei Bie unsigned int total_sg, 333138fd251STiwei Bie gfp_t gfp) 3349fa29b9dSMark McLoughlin { 3359fa29b9dSMark McLoughlin struct vring_desc *desc; 336b25bd251SRusty Russell unsigned int i; 3379fa29b9dSMark McLoughlin 338b92b1b89SWill Deacon /* 339b92b1b89SWill Deacon * We require lowmem mappings for the descriptors because 340b92b1b89SWill Deacon * otherwise virt_to_phys will give us bogus addresses in the 341b92b1b89SWill Deacon * virtqueue. 342b92b1b89SWill Deacon */ 34382107539SMichal Hocko gfp &= ~__GFP_HIGHMEM; 344b92b1b89SWill Deacon 3456da2ec56SKees Cook desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 3469fa29b9dSMark McLoughlin if (!desc) 347b25bd251SRusty Russell return NULL; 3489fa29b9dSMark McLoughlin 349b25bd251SRusty Russell for (i = 0; i < total_sg; i++) 35000e6f3d9SMichael S. Tsirkin desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 351b25bd251SRusty Russell return desc; 3529fa29b9dSMark McLoughlin } 3539fa29b9dSMark McLoughlin 354138fd251STiwei Bie static inline int virtqueue_add_split(struct virtqueue *_vq, 35513816c76SRusty Russell struct scatterlist *sgs[], 356eeebf9b1SRusty Russell unsigned int total_sg, 35713816c76SRusty Russell unsigned int out_sgs, 35813816c76SRusty Russell unsigned int in_sgs, 359bbd603efSMichael S. Tsirkin void *data, 3605a08b04fSMichael S. Tsirkin void *ctx, 361bbd603efSMichael S. Tsirkin gfp_t gfp) 3620a8a69ddSRusty Russell { 3630a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 36413816c76SRusty Russell struct scatterlist *sg; 365b25bd251SRusty Russell struct vring_desc *desc; 366780bc790SAndy Lutomirski unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 3671fe9b6feSMichael S. Tsirkin int head; 368b25bd251SRusty Russell bool indirect; 3690a8a69ddSRusty Russell 3709fa29b9dSMark McLoughlin START_USE(vq); 3719fa29b9dSMark McLoughlin 3720a8a69ddSRusty Russell BUG_ON(data == NULL); 3735a08b04fSMichael S. 
Tsirkin BUG_ON(ctx && vq->indirect); 3749fa29b9dSMark McLoughlin 37570670444SRusty Russell if (unlikely(vq->broken)) { 37670670444SRusty Russell END_USE(vq); 37770670444SRusty Russell return -EIO; 37870670444SRusty Russell } 37970670444SRusty Russell 3804d6a105eSTiwei Bie LAST_ADD_TIME_UPDATE(vq); 381e93300b1SRusty Russell 38213816c76SRusty Russell BUG_ON(total_sg == 0); 3830a8a69ddSRusty Russell 384b25bd251SRusty Russell head = vq->free_head; 385b25bd251SRusty Russell 3862f18c2d1STiwei Bie if (virtqueue_use_indirect(_vq, total_sg)) 387138fd251STiwei Bie desc = alloc_indirect_split(_vq, total_sg, gfp); 38844ed8089SRichard W.M. Jones else { 389b25bd251SRusty Russell desc = NULL; 390e593bf97STiwei Bie WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 39144ed8089SRichard W.M. Jones } 392b25bd251SRusty Russell 393b25bd251SRusty Russell if (desc) { 394b25bd251SRusty Russell /* Use a single buffer which doesn't continue */ 395780bc790SAndy Lutomirski indirect = true; 396b25bd251SRusty Russell /* Set up rest to use this indirect table. */ 397b25bd251SRusty Russell i = 0; 398b25bd251SRusty Russell descs_used = 1; 399b25bd251SRusty Russell } else { 400780bc790SAndy Lutomirski indirect = false; 401e593bf97STiwei Bie desc = vq->split.vring.desc; 402b25bd251SRusty Russell i = head; 403b25bd251SRusty Russell descs_used = total_sg; 404b25bd251SRusty Russell } 405b25bd251SRusty Russell 406b25bd251SRusty Russell if (vq->vq.num_free < descs_used) { 4070a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 408b25bd251SRusty Russell descs_used, vq->vq.num_free); 40944653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 41044653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 41144653eaeSRusty Russell * host should service the ring ASAP. */ 41213816c76SRusty Russell if (out_sgs) 413426e3e0aSRusty Russell vq->notify(&vq->vq); 41458625edfSWei Yongjun if (indirect) 41558625edfSWei Yongjun kfree(desc); 4160a8a69ddSRusty Russell END_USE(vq); 4170a8a69ddSRusty Russell return -ENOSPC; 4180a8a69ddSRusty Russell } 4190a8a69ddSRusty Russell 42013816c76SRusty Russell for (n = 0; n < out_sgs; n++) { 421eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 422780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 423780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 424780bc790SAndy Lutomirski goto unmap_release; 425780bc790SAndy Lutomirski 42600e6f3d9SMichael S. Tsirkin desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 427780bc790SAndy Lutomirski desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 42800e6f3d9SMichael S. Tsirkin desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 4290a8a69ddSRusty Russell prev = i; 43000e6f3d9SMichael S. Tsirkin i = virtio16_to_cpu(_vq->vdev, desc[i].next); 4310a8a69ddSRusty Russell } 43213816c76SRusty Russell } 43313816c76SRusty Russell for (; n < (out_sgs + in_sgs); n++) { 434eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 435780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 436780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 437780bc790SAndy Lutomirski goto unmap_release; 438780bc790SAndy Lutomirski 43900e6f3d9SMichael S. Tsirkin desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 440780bc790SAndy Lutomirski desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 44100e6f3d9SMichael S. 
Tsirkin desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 4420a8a69ddSRusty Russell prev = i; 44300e6f3d9SMichael S. Tsirkin i = virtio16_to_cpu(_vq->vdev, desc[i].next); 44413816c76SRusty Russell } 4450a8a69ddSRusty Russell } 4460a8a69ddSRusty Russell /* Last one doesn't continue. */ 44700e6f3d9SMichael S. Tsirkin desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 4480a8a69ddSRusty Russell 449780bc790SAndy Lutomirski if (indirect) { 450780bc790SAndy Lutomirski /* Now that the indirect table is filled in, map it. */ 451780bc790SAndy Lutomirski dma_addr_t addr = vring_map_single( 452780bc790SAndy Lutomirski vq, desc, total_sg * sizeof(struct vring_desc), 453780bc790SAndy Lutomirski DMA_TO_DEVICE); 454780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 455780bc790SAndy Lutomirski goto unmap_release; 456780bc790SAndy Lutomirski 457e593bf97STiwei Bie vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 458e593bf97STiwei Bie VRING_DESC_F_INDIRECT); 459e593bf97STiwei Bie vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 460e593bf97STiwei Bie addr); 461780bc790SAndy Lutomirski 462e593bf97STiwei Bie vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 463e593bf97STiwei Bie total_sg * sizeof(struct vring_desc)); 464780bc790SAndy Lutomirski } 465780bc790SAndy Lutomirski 466780bc790SAndy Lutomirski /* We're using some buffers from the free list. */ 467780bc790SAndy Lutomirski vq->vq.num_free -= descs_used; 468780bc790SAndy Lutomirski 4690a8a69ddSRusty Russell /* Update free pointer */ 470b25bd251SRusty Russell if (indirect) 471e593bf97STiwei Bie vq->free_head = virtio16_to_cpu(_vq->vdev, 472e593bf97STiwei Bie vq->split.vring.desc[head].next); 473b25bd251SRusty Russell else 4740a8a69ddSRusty Russell vq->free_head = i; 4750a8a69ddSRusty Russell 476780bc790SAndy Lutomirski /* Store token and indirect buffer state. */ 477cbeedb72STiwei Bie vq->split.desc_state[head].data = data; 478780bc790SAndy Lutomirski if (indirect) 479cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = desc; 48087646a34SJason Wang else 481cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = ctx; 4820a8a69ddSRusty Russell 4830a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 4843b720b8cSRusty Russell * do sync). */ 485e593bf97STiwei Bie avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 486e593bf97STiwei Bie vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 4870a8a69ddSRusty Russell 488ee7cd898SRusty Russell /* Descriptors and available array need to be set before we expose the 489ee7cd898SRusty Russell * new available array entries. */ 490a9a0fef7SRusty Russell virtio_wmb(vq->weak_barriers); 491e593bf97STiwei Bie vq->split.avail_idx_shadow++; 492e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 493e593bf97STiwei Bie vq->split.avail_idx_shadow); 494ee7cd898SRusty Russell vq->num_added++; 495ee7cd898SRusty Russell 4965e05bf58STetsuo Handa pr_debug("Added buffer head %i to %p\n", head, vq); 4975e05bf58STetsuo Handa END_USE(vq); 4985e05bf58STetsuo Handa 499ee7cd898SRusty Russell /* This is very unlikely, but theoretically possible. Kick 500ee7cd898SRusty Russell * just in case. 
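 * (num_added feeds the old/new window that
 * virtqueue_kick_prepare_split() hands to vring_need_event(); letting
 * it wrap past 16 bits would make that window meaningless, so kick
 * early instead)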
*/ 501ee7cd898SRusty Russell if (unlikely(vq->num_added == (1 << 16) - 1)) 502ee7cd898SRusty Russell virtqueue_kick(_vq); 503ee7cd898SRusty Russell 50498e8c6bcSRusty Russell return 0; 505780bc790SAndy Lutomirski 506780bc790SAndy Lutomirski unmap_release: 507780bc790SAndy Lutomirski err_idx = i; 508780bc790SAndy Lutomirski i = head; 509780bc790SAndy Lutomirski 510780bc790SAndy Lutomirski for (n = 0; n < total_sg; n++) { 511780bc790SAndy Lutomirski if (i == err_idx) 512780bc790SAndy Lutomirski break; 513138fd251STiwei Bie vring_unmap_one_split(vq, &desc[i]); 514e593bf97STiwei Bie i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 515780bc790SAndy Lutomirski } 516780bc790SAndy Lutomirski 517780bc790SAndy Lutomirski if (indirect) 518780bc790SAndy Lutomirski kfree(desc); 519780bc790SAndy Lutomirski 5203cc36f6eSMichael S. Tsirkin END_USE(vq); 521780bc790SAndy Lutomirski return -EIO; 5220a8a69ddSRusty Russell } 52313816c76SRusty Russell 524138fd251STiwei Bie static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 5250a8a69ddSRusty Russell { 5260a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 527a5c262c5SMichael S. Tsirkin u16 new, old; 52841f0377fSRusty Russell bool needs_kick; 52941f0377fSRusty Russell 5300a8a69ddSRusty Russell START_USE(vq); 531a72caae2SJason Wang /* We need to expose available array entries before checking avail 532a72caae2SJason Wang * event. */ 533a9a0fef7SRusty Russell virtio_mb(vq->weak_barriers); 5340a8a69ddSRusty Russell 535e593bf97STiwei Bie old = vq->split.avail_idx_shadow - vq->num_added; 536e593bf97STiwei Bie new = vq->split.avail_idx_shadow; 5370a8a69ddSRusty Russell vq->num_added = 0; 5380a8a69ddSRusty Russell 5394d6a105eSTiwei Bie LAST_ADD_TIME_CHECK(vq); 5404d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 541e93300b1SRusty Russell 54241f0377fSRusty Russell if (vq->event) { 543e593bf97STiwei Bie needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 544e593bf97STiwei Bie vring_avail_event(&vq->split.vring)), 54541f0377fSRusty Russell new, old); 54641f0377fSRusty Russell } else { 547e593bf97STiwei Bie needs_kick = !(vq->split.vring.used->flags & 548e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 549e593bf97STiwei Bie VRING_USED_F_NO_NOTIFY)); 55041f0377fSRusty Russell } 5510a8a69ddSRusty Russell END_USE(vq); 55241f0377fSRusty Russell return needs_kick; 55341f0377fSRusty Russell } 554138fd251STiwei Bie 555138fd251STiwei Bie static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 5565a08b04fSMichael S. Tsirkin void **ctx) 5570a8a69ddSRusty Russell { 558780bc790SAndy Lutomirski unsigned int i, j; 559c60923cbSGonglei __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 5600a8a69ddSRusty Russell 5610a8a69ddSRusty Russell /* Clear data ptr. 
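 * (a NULL ->data is how virtqueue_get_buf_ctx_split() and
 * virtqueue_detach_unused_buf_split() tell a free descriptor head from
 * an in-flight one)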
*/ 562cbeedb72STiwei Bie vq->split.desc_state[head].data = NULL; 5630a8a69ddSRusty Russell 564780bc790SAndy Lutomirski /* Put back on free list: unmap first-level descriptors and find end */ 5650a8a69ddSRusty Russell i = head; 5669fa29b9dSMark McLoughlin 567e593bf97STiwei Bie while (vq->split.vring.desc[i].flags & nextflag) { 568e593bf97STiwei Bie vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 569e593bf97STiwei Bie i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 57006ca287dSRusty Russell vq->vq.num_free++; 5710a8a69ddSRusty Russell } 5720a8a69ddSRusty Russell 573e593bf97STiwei Bie vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 574e593bf97STiwei Bie vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 575e593bf97STiwei Bie vq->free_head); 5760a8a69ddSRusty Russell vq->free_head = head; 577780bc790SAndy Lutomirski 5780a8a69ddSRusty Russell /* Plus final descriptor */ 57906ca287dSRusty Russell vq->vq.num_free++; 580780bc790SAndy Lutomirski 5815a08b04fSMichael S. Tsirkin if (vq->indirect) { 582cbeedb72STiwei Bie struct vring_desc *indir_desc = 583cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc; 5845a08b04fSMichael S. Tsirkin u32 len; 5855a08b04fSMichael S. Tsirkin 5865a08b04fSMichael S. Tsirkin /* Free the indirect table, if any, now that it's unmapped. */ 5875a08b04fSMichael S. Tsirkin if (!indir_desc) 5885a08b04fSMichael S. Tsirkin return; 5895a08b04fSMichael S. Tsirkin 590e593bf97STiwei Bie len = virtio32_to_cpu(vq->vq.vdev, 591e593bf97STiwei Bie vq->split.vring.desc[head].len); 592780bc790SAndy Lutomirski 593e593bf97STiwei Bie BUG_ON(!(vq->split.vring.desc[head].flags & 594780bc790SAndy Lutomirski cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 595780bc790SAndy Lutomirski BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 596780bc790SAndy Lutomirski 597780bc790SAndy Lutomirski for (j = 0; j < len / sizeof(struct vring_desc); j++) 598138fd251STiwei Bie vring_unmap_one_split(vq, &indir_desc[j]); 599780bc790SAndy Lutomirski 6005a08b04fSMichael S. Tsirkin kfree(indir_desc); 601cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = NULL; 6025a08b04fSMichael S. Tsirkin } else if (ctx) { 603cbeedb72STiwei Bie *ctx = vq->split.desc_state[head].indir_desc; 604780bc790SAndy Lutomirski } 6050a8a69ddSRusty Russell } 6060a8a69ddSRusty Russell 607138fd251STiwei Bie static inline bool more_used_split(const struct vring_virtqueue *vq) 6080a8a69ddSRusty Russell { 609e593bf97STiwei Bie return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 610e593bf97STiwei Bie vq->split.vring.used->idx); 6110a8a69ddSRusty Russell } 6120a8a69ddSRusty Russell 613138fd251STiwei Bie static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 614138fd251STiwei Bie unsigned int *len, 6155a08b04fSMichael S. 
Tsirkin void **ctx) 6160a8a69ddSRusty Russell { 6170a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 6180a8a69ddSRusty Russell void *ret; 6190a8a69ddSRusty Russell unsigned int i; 6203b720b8cSRusty Russell u16 last_used; 6210a8a69ddSRusty Russell 6220a8a69ddSRusty Russell START_USE(vq); 6230a8a69ddSRusty Russell 6245ef82752SRusty Russell if (unlikely(vq->broken)) { 6255ef82752SRusty Russell END_USE(vq); 6265ef82752SRusty Russell return NULL; 6275ef82752SRusty Russell } 6285ef82752SRusty Russell 629138fd251STiwei Bie if (!more_used_split(vq)) { 6300a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 6310a8a69ddSRusty Russell END_USE(vq); 6320a8a69ddSRusty Russell return NULL; 6330a8a69ddSRusty Russell } 6340a8a69ddSRusty Russell 6352d61ba95SMichael S. Tsirkin /* Only get used array entries after they have been exposed by host. */ 636a9a0fef7SRusty Russell virtio_rmb(vq->weak_barriers); 6372d61ba95SMichael S. Tsirkin 638e593bf97STiwei Bie last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 639e593bf97STiwei Bie i = virtio32_to_cpu(_vq->vdev, 640e593bf97STiwei Bie vq->split.vring.used->ring[last_used].id); 641e593bf97STiwei Bie *len = virtio32_to_cpu(_vq->vdev, 642e593bf97STiwei Bie vq->split.vring.used->ring[last_used].len); 6430a8a69ddSRusty Russell 644e593bf97STiwei Bie if (unlikely(i >= vq->split.vring.num)) { 6450a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 6460a8a69ddSRusty Russell return NULL; 6470a8a69ddSRusty Russell } 648cbeedb72STiwei Bie if (unlikely(!vq->split.desc_state[i].data)) { 6490a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 6500a8a69ddSRusty Russell return NULL; 6510a8a69ddSRusty Russell } 6520a8a69ddSRusty Russell 653138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. */ 654cbeedb72STiwei Bie ret = vq->split.desc_state[i].data; 655138fd251STiwei Bie detach_buf_split(vq, i, ctx); 6560a8a69ddSRusty Russell vq->last_used_idx++; 657a5c262c5SMichael S. Tsirkin /* If we expect an interrupt for the next entry, tell host 658a5c262c5SMichael S. Tsirkin * by writing event index and flush out the write before 659a5c262c5SMichael S. Tsirkin * the read in the next get_buf call. */ 660e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 661788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 662e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 663788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 664a5c262c5SMichael S. Tsirkin 6654d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 666e93300b1SRusty Russell 6670a8a69ddSRusty Russell END_USE(vq); 6680a8a69ddSRusty Russell return ret; 6690a8a69ddSRusty Russell } 670138fd251STiwei Bie 671138fd251STiwei Bie static void virtqueue_disable_cb_split(struct virtqueue *_vq) 672138fd251STiwei Bie { 673138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 674138fd251STiwei Bie 675e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 676e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 677138fd251STiwei Bie if (!vq->event) 678e593bf97STiwei Bie vq->split.vring.avail->flags = 679e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 680e593bf97STiwei Bie vq->split.avail_flags_shadow); 681138fd251STiwei Bie } 682138fd251STiwei Bie } 683138fd251STiwei Bie 684138fd251STiwei Bie static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 685cc229884SMichael S. Tsirkin { 686cc229884SMichael S. 
Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 687cc229884SMichael S. Tsirkin u16 last_used_idx; 688cc229884SMichael S. Tsirkin 689cc229884SMichael S. Tsirkin START_USE(vq); 690cc229884SMichael S. Tsirkin 691cc229884SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 692cc229884SMichael S. Tsirkin * more to do. */ 693cc229884SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 694cc229884SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 695cc229884SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 696e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 697e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 6980ea1e4a6SLadi Prosek if (!vq->event) 699e593bf97STiwei Bie vq->split.vring.avail->flags = 700e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 701e593bf97STiwei Bie vq->split.avail_flags_shadow); 702f277ec42SVenkatesh Srinivas } 703e593bf97STiwei Bie vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 704e593bf97STiwei Bie last_used_idx = vq->last_used_idx); 705cc229884SMichael S. Tsirkin END_USE(vq); 706cc229884SMichael S. Tsirkin return last_used_idx; 707cc229884SMichael S. Tsirkin } 708138fd251STiwei Bie 709138fd251STiwei Bie static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 710138fd251STiwei Bie { 711138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 712138fd251STiwei Bie 713138fd251STiwei Bie return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 714e593bf97STiwei Bie vq->split.vring.used->idx); 715138fd251STiwei Bie } 716138fd251STiwei Bie 717138fd251STiwei Bie static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 7187ab358c2SMichael S. Tsirkin { 7197ab358c2SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 7207ab358c2SMichael S. Tsirkin u16 bufs; 7217ab358c2SMichael S. Tsirkin 7227ab358c2SMichael S. Tsirkin START_USE(vq); 7237ab358c2SMichael S. Tsirkin 7247ab358c2SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 7257ab358c2SMichael S. Tsirkin * more to do. */ 7267ab358c2SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 7277ab358c2SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 7280ea1e4a6SLadi Prosek * entry. Always update the event index to keep code simple. */ 729e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 730e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 7310ea1e4a6SLadi Prosek if (!vq->event) 732e593bf97STiwei Bie vq->split.vring.avail->flags = 733e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 734e593bf97STiwei Bie vq->split.avail_flags_shadow); 735f277ec42SVenkatesh Srinivas } 7367ab358c2SMichael S. Tsirkin /* TODO: tune this threshold */ 737e593bf97STiwei Bie bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 738788e5b3aSMichael S. Tsirkin 739788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 740e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 741788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 742788e5b3aSMichael S. Tsirkin 743e593bf97STiwei Bie if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 744e593bf97STiwei Bie - vq->last_used_idx) > bufs)) { 7457ab358c2SMichael S. Tsirkin END_USE(vq); 7467ab358c2SMichael S. 
Tsirkin return false; 7477ab358c2SMichael S. Tsirkin } 7487ab358c2SMichael S. Tsirkin 7497ab358c2SMichael S. Tsirkin END_USE(vq); 7507ab358c2SMichael S. Tsirkin return true; 7517ab358c2SMichael S. Tsirkin } 7527ab358c2SMichael S. Tsirkin 753138fd251STiwei Bie static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 754c021eac4SShirley Ma { 755c021eac4SShirley Ma struct vring_virtqueue *vq = to_vvq(_vq); 756c021eac4SShirley Ma unsigned int i; 757c021eac4SShirley Ma void *buf; 758c021eac4SShirley Ma 759c021eac4SShirley Ma START_USE(vq); 760c021eac4SShirley Ma 761e593bf97STiwei Bie for (i = 0; i < vq->split.vring.num; i++) { 762cbeedb72STiwei Bie if (!vq->split.desc_state[i].data) 763c021eac4SShirley Ma continue; 764138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. */ 765cbeedb72STiwei Bie buf = vq->split.desc_state[i].data; 766138fd251STiwei Bie detach_buf_split(vq, i, NULL); 767e593bf97STiwei Bie vq->split.avail_idx_shadow--; 768e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 769e593bf97STiwei Bie vq->split.avail_idx_shadow); 770c021eac4SShirley Ma END_USE(vq); 771c021eac4SShirley Ma return buf; 772c021eac4SShirley Ma } 773c021eac4SShirley Ma /* That should have freed everything. */ 774e593bf97STiwei Bie BUG_ON(vq->vq.num_free != vq->split.vring.num); 775c021eac4SShirley Ma 776c021eac4SShirley Ma END_USE(vq); 777c021eac4SShirley Ma return NULL; 778c021eac4SShirley Ma } 779138fd251STiwei Bie 780d79dca75STiwei Bie static struct virtqueue *vring_create_virtqueue_split( 781d79dca75STiwei Bie unsigned int index, 782d79dca75STiwei Bie unsigned int num, 783d79dca75STiwei Bie unsigned int vring_align, 784d79dca75STiwei Bie struct virtio_device *vdev, 785d79dca75STiwei Bie bool weak_barriers, 786d79dca75STiwei Bie bool may_reduce_num, 787d79dca75STiwei Bie bool context, 788d79dca75STiwei Bie bool (*notify)(struct virtqueue *), 789d79dca75STiwei Bie void (*callback)(struct virtqueue *), 790d79dca75STiwei Bie const char *name) 791d79dca75STiwei Bie { 792d79dca75STiwei Bie struct virtqueue *vq; 793d79dca75STiwei Bie void *queue = NULL; 794d79dca75STiwei Bie dma_addr_t dma_addr; 795d79dca75STiwei Bie size_t queue_size_in_bytes; 796d79dca75STiwei Bie struct vring vring; 797d79dca75STiwei Bie 798d79dca75STiwei Bie /* We assume num is a power of 2. */ 799d79dca75STiwei Bie if (num & (num - 1)) { 800d79dca75STiwei Bie dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 801d79dca75STiwei Bie return NULL; 802d79dca75STiwei Bie } 803d79dca75STiwei Bie 804d79dca75STiwei Bie /* TODO: allocate each queue chunk individually */ 805d79dca75STiwei Bie for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 806d79dca75STiwei Bie queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 807d79dca75STiwei Bie &dma_addr, 808d79dca75STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 809d79dca75STiwei Bie if (queue) 810d79dca75STiwei Bie break; 811d79dca75STiwei Bie } 812d79dca75STiwei Bie 813d79dca75STiwei Bie if (!num) 814d79dca75STiwei Bie return NULL; 815d79dca75STiwei Bie 816d79dca75STiwei Bie if (!queue) { 817d79dca75STiwei Bie /* Try to get a single page. You are my only hope! 
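 * (the loop above only attempts, and halves, ring sizes larger than a
 * page; by the time we get here the ring fits in one page, so retry
 * without __GFP_NOWARN and let a real failure be reported)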
*/ 818d79dca75STiwei Bie queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 819d79dca75STiwei Bie &dma_addr, GFP_KERNEL|__GFP_ZERO); 820d79dca75STiwei Bie } 821d79dca75STiwei Bie if (!queue) 822d79dca75STiwei Bie return NULL; 823d79dca75STiwei Bie 824d79dca75STiwei Bie queue_size_in_bytes = vring_size(num, vring_align); 825d79dca75STiwei Bie vring_init(&vring, num, queue, vring_align); 826d79dca75STiwei Bie 827d79dca75STiwei Bie vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 828d79dca75STiwei Bie notify, callback, name); 829d79dca75STiwei Bie if (!vq) { 830d79dca75STiwei Bie vring_free_queue(vdev, queue_size_in_bytes, queue, 831d79dca75STiwei Bie dma_addr); 832d79dca75STiwei Bie return NULL; 833d79dca75STiwei Bie } 834d79dca75STiwei Bie 835d79dca75STiwei Bie to_vvq(vq)->split.queue_dma_addr = dma_addr; 836d79dca75STiwei Bie to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 837d79dca75STiwei Bie to_vvq(vq)->we_own_ring = true; 838d79dca75STiwei Bie 839d79dca75STiwei Bie return vq; 840d79dca75STiwei Bie } 841d79dca75STiwei Bie 842e6f633e5STiwei Bie 843e6f633e5STiwei Bie /* 844e6f633e5STiwei Bie * Generic functions and exported symbols. 845e6f633e5STiwei Bie */ 846e6f633e5STiwei Bie 847e6f633e5STiwei Bie static inline int virtqueue_add(struct virtqueue *_vq, 848e6f633e5STiwei Bie struct scatterlist *sgs[], 849e6f633e5STiwei Bie unsigned int total_sg, 850e6f633e5STiwei Bie unsigned int out_sgs, 851e6f633e5STiwei Bie unsigned int in_sgs, 852e6f633e5STiwei Bie void *data, 853e6f633e5STiwei Bie void *ctx, 854e6f633e5STiwei Bie gfp_t gfp) 855e6f633e5STiwei Bie { 856e6f633e5STiwei Bie return virtqueue_add_split(_vq, sgs, total_sg, 857e6f633e5STiwei Bie out_sgs, in_sgs, data, ctx, gfp); 858e6f633e5STiwei Bie } 859e6f633e5STiwei Bie 860e6f633e5STiwei Bie /** 861e6f633e5STiwei Bie * virtqueue_add_sgs - expose buffers to other end 862e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 863e6f633e5STiwei Bie * @sgs: array of terminated scatterlists. 864e6f633e5STiwei Bie * @out_num: the number of scatterlists readable by other side 865e6f633e5STiwei Bie * @in_num: the number of scatterlists which are writable (after readable ones) 866e6f633e5STiwei Bie * @data: the token identifying the buffer. 867e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 868e6f633e5STiwei Bie * 869e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 870e6f633e5STiwei Bie * at the same time (except where noted). 871e6f633e5STiwei Bie * 872e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 873e6f633e5STiwei Bie */ 874e6f633e5STiwei Bie int virtqueue_add_sgs(struct virtqueue *_vq, 875e6f633e5STiwei Bie struct scatterlist *sgs[], 876e6f633e5STiwei Bie unsigned int out_sgs, 877e6f633e5STiwei Bie unsigned int in_sgs, 878e6f633e5STiwei Bie void *data, 879e6f633e5STiwei Bie gfp_t gfp) 880e6f633e5STiwei Bie { 881e6f633e5STiwei Bie unsigned int i, total_sg = 0; 882e6f633e5STiwei Bie 883e6f633e5STiwei Bie /* Count them first. 
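 * (total_sg is the number of descriptors the request will consume;
 * virtqueue_add_split() uses it to size an indirect table or checks it
 * against num_free for a direct add)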
*/ 884e6f633e5STiwei Bie for (i = 0; i < out_sgs + in_sgs; i++) { 885e6f633e5STiwei Bie struct scatterlist *sg; 886e6f633e5STiwei Bie 887e6f633e5STiwei Bie for (sg = sgs[i]; sg; sg = sg_next(sg)) 888e6f633e5STiwei Bie total_sg++; 889e6f633e5STiwei Bie } 890e6f633e5STiwei Bie return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 891e6f633e5STiwei Bie data, NULL, gfp); 892e6f633e5STiwei Bie } 893e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 894e6f633e5STiwei Bie 895e6f633e5STiwei Bie /** 896e6f633e5STiwei Bie * virtqueue_add_outbuf - expose output buffers to other end 897e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 898e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 899e6f633e5STiwei Bie * @num: the number of entries in @sg readable by other side 900e6f633e5STiwei Bie * @data: the token identifying the buffer. 901e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 902e6f633e5STiwei Bie * 903e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 904e6f633e5STiwei Bie * at the same time (except where noted). 905e6f633e5STiwei Bie * 906e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 907e6f633e5STiwei Bie */ 908e6f633e5STiwei Bie int virtqueue_add_outbuf(struct virtqueue *vq, 909e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 910e6f633e5STiwei Bie void *data, 911e6f633e5STiwei Bie gfp_t gfp) 912e6f633e5STiwei Bie { 913e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 914e6f633e5STiwei Bie } 915e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 916e6f633e5STiwei Bie 917e6f633e5STiwei Bie /** 918e6f633e5STiwei Bie * virtqueue_add_inbuf - expose input buffers to other end 919e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 920e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 921e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 922e6f633e5STiwei Bie * @data: the token identifying the buffer. 923e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 924e6f633e5STiwei Bie * 925e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 926e6f633e5STiwei Bie * at the same time (except where noted). 927e6f633e5STiwei Bie * 928e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 929e6f633e5STiwei Bie */ 930e6f633e5STiwei Bie int virtqueue_add_inbuf(struct virtqueue *vq, 931e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 932e6f633e5STiwei Bie void *data, 933e6f633e5STiwei Bie gfp_t gfp) 934e6f633e5STiwei Bie { 935e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 936e6f633e5STiwei Bie } 937e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 938e6f633e5STiwei Bie 939e6f633e5STiwei Bie /** 940e6f633e5STiwei Bie * virtqueue_add_inbuf_ctx - expose input buffers to other end 941e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 942e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 943e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 944e6f633e5STiwei Bie * @data: the token identifying the buffer. 945e6f633e5STiwei Bie * @ctx: extra context for the token 946e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 
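 *
 * The @ctx pointer is stored next to @data and handed back through
 * virtqueue_get_buf_ctx(); it is only usable when the queue was created
 * with per-buffer context enabled, which in turn disables indirect
 * descriptors (see the BUG_ON(ctx && vq->indirect) in
 * virtqueue_add_split()).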
947e6f633e5STiwei Bie * 948e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 949e6f633e5STiwei Bie * at the same time (except where noted). 950e6f633e5STiwei Bie * 951e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 952e6f633e5STiwei Bie */ 953e6f633e5STiwei Bie int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 954e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 955e6f633e5STiwei Bie void *data, 956e6f633e5STiwei Bie void *ctx, 957e6f633e5STiwei Bie gfp_t gfp) 958e6f633e5STiwei Bie { 959e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 960e6f633e5STiwei Bie } 961e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 962e6f633e5STiwei Bie 963e6f633e5STiwei Bie /** 964e6f633e5STiwei Bie * virtqueue_kick_prepare - first half of split virtqueue_kick call. 965e6f633e5STiwei Bie * @vq: the struct virtqueue 966e6f633e5STiwei Bie * 967e6f633e5STiwei Bie * Instead of virtqueue_kick(), you can do: 968e6f633e5STiwei Bie * if (virtqueue_kick_prepare(vq)) 969e6f633e5STiwei Bie * virtqueue_notify(vq); 970e6f633e5STiwei Bie * 971e6f633e5STiwei Bie * This is sometimes useful because the virtqueue_kick_prepare() needs 972e6f633e5STiwei Bie * to be serialized, but the actual virtqueue_notify() call does not. 973e6f633e5STiwei Bie */ 974e6f633e5STiwei Bie bool virtqueue_kick_prepare(struct virtqueue *_vq) 975e6f633e5STiwei Bie { 976e6f633e5STiwei Bie return virtqueue_kick_prepare_split(_vq); 977e6f633e5STiwei Bie } 978e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 979e6f633e5STiwei Bie 980e6f633e5STiwei Bie /** 981e6f633e5STiwei Bie * virtqueue_notify - second half of split virtqueue_kick call. 982e6f633e5STiwei Bie * @vq: the struct virtqueue 983e6f633e5STiwei Bie * 984e6f633e5STiwei Bie * This does not need to be serialized. 985e6f633e5STiwei Bie * 986e6f633e5STiwei Bie * Returns false if host notify failed or queue is broken, otherwise true. 987e6f633e5STiwei Bie */ 988e6f633e5STiwei Bie bool virtqueue_notify(struct virtqueue *_vq) 989e6f633e5STiwei Bie { 990e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 991e6f633e5STiwei Bie 992e6f633e5STiwei Bie if (unlikely(vq->broken)) 993e6f633e5STiwei Bie return false; 994e6f633e5STiwei Bie 995e6f633e5STiwei Bie /* Prod other side to tell it about changes. */ 996e6f633e5STiwei Bie if (!vq->notify(_vq)) { 997e6f633e5STiwei Bie vq->broken = true; 998e6f633e5STiwei Bie return false; 999e6f633e5STiwei Bie } 1000e6f633e5STiwei Bie return true; 1001e6f633e5STiwei Bie } 1002e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_notify); 1003e6f633e5STiwei Bie 1004e6f633e5STiwei Bie /** 1005e6f633e5STiwei Bie * virtqueue_kick - update after add_buf 1006e6f633e5STiwei Bie * @vq: the struct virtqueue 1007e6f633e5STiwei Bie * 1008e6f633e5STiwei Bie * After one or more virtqueue_add_* calls, invoke this to kick 1009e6f633e5STiwei Bie * the other side. 1010e6f633e5STiwei Bie * 1011e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1012e6f633e5STiwei Bie * operations at the same time (except where noted). 1013e6f633e5STiwei Bie * 1014e6f633e5STiwei Bie * Returns false if kick failed, otherwise true. 
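 *
 * A typical transmit path looks roughly like this (illustrative only;
 * locking and error handling elided, "buf"/"len" are placeholders):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, len);
 *	if (virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC) == 0)
 *		virtqueue_kick(vq);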
1015e6f633e5STiwei Bie */ 1016e6f633e5STiwei Bie bool virtqueue_kick(struct virtqueue *vq) 1017e6f633e5STiwei Bie { 1018e6f633e5STiwei Bie if (virtqueue_kick_prepare(vq)) 1019e6f633e5STiwei Bie return virtqueue_notify(vq); 1020e6f633e5STiwei Bie return true; 1021e6f633e5STiwei Bie } 1022e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick); 1023e6f633e5STiwei Bie 1024e6f633e5STiwei Bie /** 1025e6f633e5STiwei Bie * virtqueue_get_buf - get the next used buffer 1026e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1027e6f633e5STiwei Bie * @len: the length written into the buffer 1028e6f633e5STiwei Bie * 1029e6f633e5STiwei Bie * If the device wrote data into the buffer, @len will be set to the 1030e6f633e5STiwei Bie * amount written. This means you don't need to clear the buffer 1031e6f633e5STiwei Bie * beforehand to ensure there's no data leakage in the case of short 1032e6f633e5STiwei Bie * writes. 1033e6f633e5STiwei Bie * 1034e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1035e6f633e5STiwei Bie * operations at the same time (except where noted). 1036e6f633e5STiwei Bie * 1037e6f633e5STiwei Bie * Returns NULL if there are no used buffers, or the "data" token 1038e6f633e5STiwei Bie * handed to virtqueue_add_*(). 1039e6f633e5STiwei Bie */ 1040e6f633e5STiwei Bie void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1041e6f633e5STiwei Bie void **ctx) 1042e6f633e5STiwei Bie { 1043e6f633e5STiwei Bie return virtqueue_get_buf_ctx_split(_vq, len, ctx); 1044e6f633e5STiwei Bie } 1045e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1046e6f633e5STiwei Bie 1047e6f633e5STiwei Bie void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1048e6f633e5STiwei Bie { 1049e6f633e5STiwei Bie return virtqueue_get_buf_ctx(_vq, len, NULL); 1050e6f633e5STiwei Bie } 1051e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1052e6f633e5STiwei Bie 1053e6f633e5STiwei Bie /** 1054e6f633e5STiwei Bie * virtqueue_disable_cb - disable callbacks 1055e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1056e6f633e5STiwei Bie * 1057e6f633e5STiwei Bie * Note that this is not necessarily synchronous, hence unreliable and only 1058e6f633e5STiwei Bie * useful as an optimization. 1059e6f633e5STiwei Bie * 1060e6f633e5STiwei Bie * Unlike other operations, this need not be serialized. 1061e6f633e5STiwei Bie */ 1062e6f633e5STiwei Bie void virtqueue_disable_cb(struct virtqueue *_vq) 1063e6f633e5STiwei Bie { 1064e6f633e5STiwei Bie virtqueue_disable_cb_split(_vq); 1065e6f633e5STiwei Bie } 1066e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1067e6f633e5STiwei Bie 1068e6f633e5STiwei Bie /** 1069e6f633e5STiwei Bie * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1070e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1071e6f633e5STiwei Bie * 1072e6f633e5STiwei Bie * This re-enables callbacks; it returns current queue state 1073e6f633e5STiwei Bie * in an opaque unsigned value. This value should be later tested by 1074e6f633e5STiwei Bie * virtqueue_poll, to detect a possible race between the driver checking for 1075e6f633e5STiwei Bie * more work, and enabling callbacks. 1076e6f633e5STiwei Bie * 1077e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1078e6f633e5STiwei Bie * operations at the same time (except where noted). 
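 *
 * Typical pattern (illustrative only):
 *
 *	unsigned opaque = virtqueue_enable_cb_prepare(vq);
 *
 *	if (unlikely(virtqueue_poll(vq, opaque))) {
 *		virtqueue_disable_cb(vq);
 *		... more used buffers arrived; process them and retry ...
 *	}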
1079e6f633e5STiwei Bie */ 1080e6f633e5STiwei Bie unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1081e6f633e5STiwei Bie { 1082e6f633e5STiwei Bie return virtqueue_enable_cb_prepare_split(_vq); 1083e6f633e5STiwei Bie } 1084e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1085e6f633e5STiwei Bie 1086e6f633e5STiwei Bie /** 1087e6f633e5STiwei Bie * virtqueue_poll - query pending used buffers 1088e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1089e6f633e5STiwei Bie * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1090e6f633e5STiwei Bie * 1091e6f633e5STiwei Bie * Returns "true" if there are pending used buffers in the queue. 1092e6f633e5STiwei Bie * 1093e6f633e5STiwei Bie * This does not need to be serialized. 1094e6f633e5STiwei Bie */ 1095e6f633e5STiwei Bie bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1096e6f633e5STiwei Bie { 1097e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1098e6f633e5STiwei Bie 1099e6f633e5STiwei Bie virtio_mb(vq->weak_barriers); 1100e6f633e5STiwei Bie return virtqueue_poll_split(_vq, last_used_idx); 1101e6f633e5STiwei Bie } 1102e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_poll); 1103e6f633e5STiwei Bie 1104e6f633e5STiwei Bie /** 1105e6f633e5STiwei Bie * virtqueue_enable_cb - restart callbacks after disable_cb. 1106e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1107e6f633e5STiwei Bie * 1108e6f633e5STiwei Bie * This re-enables callbacks; it returns "false" if there are pending 1109e6f633e5STiwei Bie * buffers in the queue, to detect a possible race between the driver 1110e6f633e5STiwei Bie * checking for more work, and enabling callbacks. 1111e6f633e5STiwei Bie * 1112e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1113e6f633e5STiwei Bie * operations at the same time (except where noted). 1114e6f633e5STiwei Bie */ 1115e6f633e5STiwei Bie bool virtqueue_enable_cb(struct virtqueue *_vq) 1116e6f633e5STiwei Bie { 1117e6f633e5STiwei Bie unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 1118e6f633e5STiwei Bie 1119e6f633e5STiwei Bie return !virtqueue_poll(_vq, last_used_idx); 1120e6f633e5STiwei Bie } 1121e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 1122e6f633e5STiwei Bie 1123e6f633e5STiwei Bie /** 1124e6f633e5STiwei Bie * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 1125e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1126e6f633e5STiwei Bie * 1127e6f633e5STiwei Bie * This re-enables callbacks but hints to the other side to delay 1128e6f633e5STiwei Bie * interrupts until most of the available buffers have been processed; 1129e6f633e5STiwei Bie * it returns "false" if there are many pending buffers in the queue, 1130e6f633e5STiwei Bie * to detect a possible race between the driver checking for more work, 1131e6f633e5STiwei Bie * and enabling callbacks. 1132e6f633e5STiwei Bie * 1133e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1134e6f633e5STiwei Bie * operations at the same time (except where noted). 
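 *
 * With VIRTIO_RING_F_EVENT_IDX negotiated, the split implementation
 * (virtqueue_enable_cb_delayed_split) publishes a used_event index
 * about 3/4 of the way through the buffers still outstanding, so the
 * device can coalesce interrupts rather than raising one per
 * completion.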
1135e6f633e5STiwei Bie  */
1136e6f633e5STiwei Bie bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
1137e6f633e5STiwei Bie {
1138e6f633e5STiwei Bie 	return virtqueue_enable_cb_delayed_split(_vq);
1139e6f633e5STiwei Bie }
1140e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
1141e6f633e5STiwei Bie
1142138fd251STiwei Bie /**
1143138fd251STiwei Bie  * virtqueue_detach_unused_buf - detach first unused buffer
1144138fd251STiwei Bie  * @vq: the struct virtqueue we're talking about.
1145138fd251STiwei Bie  *
1146138fd251STiwei Bie  * Returns NULL or the "data" token handed to virtqueue_add_*().
1147138fd251STiwei Bie  * This is not valid on an active queue; it is useful only for device
1148138fd251STiwei Bie  * shutdown.
1149138fd251STiwei Bie  */
1150138fd251STiwei Bie void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
1151138fd251STiwei Bie {
1152138fd251STiwei Bie 	return virtqueue_detach_unused_buf_split(_vq);
1153138fd251STiwei Bie }
11547c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
1155c021eac4SShirley Ma
1156138fd251STiwei Bie static inline bool more_used(const struct vring_virtqueue *vq)
1157138fd251STiwei Bie {
1158138fd251STiwei Bie 	return more_used_split(vq);
1159138fd251STiwei Bie }
1160138fd251STiwei Bie
11610a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq)
11620a8a69ddSRusty Russell {
11630a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
11640a8a69ddSRusty Russell
11650a8a69ddSRusty Russell 	if (!more_used(vq)) {
11660a8a69ddSRusty Russell 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
11670a8a69ddSRusty Russell 		return IRQ_NONE;
11680a8a69ddSRusty Russell 	}
11690a8a69ddSRusty Russell
11700a8a69ddSRusty Russell 	if (unlikely(vq->broken))
11710a8a69ddSRusty Russell 		return IRQ_HANDLED;
11720a8a69ddSRusty Russell
11730a8a69ddSRusty Russell 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
117418445c4dSRusty Russell 	if (vq->vq.callback)
117518445c4dSRusty Russell 		vq->vq.callback(&vq->vq);
11760a8a69ddSRusty Russell
11770a8a69ddSRusty Russell 	return IRQ_HANDLED;
11780a8a69ddSRusty Russell }
1179c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt);
11800a8a69ddSRusty Russell
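/*
 * Example (illustrative sketch): a transport typically registers
 * vring_interrupt() directly as the interrupt handler for a virtqueue's
 * vector or shared line.  "foo_dev" and its irq field are assumptions of
 * the sketch, not part of this file:
 *
 *	static int foo_request_vq_irq(struct foo_dev *fdev, struct virtqueue *vq)
 *	{
 *		return request_irq(fdev->irq, vring_interrupt, IRQF_SHARED,
 *				   "foo-virtqueue", vq);
 *	}
 */
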
11812a2d1382SAndy Lutomirski struct virtqueue *__vring_new_virtqueue(unsigned int index,
11822a2d1382SAndy Lutomirski 	struct vring vring,
11830a8a69ddSRusty Russell 	struct virtio_device *vdev,
11847b21e34fSRusty Russell 	bool weak_barriers,
1185f94682ddSMichael S. Tsirkin 	bool context,
118646f9c2b9SHeinz Graalfs 	bool (*notify)(struct virtqueue *),
11879499f5e7SRusty Russell 	void (*callback)(struct virtqueue *),
11889499f5e7SRusty Russell 	const char *name)
11890a8a69ddSRusty Russell {
11900a8a69ddSRusty Russell 	unsigned int i;
11912a2d1382SAndy Lutomirski 	struct vring_virtqueue *vq;
11920a8a69ddSRusty Russell
1193cbeedb72STiwei Bie 	vq = kmalloc(sizeof(*vq), GFP_KERNEL);
11940a8a69ddSRusty Russell 	if (!vq)
11950a8a69ddSRusty Russell 		return NULL;
11960a8a69ddSRusty Russell
11970a8a69ddSRusty Russell 	vq->vq.callback = callback;
11980a8a69ddSRusty Russell 	vq->vq.vdev = vdev;
11999499f5e7SRusty Russell 	vq->vq.name = name;
12002a2d1382SAndy Lutomirski 	vq->vq.num_free = vring.num;
120106ca287dSRusty Russell 	vq->vq.index = index;
12022a2d1382SAndy Lutomirski 	vq->we_own_ring = false;
12030a8a69ddSRusty Russell 	vq->notify = notify;
12047b21e34fSRusty Russell 	vq->weak_barriers = weak_barriers;
12050a8a69ddSRusty Russell 	vq->broken = false;
12060a8a69ddSRusty Russell 	vq->last_used_idx = 0;
12070a8a69ddSRusty Russell 	vq->num_added = 0;
1208*fb3fba6bSTiwei Bie 	vq->use_dma_api = vring_use_dma_api(vdev);
12099499f5e7SRusty Russell 	list_add_tail(&vq->vq.list, &vdev->vqs);
12100a8a69ddSRusty Russell #ifdef DEBUG
12110a8a69ddSRusty Russell 	vq->in_use = false;
1212e93300b1SRusty Russell 	vq->last_add_time_valid = false;
12130a8a69ddSRusty Russell #endif
12140a8a69ddSRusty Russell
12155a08b04fSMichael S. Tsirkin 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) &&
12165a08b04fSMichael S. Tsirkin 		!context;
1217a5c262c5SMichael S. Tsirkin 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
12189fa29b9dSMark McLoughlin
1219d79dca75STiwei Bie 	vq->split.queue_dma_addr = 0;
1220d79dca75STiwei Bie 	vq->split.queue_size_in_bytes = 0;
1221d79dca75STiwei Bie
1222e593bf97STiwei Bie 	vq->split.vring = vring;
1223e593bf97STiwei Bie 	vq->split.avail_flags_shadow = 0;
1224e593bf97STiwei Bie 	vq->split.avail_idx_shadow = 0;
1225e593bf97STiwei Bie
12260a8a69ddSRusty Russell 	/* No callback? Tell other side not to bother us. */
1227f277ec42SVenkatesh Srinivas 	if (!callback) {
1228e593bf97STiwei Bie 		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
12290ea1e4a6SLadi Prosek 		if (!vq->event)
1230e593bf97STiwei Bie 			vq->split.vring.avail->flags = cpu_to_virtio16(vdev,
1231e593bf97STiwei Bie 				vq->split.avail_flags_shadow);
1232f277ec42SVenkatesh Srinivas 	}
12330a8a69ddSRusty Russell
1234cbeedb72STiwei Bie 	vq->split.desc_state = kmalloc_array(vring.num,
1235cbeedb72STiwei Bie 		sizeof(struct vring_desc_state_split), GFP_KERNEL);
1236cbeedb72STiwei Bie 	if (!vq->split.desc_state) {
1237cbeedb72STiwei Bie 		kfree(vq);
1238cbeedb72STiwei Bie 		return NULL;
1239cbeedb72STiwei Bie 	}
1240cbeedb72STiwei Bie
12410a8a69ddSRusty Russell 	/* Put everything in free lists. */
12420a8a69ddSRusty Russell 	vq->free_head = 0;
12432a2d1382SAndy Lutomirski 	for (i = 0; i < vring.num-1; i++)
1244e593bf97STiwei Bie 		vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
1245cbeedb72STiwei Bie 	memset(vq->split.desc_state, 0, vring.num *
1246cbeedb72STiwei Bie 		sizeof(struct vring_desc_state_split));
12470a8a69ddSRusty Russell
12480a8a69ddSRusty Russell 	return &vq->vq;
12490a8a69ddSRusty Russell }
12502a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(__vring_new_virtqueue);
12512a2d1382SAndy Lutomirski
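/*
 * Example (illustrative sketch): a transport that has already set aside
 * ring memory can wrap it with __vring_new_virtqueue(); this is essentially
 * what vring_new_virtqueue() further below does.  "num", "pages",
 * "foo_notify" and "foo_callback" are assumptions of the sketch:
 *
 *	struct vring vring;
 *
 *	vring_init(&vring, num, pages, PAGE_SIZE);
 *	vq = __vring_new_virtqueue(0, vring, vdev, true, false,
 *				   foo_notify, foo_callback, "foo-requests");
 */
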
12522a2d1382SAndy Lutomirski struct virtqueue *vring_create_virtqueue(
12532a2d1382SAndy Lutomirski 	unsigned int index,
12542a2d1382SAndy Lutomirski 	unsigned int num,
12552a2d1382SAndy Lutomirski 	unsigned int vring_align,
12562a2d1382SAndy Lutomirski 	struct virtio_device *vdev,
12572a2d1382SAndy Lutomirski 	bool weak_barriers,
12582a2d1382SAndy Lutomirski 	bool may_reduce_num,
1259f94682ddSMichael S. Tsirkin 	bool context,
12602a2d1382SAndy Lutomirski 	bool (*notify)(struct virtqueue *),
12612a2d1382SAndy Lutomirski 	void (*callback)(struct virtqueue *),
12622a2d1382SAndy Lutomirski 	const char *name)
12632a2d1382SAndy Lutomirski {
1264d79dca75STiwei Bie 	return vring_create_virtqueue_split(index, num, vring_align,
1265d79dca75STiwei Bie 			vdev, weak_barriers, may_reduce_num,
1266d79dca75STiwei Bie 			context, notify, callback, name);
12672a2d1382SAndy Lutomirski }
12682a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(vring_create_virtqueue);
12692a2d1382SAndy Lutomirski
12702a2d1382SAndy Lutomirski struct virtqueue *vring_new_virtqueue(unsigned int index,
12712a2d1382SAndy Lutomirski 	unsigned int num,
12722a2d1382SAndy Lutomirski 	unsigned int vring_align,
12732a2d1382SAndy Lutomirski 	struct virtio_device *vdev,
12742a2d1382SAndy Lutomirski 	bool weak_barriers,
1275f94682ddSMichael S. Tsirkin 	bool context,
12762a2d1382SAndy Lutomirski 	void *pages,
12772a2d1382SAndy Lutomirski 	bool (*notify)(struct virtqueue *vq),
12782a2d1382SAndy Lutomirski 	void (*callback)(struct virtqueue *vq),
12792a2d1382SAndy Lutomirski 	const char *name)
12802a2d1382SAndy Lutomirski {
12812a2d1382SAndy Lutomirski 	struct vring vring;
12822a2d1382SAndy Lutomirski 	vring_init(&vring, num, pages, vring_align);
1283f94682ddSMichael S. Tsirkin 	return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
12842a2d1382SAndy Lutomirski 			notify, callback, name);
12852a2d1382SAndy Lutomirski }
1286c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue);
12870a8a69ddSRusty Russell
12882a2d1382SAndy Lutomirski void vring_del_virtqueue(struct virtqueue *_vq)
12890a8a69ddSRusty Russell {
12902a2d1382SAndy Lutomirski 	struct vring_virtqueue *vq = to_vvq(_vq);
12912a2d1382SAndy Lutomirski
12922a2d1382SAndy Lutomirski 	if (vq->we_own_ring) {
1293d79dca75STiwei Bie 		vring_free_queue(vq->vq.vdev,
1294d79dca75STiwei Bie 				vq->split.queue_size_in_bytes,
1295d79dca75STiwei Bie 				vq->split.vring.desc,
1296d79dca75STiwei Bie 				vq->split.queue_dma_addr);
1297cbeedb72STiwei Bie 		kfree(vq->split.desc_state);
12982a2d1382SAndy Lutomirski 	}
12992a2d1382SAndy Lutomirski 	list_del(&_vq->list);
13002a2d1382SAndy Lutomirski 	kfree(vq);
13010a8a69ddSRusty Russell }
1302c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue);
13030a8a69ddSRusty Russell
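/*
 * Example (illustrative sketch): the usual lifecycle from a transport's
 * point of view is to let the ring code allocate the queue and to tear it
 * down again on removal.  The queue size, alignment and the foo_* names
 * are assumptions of the sketch:
 *
 *	vq = vring_create_virtqueue(0, 256, PAGE_SIZE, vdev,
 *				    true, true, false,
 *				    foo_notify, foo_callback, "foo-ctrl");
 *	if (!vq)
 *		return -ENOMEM;
 *	...
 *	vring_del_virtqueue(vq);
 */
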
1304e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */
1305e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev)
1306e34f8725SRusty Russell {
1307e34f8725SRusty Russell 	unsigned int i;
1308e34f8725SRusty Russell
1309e34f8725SRusty Russell 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
1310e34f8725SRusty Russell 		switch (i) {
13119fa29b9dSMark McLoughlin 		case VIRTIO_RING_F_INDIRECT_DESC:
13129fa29b9dSMark McLoughlin 			break;
1313a5c262c5SMichael S. Tsirkin 		case VIRTIO_RING_F_EVENT_IDX:
1314a5c262c5SMichael S. Tsirkin 			break;
1315747ae34aSMichael S. Tsirkin 		case VIRTIO_F_VERSION_1:
1316747ae34aSMichael S. Tsirkin 			break;
13171a937693SMichael S. Tsirkin 		case VIRTIO_F_IOMMU_PLATFORM:
13181a937693SMichael S. Tsirkin 			break;
1319e34f8725SRusty Russell 		default:
1320e34f8725SRusty Russell 			/* We don't understand this bit. */
1321e16e12beSMichael S. Tsirkin 			__virtio_clear_bit(vdev, i);
1322e34f8725SRusty Russell 		}
1323e34f8725SRusty Russell 	}
1324e34f8725SRusty Russell }
1325e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features);
1326e34f8725SRusty Russell
13275dfc1762SRusty Russell /**
13285dfc1762SRusty Russell  * virtqueue_get_vring_size - return the size of the virtqueue's vring
13295dfc1762SRusty Russell  * @vq: the struct virtqueue containing the vring of interest.
13305dfc1762SRusty Russell  *
13315dfc1762SRusty Russell  * Returns the size of the vring. This is mainly used for boasting to
13325dfc1762SRusty Russell  * userspace. Unlike other operations, this need not be serialized.
13335dfc1762SRusty Russell  */
13348f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
13358f9f4668SRick Jones {
13368f9f4668SRick Jones
13378f9f4668SRick Jones 	struct vring_virtqueue *vq = to_vvq(_vq);
13388f9f4668SRick Jones
1339e593bf97STiwei Bie 	return vq->split.vring.num;
13408f9f4668SRick Jones }
13418f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
13428f9f4668SRick Jones
1343b3b32c94SHeinz Graalfs bool virtqueue_is_broken(struct virtqueue *_vq)
1344b3b32c94SHeinz Graalfs {
1345b3b32c94SHeinz Graalfs 	struct vring_virtqueue *vq = to_vvq(_vq);
1346b3b32c94SHeinz Graalfs
1347b3b32c94SHeinz Graalfs 	return vq->broken;
1348b3b32c94SHeinz Graalfs }
1349b3b32c94SHeinz Graalfs EXPORT_SYMBOL_GPL(virtqueue_is_broken);
1350b3b32c94SHeinz Graalfs
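/*
 * Example (illustrative sketch): a driver doing a synchronous request on a
 * slow control queue can spin for the reply while also checking for a
 * broken device, so that a failed device cannot wedge it forever:
 *
 *	virtqueue_kick(vq);
 *	while (!virtqueue_get_buf(vq, &len) &&
 *	       !virtqueue_is_broken(vq))
 *		cpu_relax();
 */
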
1351e2dcdfe9SRusty Russell /*
1352e2dcdfe9SRusty Russell  * This should prevent the device from being used, allowing drivers to
1353e2dcdfe9SRusty Russell  * recover. You may need to grab appropriate locks to flush.
1354e2dcdfe9SRusty Russell  */
1355e2dcdfe9SRusty Russell void virtio_break_device(struct virtio_device *dev)
1356e2dcdfe9SRusty Russell {
1357e2dcdfe9SRusty Russell 	struct virtqueue *_vq;
1358e2dcdfe9SRusty Russell
1359e2dcdfe9SRusty Russell 	list_for_each_entry(_vq, &dev->vqs, list) {
1360e2dcdfe9SRusty Russell 		struct vring_virtqueue *vq = to_vvq(_vq);
1361e2dcdfe9SRusty Russell 		vq->broken = true;
1362e2dcdfe9SRusty Russell 	}
1363e2dcdfe9SRusty Russell }
1364e2dcdfe9SRusty Russell EXPORT_SYMBOL_GPL(virtio_break_device);
1365e2dcdfe9SRusty Russell
13662a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
136789062652SCornelia Huck {
136789062652SCornelia Huck 	struct vring_virtqueue *vq = to_vvq(_vq);
136989062652SCornelia Huck
13702a2d1382SAndy Lutomirski 	BUG_ON(!vq->we_own_ring);
137189062652SCornelia Huck
1372d79dca75STiwei Bie 	return vq->split.queue_dma_addr;
13732a2d1382SAndy Lutomirski }
13742a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);
13752a2d1382SAndy Lutomirski
13762a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
137789062652SCornelia Huck {
137889062652SCornelia Huck 	struct vring_virtqueue *vq = to_vvq(_vq);
137989062652SCornelia Huck
13802a2d1382SAndy Lutomirski 	BUG_ON(!vq->we_own_ring);
13812a2d1382SAndy Lutomirski
1382d79dca75STiwei Bie 	return vq->split.queue_dma_addr +
1383e593bf97STiwei Bie 		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
138489062652SCornelia Huck }
13852a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);
13862a2d1382SAndy Lutomirski
13872a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
13882a2d1382SAndy Lutomirski {
13892a2d1382SAndy Lutomirski 	struct vring_virtqueue *vq = to_vvq(_vq);
13902a2d1382SAndy Lutomirski
13912a2d1382SAndy Lutomirski 	BUG_ON(!vq->we_own_ring);
13922a2d1382SAndy Lutomirski
1393d79dca75STiwei Bie 	return vq->split.queue_dma_addr +
1394e593bf97STiwei Bie 		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
13952a2d1382SAndy Lutomirski }
13962a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);
13972a2d1382SAndy Lutomirski
13982a2d1382SAndy Lutomirski const struct vring *virtqueue_get_vring(struct virtqueue *vq)
13992a2d1382SAndy Lutomirski {
1400e593bf97STiwei Bie 	return &to_vvq(vq)->split.vring;
14012a2d1382SAndy Lutomirski }
14022a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_vring);
140389062652SCornelia Huck
1404c6fd4701SRusty Russell MODULE_LICENSE("GPL");
1405