10a8a69ddSRusty Russell /* Virtio ring implementation. 20a8a69ddSRusty Russell * 30a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 40a8a69ddSRusty Russell * 50a8a69ddSRusty Russell * This program is free software; you can redistribute it and/or modify 60a8a69ddSRusty Russell * it under the terms of the GNU General Public License as published by 70a8a69ddSRusty Russell * the Free Software Foundation; either version 2 of the License, or 80a8a69ddSRusty Russell * (at your option) any later version. 90a8a69ddSRusty Russell * 100a8a69ddSRusty Russell * This program is distributed in the hope that it will be useful, 110a8a69ddSRusty Russell * but WITHOUT ANY WARRANTY; without even the implied warranty of 120a8a69ddSRusty Russell * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 130a8a69ddSRusty Russell * GNU General Public License for more details. 140a8a69ddSRusty Russell * 150a8a69ddSRusty Russell * You should have received a copy of the GNU General Public License 160a8a69ddSRusty Russell * along with this program; if not, write to the Free Software 170a8a69ddSRusty Russell * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 180a8a69ddSRusty Russell */ 190a8a69ddSRusty Russell #include <linux/virtio.h> 200a8a69ddSRusty Russell #include <linux/virtio_ring.h> 21e34f8725SRusty Russell #include <linux/virtio_config.h> 220a8a69ddSRusty Russell #include <linux/device.h> 235a0e3ad6STejun Heo #include <linux/slab.h> 24b5a2c4f1SPaul Gortmaker #include <linux/module.h> 25e93300b1SRusty Russell #include <linux/hrtimer.h> 26780bc790SAndy Lutomirski #include <linux/dma-mapping.h> 2778fe3987SAndy Lutomirski #include <xen/xen.h> 280a8a69ddSRusty Russell 290a8a69ddSRusty Russell #ifdef DEBUG 300a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 319499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 329499f5e7SRusty Russell do { \ 339499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 349499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 359499f5e7SRusty Russell BUG(); \ 369499f5e7SRusty Russell } while (0) 37c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 383a35ce7dSRoel Kluin #define START_USE(_vq) \ 39c5f841f1SRusty Russell do { \ 40c5f841f1SRusty Russell if ((_vq)->in_use) \ 419499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 429499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 43c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 44c5f841f1SRusty Russell } while (0) 453a35ce7dSRoel Kluin #define END_USE(_vq) \ 4697a545abSRusty Russell do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 474d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(_vq) \ 484d6a105eSTiwei Bie do { \ 494d6a105eSTiwei Bie ktime_t now = ktime_get(); \ 504d6a105eSTiwei Bie \ 514d6a105eSTiwei Bie /* No kick or get, with .1 second between? Warn. */ \ 524d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) \ 534d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(now, \ 544d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 554d6a105eSTiwei Bie (_vq)->last_add_time = now; \ 564d6a105eSTiwei Bie (_vq)->last_add_time_valid = true; \ 574d6a105eSTiwei Bie } while (0) 584d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(_vq) \ 594d6a105eSTiwei Bie do { \ 604d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) { \ 614d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 624d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 634d6a105eSTiwei Bie } \ 644d6a105eSTiwei Bie } while (0) 654d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(_vq) \ 664d6a105eSTiwei Bie ((_vq)->last_add_time_valid = false) 670a8a69ddSRusty Russell #else 689499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 699499f5e7SRusty Russell do { \ 709499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 719499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 729499f5e7SRusty Russell (_vq)->broken = true; \ 739499f5e7SRusty Russell } while (0) 740a8a69ddSRusty Russell #define START_USE(vq) 750a8a69ddSRusty Russell #define END_USE(vq) 764d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(vq) 774d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(vq) 784d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(vq) 790a8a69ddSRusty Russell #endif 800a8a69ddSRusty Russell 81cbeedb72STiwei Bie struct vring_desc_state_split { 82780bc790SAndy Lutomirski void *data; /* Data for callback. */ 83780bc790SAndy Lutomirski struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 84780bc790SAndy Lutomirski }; 85780bc790SAndy Lutomirski 8643b4f721SMichael S. Tsirkin struct vring_virtqueue { 870a8a69ddSRusty Russell struct virtqueue vq; 880a8a69ddSRusty Russell 897b21e34fSRusty Russell /* Can we use weak barriers? */ 907b21e34fSRusty Russell bool weak_barriers; 917b21e34fSRusty Russell 920a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 930a8a69ddSRusty Russell bool broken; 940a8a69ddSRusty Russell 959fa29b9dSMark McLoughlin /* Host supports indirect buffers */ 969fa29b9dSMark McLoughlin bool indirect; 979fa29b9dSMark McLoughlin 98a5c262c5SMichael S. Tsirkin /* Host publishes avail event idx */ 99a5c262c5SMichael S. Tsirkin bool event; 100a5c262c5SMichael S. Tsirkin 1010a8a69ddSRusty Russell /* Head of free buffer list. */ 1020a8a69ddSRusty Russell unsigned int free_head; 1030a8a69ddSRusty Russell /* Number we've added since last sync. */ 1040a8a69ddSRusty Russell unsigned int num_added; 1050a8a69ddSRusty Russell 1060a8a69ddSRusty Russell /* Last used index we've seen. */ 1071bc4953eSAnthony Liguori u16 last_used_idx; 1080a8a69ddSRusty Russell 109e593bf97STiwei Bie struct { 110e593bf97STiwei Bie /* Actual memory layout for this queue */ 111e593bf97STiwei Bie struct vring vring; 112e593bf97STiwei Bie 113f277ec42SVenkatesh Srinivas /* Last written value to avail->flags */ 114f277ec42SVenkatesh Srinivas u16 avail_flags_shadow; 115f277ec42SVenkatesh Srinivas 116f277ec42SVenkatesh Srinivas /* Last written value to avail->idx in guest byte order */ 117f277ec42SVenkatesh Srinivas u16 avail_idx_shadow; 118cbeedb72STiwei Bie 119cbeedb72STiwei Bie /* Per-descriptor state. */ 120cbeedb72STiwei Bie struct vring_desc_state_split *desc_state; 121*d79dca75STiwei Bie 122*d79dca75STiwei Bie /* DMA, allocation, and size information */ 123*d79dca75STiwei Bie size_t queue_size_in_bytes; 124*d79dca75STiwei Bie dma_addr_t queue_dma_addr; 125e593bf97STiwei Bie } split; 126f277ec42SVenkatesh Srinivas 1270a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 12846f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *vq); 1290a8a69ddSRusty Russell 1302a2d1382SAndy Lutomirski /* DMA, allocation, and size information */ 1312a2d1382SAndy Lutomirski bool we_own_ring; 1322a2d1382SAndy Lutomirski 1330a8a69ddSRusty Russell #ifdef DEBUG 1340a8a69ddSRusty Russell /* They're supposed to lock for us. */ 1350a8a69ddSRusty Russell unsigned int in_use; 136e93300b1SRusty Russell 137e93300b1SRusty Russell /* Figure out if their kicks are too delayed. */ 138e93300b1SRusty Russell bool last_add_time_valid; 139e93300b1SRusty Russell ktime_t last_add_time; 1400a8a69ddSRusty Russell #endif 1410a8a69ddSRusty Russell }; 1420a8a69ddSRusty Russell 143e6f633e5STiwei Bie 144e6f633e5STiwei Bie /* 145e6f633e5STiwei Bie * Helpers. 146e6f633e5STiwei Bie */ 147e6f633e5STiwei Bie 1480a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 1490a8a69ddSRusty Russell 1502f18c2d1STiwei Bie static inline bool virtqueue_use_indirect(struct virtqueue *_vq, 1512f18c2d1STiwei Bie unsigned int total_sg) 1522f18c2d1STiwei Bie { 1532f18c2d1STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1542f18c2d1STiwei Bie 1552f18c2d1STiwei Bie /* 1562f18c2d1STiwei Bie * If the host supports indirect descriptor tables, and we have multiple 1572f18c2d1STiwei Bie * buffers, then go indirect. FIXME: tune this threshold 1582f18c2d1STiwei Bie */ 1592f18c2d1STiwei Bie return (vq->indirect && total_sg > 1 && vq->vq.num_free); 1602f18c2d1STiwei Bie } 1612f18c2d1STiwei Bie 162d26c96c8SAndy Lutomirski /* 1631a937693SMichael S. Tsirkin * Modern virtio devices have feature bits to specify whether they need a 1641a937693SMichael S. Tsirkin * quirk and bypass the IOMMU. If not there, just use the DMA API. 1651a937693SMichael S. Tsirkin * 1661a937693SMichael S. Tsirkin * If there, the interaction between virtio and DMA API is messy. 167d26c96c8SAndy Lutomirski * 168d26c96c8SAndy Lutomirski * On most systems with virtio, physical addresses match bus addresses, 169d26c96c8SAndy Lutomirski * and it doesn't particularly matter whether we use the DMA API. 170d26c96c8SAndy Lutomirski * 171d26c96c8SAndy Lutomirski * On some systems, including Xen and any system with a physical device 172d26c96c8SAndy Lutomirski * that speaks virtio behind a physical IOMMU, we must use the DMA API 173d26c96c8SAndy Lutomirski * for virtio DMA to work at all. 174d26c96c8SAndy Lutomirski * 175d26c96c8SAndy Lutomirski * On other systems, including SPARC and PPC64, virtio-pci devices are 176d26c96c8SAndy Lutomirski * enumerated as though they are behind an IOMMU, but the virtio host 177d26c96c8SAndy Lutomirski * ignores the IOMMU, so we must either pretend that the IOMMU isn't 178d26c96c8SAndy Lutomirski * there or somehow map everything as the identity. 179d26c96c8SAndy Lutomirski * 180d26c96c8SAndy Lutomirski * For the time being, we preserve historic behavior and bypass the DMA 181d26c96c8SAndy Lutomirski * API. 1821a937693SMichael S. Tsirkin * 1831a937693SMichael S. Tsirkin * TODO: install a per-device DMA ops structure that does the right thing 1841a937693SMichael S. Tsirkin * taking into account all the above quirks, and use the DMA API 1851a937693SMichael S. Tsirkin * unconditionally on data path. 186d26c96c8SAndy Lutomirski */ 187d26c96c8SAndy Lutomirski 188d26c96c8SAndy Lutomirski static bool vring_use_dma_api(struct virtio_device *vdev) 189d26c96c8SAndy Lutomirski { 1901a937693SMichael S. Tsirkin if (!virtio_has_iommu_quirk(vdev)) 1911a937693SMichael S. Tsirkin return true; 1921a937693SMichael S. Tsirkin 1931a937693SMichael S. Tsirkin /* Otherwise, we are left to guess. */ 19478fe3987SAndy Lutomirski /* 19578fe3987SAndy Lutomirski * In theory, it's possible to have a buggy QEMU-supposed 19678fe3987SAndy Lutomirski * emulated Q35 IOMMU and Xen enabled at the same time. On 19778fe3987SAndy Lutomirski * such a configuration, virtio has never worked and will 19878fe3987SAndy Lutomirski * not work without an even larger kludge. Instead, enable 19978fe3987SAndy Lutomirski * the DMA API if we're a Xen guest, which at least allows 20078fe3987SAndy Lutomirski * all of the sensible Xen configurations to work correctly. 20178fe3987SAndy Lutomirski */ 20278fe3987SAndy Lutomirski if (xen_domain()) 20378fe3987SAndy Lutomirski return true; 20478fe3987SAndy Lutomirski 205d26c96c8SAndy Lutomirski return false; 206d26c96c8SAndy Lutomirski } 207d26c96c8SAndy Lutomirski 208*d79dca75STiwei Bie static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 209*d79dca75STiwei Bie dma_addr_t *dma_handle, gfp_t flag) 210*d79dca75STiwei Bie { 211*d79dca75STiwei Bie if (vring_use_dma_api(vdev)) { 212*d79dca75STiwei Bie return dma_alloc_coherent(vdev->dev.parent, size, 213*d79dca75STiwei Bie dma_handle, flag); 214*d79dca75STiwei Bie } else { 215*d79dca75STiwei Bie void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 216*d79dca75STiwei Bie 217*d79dca75STiwei Bie if (queue) { 218*d79dca75STiwei Bie phys_addr_t phys_addr = virt_to_phys(queue); 219*d79dca75STiwei Bie *dma_handle = (dma_addr_t)phys_addr; 220*d79dca75STiwei Bie 221*d79dca75STiwei Bie /* 222*d79dca75STiwei Bie * Sanity check: make sure we dind't truncate 223*d79dca75STiwei Bie * the address. The only arches I can find that 224*d79dca75STiwei Bie * have 64-bit phys_addr_t but 32-bit dma_addr_t 225*d79dca75STiwei Bie * are certain non-highmem MIPS and x86 226*d79dca75STiwei Bie * configurations, but these configurations 227*d79dca75STiwei Bie * should never allocate physical pages above 32 228*d79dca75STiwei Bie * bits, so this is fine. Just in case, throw a 229*d79dca75STiwei Bie * warning and abort if we end up with an 230*d79dca75STiwei Bie * unrepresentable address. 231*d79dca75STiwei Bie */ 232*d79dca75STiwei Bie if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 233*d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 234*d79dca75STiwei Bie return NULL; 235*d79dca75STiwei Bie } 236*d79dca75STiwei Bie } 237*d79dca75STiwei Bie return queue; 238*d79dca75STiwei Bie } 239*d79dca75STiwei Bie } 240*d79dca75STiwei Bie 241*d79dca75STiwei Bie static void vring_free_queue(struct virtio_device *vdev, size_t size, 242*d79dca75STiwei Bie void *queue, dma_addr_t dma_handle) 243*d79dca75STiwei Bie { 244*d79dca75STiwei Bie if (vring_use_dma_api(vdev)) 245*d79dca75STiwei Bie dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 246*d79dca75STiwei Bie else 247*d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 248*d79dca75STiwei Bie } 249*d79dca75STiwei Bie 250780bc790SAndy Lutomirski /* 251780bc790SAndy Lutomirski * The DMA ops on various arches are rather gnarly right now, and 252780bc790SAndy Lutomirski * making all of the arch DMA ops work on the vring device itself 253780bc790SAndy Lutomirski * is a mess. For now, we use the parent device for DMA ops. 254780bc790SAndy Lutomirski */ 25575bfa81bSMichael S. Tsirkin static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 256780bc790SAndy Lutomirski { 257780bc790SAndy Lutomirski return vq->vq.vdev->dev.parent; 258780bc790SAndy Lutomirski } 259780bc790SAndy Lutomirski 260780bc790SAndy Lutomirski /* Map one sg entry. */ 261780bc790SAndy Lutomirski static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 262780bc790SAndy Lutomirski struct scatterlist *sg, 263780bc790SAndy Lutomirski enum dma_data_direction direction) 264780bc790SAndy Lutomirski { 265780bc790SAndy Lutomirski if (!vring_use_dma_api(vq->vq.vdev)) 266780bc790SAndy Lutomirski return (dma_addr_t)sg_phys(sg); 267780bc790SAndy Lutomirski 268780bc790SAndy Lutomirski /* 269780bc790SAndy Lutomirski * We can't use dma_map_sg, because we don't use scatterlists in 270780bc790SAndy Lutomirski * the way it expects (we don't guarantee that the scatterlist 271780bc790SAndy Lutomirski * will exist for the lifetime of the mapping). 272780bc790SAndy Lutomirski */ 273780bc790SAndy Lutomirski return dma_map_page(vring_dma_dev(vq), 274780bc790SAndy Lutomirski sg_page(sg), sg->offset, sg->length, 275780bc790SAndy Lutomirski direction); 276780bc790SAndy Lutomirski } 277780bc790SAndy Lutomirski 278780bc790SAndy Lutomirski static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 279780bc790SAndy Lutomirski void *cpu_addr, size_t size, 280780bc790SAndy Lutomirski enum dma_data_direction direction) 281780bc790SAndy Lutomirski { 282780bc790SAndy Lutomirski if (!vring_use_dma_api(vq->vq.vdev)) 283780bc790SAndy Lutomirski return (dma_addr_t)virt_to_phys(cpu_addr); 284780bc790SAndy Lutomirski 285780bc790SAndy Lutomirski return dma_map_single(vring_dma_dev(vq), 286780bc790SAndy Lutomirski cpu_addr, size, direction); 287780bc790SAndy Lutomirski } 288780bc790SAndy Lutomirski 289e6f633e5STiwei Bie static int vring_mapping_error(const struct vring_virtqueue *vq, 290e6f633e5STiwei Bie dma_addr_t addr) 291e6f633e5STiwei Bie { 292e6f633e5STiwei Bie if (!vring_use_dma_api(vq->vq.vdev)) 293e6f633e5STiwei Bie return 0; 294e6f633e5STiwei Bie 295e6f633e5STiwei Bie return dma_mapping_error(vring_dma_dev(vq), addr); 296e6f633e5STiwei Bie } 297e6f633e5STiwei Bie 298e6f633e5STiwei Bie 299e6f633e5STiwei Bie /* 300e6f633e5STiwei Bie * Split ring specific functions - *_split(). 301e6f633e5STiwei Bie */ 302e6f633e5STiwei Bie 303138fd251STiwei Bie static void vring_unmap_one_split(const struct vring_virtqueue *vq, 304780bc790SAndy Lutomirski struct vring_desc *desc) 305780bc790SAndy Lutomirski { 306780bc790SAndy Lutomirski u16 flags; 307780bc790SAndy Lutomirski 308780bc790SAndy Lutomirski if (!vring_use_dma_api(vq->vq.vdev)) 309780bc790SAndy Lutomirski return; 310780bc790SAndy Lutomirski 311780bc790SAndy Lutomirski flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 312780bc790SAndy Lutomirski 313780bc790SAndy Lutomirski if (flags & VRING_DESC_F_INDIRECT) { 314780bc790SAndy Lutomirski dma_unmap_single(vring_dma_dev(vq), 315780bc790SAndy Lutomirski virtio64_to_cpu(vq->vq.vdev, desc->addr), 316780bc790SAndy Lutomirski virtio32_to_cpu(vq->vq.vdev, desc->len), 317780bc790SAndy Lutomirski (flags & VRING_DESC_F_WRITE) ? 318780bc790SAndy Lutomirski DMA_FROM_DEVICE : DMA_TO_DEVICE); 319780bc790SAndy Lutomirski } else { 320780bc790SAndy Lutomirski dma_unmap_page(vring_dma_dev(vq), 321780bc790SAndy Lutomirski virtio64_to_cpu(vq->vq.vdev, desc->addr), 322780bc790SAndy Lutomirski virtio32_to_cpu(vq->vq.vdev, desc->len), 323780bc790SAndy Lutomirski (flags & VRING_DESC_F_WRITE) ? 324780bc790SAndy Lutomirski DMA_FROM_DEVICE : DMA_TO_DEVICE); 325780bc790SAndy Lutomirski } 326780bc790SAndy Lutomirski } 327780bc790SAndy Lutomirski 328138fd251STiwei Bie static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 329138fd251STiwei Bie unsigned int total_sg, 330138fd251STiwei Bie gfp_t gfp) 3319fa29b9dSMark McLoughlin { 3329fa29b9dSMark McLoughlin struct vring_desc *desc; 333b25bd251SRusty Russell unsigned int i; 3349fa29b9dSMark McLoughlin 335b92b1b89SWill Deacon /* 336b92b1b89SWill Deacon * We require lowmem mappings for the descriptors because 337b92b1b89SWill Deacon * otherwise virt_to_phys will give us bogus addresses in the 338b92b1b89SWill Deacon * virtqueue. 339b92b1b89SWill Deacon */ 34082107539SMichal Hocko gfp &= ~__GFP_HIGHMEM; 341b92b1b89SWill Deacon 3426da2ec56SKees Cook desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 3439fa29b9dSMark McLoughlin if (!desc) 344b25bd251SRusty Russell return NULL; 3459fa29b9dSMark McLoughlin 346b25bd251SRusty Russell for (i = 0; i < total_sg; i++) 34700e6f3d9SMichael S. Tsirkin desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 348b25bd251SRusty Russell return desc; 3499fa29b9dSMark McLoughlin } 3509fa29b9dSMark McLoughlin 351138fd251STiwei Bie static inline int virtqueue_add_split(struct virtqueue *_vq, 35213816c76SRusty Russell struct scatterlist *sgs[], 353eeebf9b1SRusty Russell unsigned int total_sg, 35413816c76SRusty Russell unsigned int out_sgs, 35513816c76SRusty Russell unsigned int in_sgs, 356bbd603efSMichael S. Tsirkin void *data, 3575a08b04fSMichael S. Tsirkin void *ctx, 358bbd603efSMichael S. Tsirkin gfp_t gfp) 3590a8a69ddSRusty Russell { 3600a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 36113816c76SRusty Russell struct scatterlist *sg; 362b25bd251SRusty Russell struct vring_desc *desc; 363780bc790SAndy Lutomirski unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx; 3641fe9b6feSMichael S. Tsirkin int head; 365b25bd251SRusty Russell bool indirect; 3660a8a69ddSRusty Russell 3679fa29b9dSMark McLoughlin START_USE(vq); 3689fa29b9dSMark McLoughlin 3690a8a69ddSRusty Russell BUG_ON(data == NULL); 3705a08b04fSMichael S. Tsirkin BUG_ON(ctx && vq->indirect); 3719fa29b9dSMark McLoughlin 37270670444SRusty Russell if (unlikely(vq->broken)) { 37370670444SRusty Russell END_USE(vq); 37470670444SRusty Russell return -EIO; 37570670444SRusty Russell } 37670670444SRusty Russell 3774d6a105eSTiwei Bie LAST_ADD_TIME_UPDATE(vq); 378e93300b1SRusty Russell 37913816c76SRusty Russell BUG_ON(total_sg == 0); 3800a8a69ddSRusty Russell 381b25bd251SRusty Russell head = vq->free_head; 382b25bd251SRusty Russell 3832f18c2d1STiwei Bie if (virtqueue_use_indirect(_vq, total_sg)) 384138fd251STiwei Bie desc = alloc_indirect_split(_vq, total_sg, gfp); 38544ed8089SRichard W.M. Jones else { 386b25bd251SRusty Russell desc = NULL; 387e593bf97STiwei Bie WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 38844ed8089SRichard W.M. Jones } 389b25bd251SRusty Russell 390b25bd251SRusty Russell if (desc) { 391b25bd251SRusty Russell /* Use a single buffer which doesn't continue */ 392780bc790SAndy Lutomirski indirect = true; 393b25bd251SRusty Russell /* Set up rest to use this indirect table. */ 394b25bd251SRusty Russell i = 0; 395b25bd251SRusty Russell descs_used = 1; 396b25bd251SRusty Russell } else { 397780bc790SAndy Lutomirski indirect = false; 398e593bf97STiwei Bie desc = vq->split.vring.desc; 399b25bd251SRusty Russell i = head; 400b25bd251SRusty Russell descs_used = total_sg; 401b25bd251SRusty Russell } 402b25bd251SRusty Russell 403b25bd251SRusty Russell if (vq->vq.num_free < descs_used) { 4040a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 405b25bd251SRusty Russell descs_used, vq->vq.num_free); 40644653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 40744653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 40844653eaeSRusty Russell * host should service the ring ASAP. */ 40913816c76SRusty Russell if (out_sgs) 410426e3e0aSRusty Russell vq->notify(&vq->vq); 41158625edfSWei Yongjun if (indirect) 41258625edfSWei Yongjun kfree(desc); 4130a8a69ddSRusty Russell END_USE(vq); 4140a8a69ddSRusty Russell return -ENOSPC; 4150a8a69ddSRusty Russell } 4160a8a69ddSRusty Russell 41713816c76SRusty Russell for (n = 0; n < out_sgs; n++) { 418eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 419780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 420780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 421780bc790SAndy Lutomirski goto unmap_release; 422780bc790SAndy Lutomirski 42300e6f3d9SMichael S. Tsirkin desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); 424780bc790SAndy Lutomirski desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 42500e6f3d9SMichael S. Tsirkin desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 4260a8a69ddSRusty Russell prev = i; 42700e6f3d9SMichael S. Tsirkin i = virtio16_to_cpu(_vq->vdev, desc[i].next); 4280a8a69ddSRusty Russell } 42913816c76SRusty Russell } 43013816c76SRusty Russell for (; n < (out_sgs + in_sgs); n++) { 431eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 432780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 433780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 434780bc790SAndy Lutomirski goto unmap_release; 435780bc790SAndy Lutomirski 43600e6f3d9SMichael S. Tsirkin desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); 437780bc790SAndy Lutomirski desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); 43800e6f3d9SMichael S. Tsirkin desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); 4390a8a69ddSRusty Russell prev = i; 44000e6f3d9SMichael S. Tsirkin i = virtio16_to_cpu(_vq->vdev, desc[i].next); 44113816c76SRusty Russell } 4420a8a69ddSRusty Russell } 4430a8a69ddSRusty Russell /* Last one doesn't continue. */ 44400e6f3d9SMichael S. Tsirkin desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 4450a8a69ddSRusty Russell 446780bc790SAndy Lutomirski if (indirect) { 447780bc790SAndy Lutomirski /* Now that the indirect table is filled in, map it. */ 448780bc790SAndy Lutomirski dma_addr_t addr = vring_map_single( 449780bc790SAndy Lutomirski vq, desc, total_sg * sizeof(struct vring_desc), 450780bc790SAndy Lutomirski DMA_TO_DEVICE); 451780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 452780bc790SAndy Lutomirski goto unmap_release; 453780bc790SAndy Lutomirski 454e593bf97STiwei Bie vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, 455e593bf97STiwei Bie VRING_DESC_F_INDIRECT); 456e593bf97STiwei Bie vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, 457e593bf97STiwei Bie addr); 458780bc790SAndy Lutomirski 459e593bf97STiwei Bie vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, 460e593bf97STiwei Bie total_sg * sizeof(struct vring_desc)); 461780bc790SAndy Lutomirski } 462780bc790SAndy Lutomirski 463780bc790SAndy Lutomirski /* We're using some buffers from the free list. */ 464780bc790SAndy Lutomirski vq->vq.num_free -= descs_used; 465780bc790SAndy Lutomirski 4660a8a69ddSRusty Russell /* Update free pointer */ 467b25bd251SRusty Russell if (indirect) 468e593bf97STiwei Bie vq->free_head = virtio16_to_cpu(_vq->vdev, 469e593bf97STiwei Bie vq->split.vring.desc[head].next); 470b25bd251SRusty Russell else 4710a8a69ddSRusty Russell vq->free_head = i; 4720a8a69ddSRusty Russell 473780bc790SAndy Lutomirski /* Store token and indirect buffer state. */ 474cbeedb72STiwei Bie vq->split.desc_state[head].data = data; 475780bc790SAndy Lutomirski if (indirect) 476cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = desc; 47787646a34SJason Wang else 478cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = ctx; 4790a8a69ddSRusty Russell 4800a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 4813b720b8cSRusty Russell * do sync). */ 482e593bf97STiwei Bie avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 483e593bf97STiwei Bie vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 4840a8a69ddSRusty Russell 485ee7cd898SRusty Russell /* Descriptors and available array need to be set before we expose the 486ee7cd898SRusty Russell * new available array entries. */ 487a9a0fef7SRusty Russell virtio_wmb(vq->weak_barriers); 488e593bf97STiwei Bie vq->split.avail_idx_shadow++; 489e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 490e593bf97STiwei Bie vq->split.avail_idx_shadow); 491ee7cd898SRusty Russell vq->num_added++; 492ee7cd898SRusty Russell 4935e05bf58STetsuo Handa pr_debug("Added buffer head %i to %p\n", head, vq); 4945e05bf58STetsuo Handa END_USE(vq); 4955e05bf58STetsuo Handa 496ee7cd898SRusty Russell /* This is very unlikely, but theoretically possible. Kick 497ee7cd898SRusty Russell * just in case. */ 498ee7cd898SRusty Russell if (unlikely(vq->num_added == (1 << 16) - 1)) 499ee7cd898SRusty Russell virtqueue_kick(_vq); 500ee7cd898SRusty Russell 50198e8c6bcSRusty Russell return 0; 502780bc790SAndy Lutomirski 503780bc790SAndy Lutomirski unmap_release: 504780bc790SAndy Lutomirski err_idx = i; 505780bc790SAndy Lutomirski i = head; 506780bc790SAndy Lutomirski 507780bc790SAndy Lutomirski for (n = 0; n < total_sg; n++) { 508780bc790SAndy Lutomirski if (i == err_idx) 509780bc790SAndy Lutomirski break; 510138fd251STiwei Bie vring_unmap_one_split(vq, &desc[i]); 511e593bf97STiwei Bie i = virtio16_to_cpu(_vq->vdev, vq->split.vring.desc[i].next); 512780bc790SAndy Lutomirski } 513780bc790SAndy Lutomirski 514780bc790SAndy Lutomirski if (indirect) 515780bc790SAndy Lutomirski kfree(desc); 516780bc790SAndy Lutomirski 5173cc36f6eSMichael S. Tsirkin END_USE(vq); 518780bc790SAndy Lutomirski return -EIO; 5190a8a69ddSRusty Russell } 52013816c76SRusty Russell 521138fd251STiwei Bie static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 5220a8a69ddSRusty Russell { 5230a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 524a5c262c5SMichael S. Tsirkin u16 new, old; 52541f0377fSRusty Russell bool needs_kick; 52641f0377fSRusty Russell 5270a8a69ddSRusty Russell START_USE(vq); 528a72caae2SJason Wang /* We need to expose available array entries before checking avail 529a72caae2SJason Wang * event. */ 530a9a0fef7SRusty Russell virtio_mb(vq->weak_barriers); 5310a8a69ddSRusty Russell 532e593bf97STiwei Bie old = vq->split.avail_idx_shadow - vq->num_added; 533e593bf97STiwei Bie new = vq->split.avail_idx_shadow; 5340a8a69ddSRusty Russell vq->num_added = 0; 5350a8a69ddSRusty Russell 5364d6a105eSTiwei Bie LAST_ADD_TIME_CHECK(vq); 5374d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 538e93300b1SRusty Russell 53941f0377fSRusty Russell if (vq->event) { 540e593bf97STiwei Bie needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 541e593bf97STiwei Bie vring_avail_event(&vq->split.vring)), 54241f0377fSRusty Russell new, old); 54341f0377fSRusty Russell } else { 544e593bf97STiwei Bie needs_kick = !(vq->split.vring.used->flags & 545e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 546e593bf97STiwei Bie VRING_USED_F_NO_NOTIFY)); 54741f0377fSRusty Russell } 5480a8a69ddSRusty Russell END_USE(vq); 54941f0377fSRusty Russell return needs_kick; 55041f0377fSRusty Russell } 551138fd251STiwei Bie 552138fd251STiwei Bie static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 5535a08b04fSMichael S. Tsirkin void **ctx) 5540a8a69ddSRusty Russell { 555780bc790SAndy Lutomirski unsigned int i, j; 556c60923cbSGonglei __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 5570a8a69ddSRusty Russell 5580a8a69ddSRusty Russell /* Clear data ptr. */ 559cbeedb72STiwei Bie vq->split.desc_state[head].data = NULL; 5600a8a69ddSRusty Russell 561780bc790SAndy Lutomirski /* Put back on free list: unmap first-level descriptors and find end */ 5620a8a69ddSRusty Russell i = head; 5639fa29b9dSMark McLoughlin 564e593bf97STiwei Bie while (vq->split.vring.desc[i].flags & nextflag) { 565e593bf97STiwei Bie vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 566e593bf97STiwei Bie i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); 56706ca287dSRusty Russell vq->vq.num_free++; 5680a8a69ddSRusty Russell } 5690a8a69ddSRusty Russell 570e593bf97STiwei Bie vring_unmap_one_split(vq, &vq->split.vring.desc[i]); 571e593bf97STiwei Bie vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, 572e593bf97STiwei Bie vq->free_head); 5730a8a69ddSRusty Russell vq->free_head = head; 574780bc790SAndy Lutomirski 5750a8a69ddSRusty Russell /* Plus final descriptor */ 57606ca287dSRusty Russell vq->vq.num_free++; 577780bc790SAndy Lutomirski 5785a08b04fSMichael S. Tsirkin if (vq->indirect) { 579cbeedb72STiwei Bie struct vring_desc *indir_desc = 580cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc; 5815a08b04fSMichael S. Tsirkin u32 len; 5825a08b04fSMichael S. Tsirkin 5835a08b04fSMichael S. Tsirkin /* Free the indirect table, if any, now that it's unmapped. */ 5845a08b04fSMichael S. Tsirkin if (!indir_desc) 5855a08b04fSMichael S. Tsirkin return; 5865a08b04fSMichael S. Tsirkin 587e593bf97STiwei Bie len = virtio32_to_cpu(vq->vq.vdev, 588e593bf97STiwei Bie vq->split.vring.desc[head].len); 589780bc790SAndy Lutomirski 590e593bf97STiwei Bie BUG_ON(!(vq->split.vring.desc[head].flags & 591780bc790SAndy Lutomirski cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); 592780bc790SAndy Lutomirski BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 593780bc790SAndy Lutomirski 594780bc790SAndy Lutomirski for (j = 0; j < len / sizeof(struct vring_desc); j++) 595138fd251STiwei Bie vring_unmap_one_split(vq, &indir_desc[j]); 596780bc790SAndy Lutomirski 5975a08b04fSMichael S. Tsirkin kfree(indir_desc); 598cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = NULL; 5995a08b04fSMichael S. Tsirkin } else if (ctx) { 600cbeedb72STiwei Bie *ctx = vq->split.desc_state[head].indir_desc; 601780bc790SAndy Lutomirski } 6020a8a69ddSRusty Russell } 6030a8a69ddSRusty Russell 604138fd251STiwei Bie static inline bool more_used_split(const struct vring_virtqueue *vq) 6050a8a69ddSRusty Russell { 606e593bf97STiwei Bie return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 607e593bf97STiwei Bie vq->split.vring.used->idx); 6080a8a69ddSRusty Russell } 6090a8a69ddSRusty Russell 610138fd251STiwei Bie static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 611138fd251STiwei Bie unsigned int *len, 6125a08b04fSMichael S. Tsirkin void **ctx) 6130a8a69ddSRusty Russell { 6140a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 6150a8a69ddSRusty Russell void *ret; 6160a8a69ddSRusty Russell unsigned int i; 6173b720b8cSRusty Russell u16 last_used; 6180a8a69ddSRusty Russell 6190a8a69ddSRusty Russell START_USE(vq); 6200a8a69ddSRusty Russell 6215ef82752SRusty Russell if (unlikely(vq->broken)) { 6225ef82752SRusty Russell END_USE(vq); 6235ef82752SRusty Russell return NULL; 6245ef82752SRusty Russell } 6255ef82752SRusty Russell 626138fd251STiwei Bie if (!more_used_split(vq)) { 6270a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 6280a8a69ddSRusty Russell END_USE(vq); 6290a8a69ddSRusty Russell return NULL; 6300a8a69ddSRusty Russell } 6310a8a69ddSRusty Russell 6322d61ba95SMichael S. Tsirkin /* Only get used array entries after they have been exposed by host. */ 633a9a0fef7SRusty Russell virtio_rmb(vq->weak_barriers); 6342d61ba95SMichael S. Tsirkin 635e593bf97STiwei Bie last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 636e593bf97STiwei Bie i = virtio32_to_cpu(_vq->vdev, 637e593bf97STiwei Bie vq->split.vring.used->ring[last_used].id); 638e593bf97STiwei Bie *len = virtio32_to_cpu(_vq->vdev, 639e593bf97STiwei Bie vq->split.vring.used->ring[last_used].len); 6400a8a69ddSRusty Russell 641e593bf97STiwei Bie if (unlikely(i >= vq->split.vring.num)) { 6420a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 6430a8a69ddSRusty Russell return NULL; 6440a8a69ddSRusty Russell } 645cbeedb72STiwei Bie if (unlikely(!vq->split.desc_state[i].data)) { 6460a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 6470a8a69ddSRusty Russell return NULL; 6480a8a69ddSRusty Russell } 6490a8a69ddSRusty Russell 650138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. */ 651cbeedb72STiwei Bie ret = vq->split.desc_state[i].data; 652138fd251STiwei Bie detach_buf_split(vq, i, ctx); 6530a8a69ddSRusty Russell vq->last_used_idx++; 654a5c262c5SMichael S. Tsirkin /* If we expect an interrupt for the next entry, tell host 655a5c262c5SMichael S. Tsirkin * by writing event index and flush out the write before 656a5c262c5SMichael S. Tsirkin * the read in the next get_buf call. */ 657e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 658788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 659e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 660788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 661a5c262c5SMichael S. Tsirkin 6624d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 663e93300b1SRusty Russell 6640a8a69ddSRusty Russell END_USE(vq); 6650a8a69ddSRusty Russell return ret; 6660a8a69ddSRusty Russell } 667138fd251STiwei Bie 668138fd251STiwei Bie static void virtqueue_disable_cb_split(struct virtqueue *_vq) 669138fd251STiwei Bie { 670138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 671138fd251STiwei Bie 672e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 673e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 674138fd251STiwei Bie if (!vq->event) 675e593bf97STiwei Bie vq->split.vring.avail->flags = 676e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 677e593bf97STiwei Bie vq->split.avail_flags_shadow); 678138fd251STiwei Bie } 679138fd251STiwei Bie } 680138fd251STiwei Bie 681138fd251STiwei Bie static unsigned virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 682cc229884SMichael S. Tsirkin { 683cc229884SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 684cc229884SMichael S. Tsirkin u16 last_used_idx; 685cc229884SMichael S. Tsirkin 686cc229884SMichael S. Tsirkin START_USE(vq); 687cc229884SMichael S. Tsirkin 688cc229884SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 689cc229884SMichael S. Tsirkin * more to do. */ 690cc229884SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 691cc229884SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 692cc229884SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 693e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 694e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 6950ea1e4a6SLadi Prosek if (!vq->event) 696e593bf97STiwei Bie vq->split.vring.avail->flags = 697e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 698e593bf97STiwei Bie vq->split.avail_flags_shadow); 699f277ec42SVenkatesh Srinivas } 700e593bf97STiwei Bie vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 701e593bf97STiwei Bie last_used_idx = vq->last_used_idx); 702cc229884SMichael S. Tsirkin END_USE(vq); 703cc229884SMichael S. Tsirkin return last_used_idx; 704cc229884SMichael S. Tsirkin } 705138fd251STiwei Bie 706138fd251STiwei Bie static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned last_used_idx) 707138fd251STiwei Bie { 708138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 709138fd251STiwei Bie 710138fd251STiwei Bie return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 711e593bf97STiwei Bie vq->split.vring.used->idx); 712138fd251STiwei Bie } 713138fd251STiwei Bie 714138fd251STiwei Bie static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 7157ab358c2SMichael S. Tsirkin { 7167ab358c2SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 7177ab358c2SMichael S. Tsirkin u16 bufs; 7187ab358c2SMichael S. Tsirkin 7197ab358c2SMichael S. Tsirkin START_USE(vq); 7207ab358c2SMichael S. Tsirkin 7217ab358c2SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 7227ab358c2SMichael S. Tsirkin * more to do. */ 7237ab358c2SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 7247ab358c2SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 7250ea1e4a6SLadi Prosek * entry. Always update the event index to keep code simple. */ 726e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 727e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 7280ea1e4a6SLadi Prosek if (!vq->event) 729e593bf97STiwei Bie vq->split.vring.avail->flags = 730e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 731e593bf97STiwei Bie vq->split.avail_flags_shadow); 732f277ec42SVenkatesh Srinivas } 7337ab358c2SMichael S. Tsirkin /* TODO: tune this threshold */ 734e593bf97STiwei Bie bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 735788e5b3aSMichael S. Tsirkin 736788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 737e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 738788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 739788e5b3aSMichael S. Tsirkin 740e593bf97STiwei Bie if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 741e593bf97STiwei Bie - vq->last_used_idx) > bufs)) { 7427ab358c2SMichael S. Tsirkin END_USE(vq); 7437ab358c2SMichael S. Tsirkin return false; 7447ab358c2SMichael S. Tsirkin } 7457ab358c2SMichael S. Tsirkin 7467ab358c2SMichael S. Tsirkin END_USE(vq); 7477ab358c2SMichael S. Tsirkin return true; 7487ab358c2SMichael S. Tsirkin } 7497ab358c2SMichael S. Tsirkin 750138fd251STiwei Bie static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 751c021eac4SShirley Ma { 752c021eac4SShirley Ma struct vring_virtqueue *vq = to_vvq(_vq); 753c021eac4SShirley Ma unsigned int i; 754c021eac4SShirley Ma void *buf; 755c021eac4SShirley Ma 756c021eac4SShirley Ma START_USE(vq); 757c021eac4SShirley Ma 758e593bf97STiwei Bie for (i = 0; i < vq->split.vring.num; i++) { 759cbeedb72STiwei Bie if (!vq->split.desc_state[i].data) 760c021eac4SShirley Ma continue; 761138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. */ 762cbeedb72STiwei Bie buf = vq->split.desc_state[i].data; 763138fd251STiwei Bie detach_buf_split(vq, i, NULL); 764e593bf97STiwei Bie vq->split.avail_idx_shadow--; 765e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 766e593bf97STiwei Bie vq->split.avail_idx_shadow); 767c021eac4SShirley Ma END_USE(vq); 768c021eac4SShirley Ma return buf; 769c021eac4SShirley Ma } 770c021eac4SShirley Ma /* That should have freed everything. */ 771e593bf97STiwei Bie BUG_ON(vq->vq.num_free != vq->split.vring.num); 772c021eac4SShirley Ma 773c021eac4SShirley Ma END_USE(vq); 774c021eac4SShirley Ma return NULL; 775c021eac4SShirley Ma } 776138fd251STiwei Bie 777*d79dca75STiwei Bie static struct virtqueue *vring_create_virtqueue_split( 778*d79dca75STiwei Bie unsigned int index, 779*d79dca75STiwei Bie unsigned int num, 780*d79dca75STiwei Bie unsigned int vring_align, 781*d79dca75STiwei Bie struct virtio_device *vdev, 782*d79dca75STiwei Bie bool weak_barriers, 783*d79dca75STiwei Bie bool may_reduce_num, 784*d79dca75STiwei Bie bool context, 785*d79dca75STiwei Bie bool (*notify)(struct virtqueue *), 786*d79dca75STiwei Bie void (*callback)(struct virtqueue *), 787*d79dca75STiwei Bie const char *name) 788*d79dca75STiwei Bie { 789*d79dca75STiwei Bie struct virtqueue *vq; 790*d79dca75STiwei Bie void *queue = NULL; 791*d79dca75STiwei Bie dma_addr_t dma_addr; 792*d79dca75STiwei Bie size_t queue_size_in_bytes; 793*d79dca75STiwei Bie struct vring vring; 794*d79dca75STiwei Bie 795*d79dca75STiwei Bie /* We assume num is a power of 2. */ 796*d79dca75STiwei Bie if (num & (num - 1)) { 797*d79dca75STiwei Bie dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 798*d79dca75STiwei Bie return NULL; 799*d79dca75STiwei Bie } 800*d79dca75STiwei Bie 801*d79dca75STiwei Bie /* TODO: allocate each queue chunk individually */ 802*d79dca75STiwei Bie for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 803*d79dca75STiwei Bie queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 804*d79dca75STiwei Bie &dma_addr, 805*d79dca75STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 806*d79dca75STiwei Bie if (queue) 807*d79dca75STiwei Bie break; 808*d79dca75STiwei Bie } 809*d79dca75STiwei Bie 810*d79dca75STiwei Bie if (!num) 811*d79dca75STiwei Bie return NULL; 812*d79dca75STiwei Bie 813*d79dca75STiwei Bie if (!queue) { 814*d79dca75STiwei Bie /* Try to get a single page. You are my only hope! */ 815*d79dca75STiwei Bie queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 816*d79dca75STiwei Bie &dma_addr, GFP_KERNEL|__GFP_ZERO); 817*d79dca75STiwei Bie } 818*d79dca75STiwei Bie if (!queue) 819*d79dca75STiwei Bie return NULL; 820*d79dca75STiwei Bie 821*d79dca75STiwei Bie queue_size_in_bytes = vring_size(num, vring_align); 822*d79dca75STiwei Bie vring_init(&vring, num, queue, vring_align); 823*d79dca75STiwei Bie 824*d79dca75STiwei Bie vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 825*d79dca75STiwei Bie notify, callback, name); 826*d79dca75STiwei Bie if (!vq) { 827*d79dca75STiwei Bie vring_free_queue(vdev, queue_size_in_bytes, queue, 828*d79dca75STiwei Bie dma_addr); 829*d79dca75STiwei Bie return NULL; 830*d79dca75STiwei Bie } 831*d79dca75STiwei Bie 832*d79dca75STiwei Bie to_vvq(vq)->split.queue_dma_addr = dma_addr; 833*d79dca75STiwei Bie to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes; 834*d79dca75STiwei Bie to_vvq(vq)->we_own_ring = true; 835*d79dca75STiwei Bie 836*d79dca75STiwei Bie return vq; 837*d79dca75STiwei Bie } 838*d79dca75STiwei Bie 839e6f633e5STiwei Bie 840e6f633e5STiwei Bie /* 841e6f633e5STiwei Bie * Generic functions and exported symbols. 842e6f633e5STiwei Bie */ 843e6f633e5STiwei Bie 844e6f633e5STiwei Bie static inline int virtqueue_add(struct virtqueue *_vq, 845e6f633e5STiwei Bie struct scatterlist *sgs[], 846e6f633e5STiwei Bie unsigned int total_sg, 847e6f633e5STiwei Bie unsigned int out_sgs, 848e6f633e5STiwei Bie unsigned int in_sgs, 849e6f633e5STiwei Bie void *data, 850e6f633e5STiwei Bie void *ctx, 851e6f633e5STiwei Bie gfp_t gfp) 852e6f633e5STiwei Bie { 853e6f633e5STiwei Bie return virtqueue_add_split(_vq, sgs, total_sg, 854e6f633e5STiwei Bie out_sgs, in_sgs, data, ctx, gfp); 855e6f633e5STiwei Bie } 856e6f633e5STiwei Bie 857e6f633e5STiwei Bie /** 858e6f633e5STiwei Bie * virtqueue_add_sgs - expose buffers to other end 859e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 860e6f633e5STiwei Bie * @sgs: array of terminated scatterlists. 861e6f633e5STiwei Bie * @out_num: the number of scatterlists readable by other side 862e6f633e5STiwei Bie * @in_num: the number of scatterlists which are writable (after readable ones) 863e6f633e5STiwei Bie * @data: the token identifying the buffer. 864e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 865e6f633e5STiwei Bie * 866e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 867e6f633e5STiwei Bie * at the same time (except where noted). 868e6f633e5STiwei Bie * 869e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 870e6f633e5STiwei Bie */ 871e6f633e5STiwei Bie int virtqueue_add_sgs(struct virtqueue *_vq, 872e6f633e5STiwei Bie struct scatterlist *sgs[], 873e6f633e5STiwei Bie unsigned int out_sgs, 874e6f633e5STiwei Bie unsigned int in_sgs, 875e6f633e5STiwei Bie void *data, 876e6f633e5STiwei Bie gfp_t gfp) 877e6f633e5STiwei Bie { 878e6f633e5STiwei Bie unsigned int i, total_sg = 0; 879e6f633e5STiwei Bie 880e6f633e5STiwei Bie /* Count them first. */ 881e6f633e5STiwei Bie for (i = 0; i < out_sgs + in_sgs; i++) { 882e6f633e5STiwei Bie struct scatterlist *sg; 883e6f633e5STiwei Bie 884e6f633e5STiwei Bie for (sg = sgs[i]; sg; sg = sg_next(sg)) 885e6f633e5STiwei Bie total_sg++; 886e6f633e5STiwei Bie } 887e6f633e5STiwei Bie return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 888e6f633e5STiwei Bie data, NULL, gfp); 889e6f633e5STiwei Bie } 890e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 891e6f633e5STiwei Bie 892e6f633e5STiwei Bie /** 893e6f633e5STiwei Bie * virtqueue_add_outbuf - expose output buffers to other end 894e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 895e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 896e6f633e5STiwei Bie * @num: the number of entries in @sg readable by other side 897e6f633e5STiwei Bie * @data: the token identifying the buffer. 898e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 899e6f633e5STiwei Bie * 900e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 901e6f633e5STiwei Bie * at the same time (except where noted). 902e6f633e5STiwei Bie * 903e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 904e6f633e5STiwei Bie */ 905e6f633e5STiwei Bie int virtqueue_add_outbuf(struct virtqueue *vq, 906e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 907e6f633e5STiwei Bie void *data, 908e6f633e5STiwei Bie gfp_t gfp) 909e6f633e5STiwei Bie { 910e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 911e6f633e5STiwei Bie } 912e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 913e6f633e5STiwei Bie 914e6f633e5STiwei Bie /** 915e6f633e5STiwei Bie * virtqueue_add_inbuf - expose input buffers to other end 916e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 917e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 918e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 919e6f633e5STiwei Bie * @data: the token identifying the buffer. 920e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 921e6f633e5STiwei Bie * 922e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 923e6f633e5STiwei Bie * at the same time (except where noted). 924e6f633e5STiwei Bie * 925e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 926e6f633e5STiwei Bie */ 927e6f633e5STiwei Bie int virtqueue_add_inbuf(struct virtqueue *vq, 928e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 929e6f633e5STiwei Bie void *data, 930e6f633e5STiwei Bie gfp_t gfp) 931e6f633e5STiwei Bie { 932e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 933e6f633e5STiwei Bie } 934e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 935e6f633e5STiwei Bie 936e6f633e5STiwei Bie /** 937e6f633e5STiwei Bie * virtqueue_add_inbuf_ctx - expose input buffers to other end 938e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 939e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 940e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 941e6f633e5STiwei Bie * @data: the token identifying the buffer. 942e6f633e5STiwei Bie * @ctx: extra context for the token 943e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 944e6f633e5STiwei Bie * 945e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 946e6f633e5STiwei Bie * at the same time (except where noted). 947e6f633e5STiwei Bie * 948e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 949e6f633e5STiwei Bie */ 950e6f633e5STiwei Bie int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 951e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 952e6f633e5STiwei Bie void *data, 953e6f633e5STiwei Bie void *ctx, 954e6f633e5STiwei Bie gfp_t gfp) 955e6f633e5STiwei Bie { 956e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 957e6f633e5STiwei Bie } 958e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 959e6f633e5STiwei Bie 960e6f633e5STiwei Bie /** 961e6f633e5STiwei Bie * virtqueue_kick_prepare - first half of split virtqueue_kick call. 962e6f633e5STiwei Bie * @vq: the struct virtqueue 963e6f633e5STiwei Bie * 964e6f633e5STiwei Bie * Instead of virtqueue_kick(), you can do: 965e6f633e5STiwei Bie * if (virtqueue_kick_prepare(vq)) 966e6f633e5STiwei Bie * virtqueue_notify(vq); 967e6f633e5STiwei Bie * 968e6f633e5STiwei Bie * This is sometimes useful because the virtqueue_kick_prepare() needs 969e6f633e5STiwei Bie * to be serialized, but the actual virtqueue_notify() call does not. 970e6f633e5STiwei Bie */ 971e6f633e5STiwei Bie bool virtqueue_kick_prepare(struct virtqueue *_vq) 972e6f633e5STiwei Bie { 973e6f633e5STiwei Bie return virtqueue_kick_prepare_split(_vq); 974e6f633e5STiwei Bie } 975e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 976e6f633e5STiwei Bie 977e6f633e5STiwei Bie /** 978e6f633e5STiwei Bie * virtqueue_notify - second half of split virtqueue_kick call. 979e6f633e5STiwei Bie * @vq: the struct virtqueue 980e6f633e5STiwei Bie * 981e6f633e5STiwei Bie * This does not need to be serialized. 982e6f633e5STiwei Bie * 983e6f633e5STiwei Bie * Returns false if host notify failed or queue is broken, otherwise true. 984e6f633e5STiwei Bie */ 985e6f633e5STiwei Bie bool virtqueue_notify(struct virtqueue *_vq) 986e6f633e5STiwei Bie { 987e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 988e6f633e5STiwei Bie 989e6f633e5STiwei Bie if (unlikely(vq->broken)) 990e6f633e5STiwei Bie return false; 991e6f633e5STiwei Bie 992e6f633e5STiwei Bie /* Prod other side to tell it about changes. */ 993e6f633e5STiwei Bie if (!vq->notify(_vq)) { 994e6f633e5STiwei Bie vq->broken = true; 995e6f633e5STiwei Bie return false; 996e6f633e5STiwei Bie } 997e6f633e5STiwei Bie return true; 998e6f633e5STiwei Bie } 999e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_notify); 1000e6f633e5STiwei Bie 1001e6f633e5STiwei Bie /** 1002e6f633e5STiwei Bie * virtqueue_kick - update after add_buf 1003e6f633e5STiwei Bie * @vq: the struct virtqueue 1004e6f633e5STiwei Bie * 1005e6f633e5STiwei Bie * After one or more virtqueue_add_* calls, invoke this to kick 1006e6f633e5STiwei Bie * the other side. 1007e6f633e5STiwei Bie * 1008e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1009e6f633e5STiwei Bie * operations at the same time (except where noted). 1010e6f633e5STiwei Bie * 1011e6f633e5STiwei Bie * Returns false if kick failed, otherwise true. 1012e6f633e5STiwei Bie */ 1013e6f633e5STiwei Bie bool virtqueue_kick(struct virtqueue *vq) 1014e6f633e5STiwei Bie { 1015e6f633e5STiwei Bie if (virtqueue_kick_prepare(vq)) 1016e6f633e5STiwei Bie return virtqueue_notify(vq); 1017e6f633e5STiwei Bie return true; 1018e6f633e5STiwei Bie } 1019e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick); 1020e6f633e5STiwei Bie 1021e6f633e5STiwei Bie /** 1022e6f633e5STiwei Bie * virtqueue_get_buf - get the next used buffer 1023e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1024e6f633e5STiwei Bie * @len: the length written into the buffer 1025e6f633e5STiwei Bie * 1026e6f633e5STiwei Bie * If the device wrote data into the buffer, @len will be set to the 1027e6f633e5STiwei Bie * amount written. This means you don't need to clear the buffer 1028e6f633e5STiwei Bie * beforehand to ensure there's no data leakage in the case of short 1029e6f633e5STiwei Bie * writes. 1030e6f633e5STiwei Bie * 1031e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1032e6f633e5STiwei Bie * operations at the same time (except where noted). 1033e6f633e5STiwei Bie * 1034e6f633e5STiwei Bie * Returns NULL if there are no used buffers, or the "data" token 1035e6f633e5STiwei Bie * handed to virtqueue_add_*(). 1036e6f633e5STiwei Bie */ 1037e6f633e5STiwei Bie void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1038e6f633e5STiwei Bie void **ctx) 1039e6f633e5STiwei Bie { 1040e6f633e5STiwei Bie return virtqueue_get_buf_ctx_split(_vq, len, ctx); 1041e6f633e5STiwei Bie } 1042e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 1043e6f633e5STiwei Bie 1044e6f633e5STiwei Bie void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 1045e6f633e5STiwei Bie { 1046e6f633e5STiwei Bie return virtqueue_get_buf_ctx(_vq, len, NULL); 1047e6f633e5STiwei Bie } 1048e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf); 1049e6f633e5STiwei Bie 1050e6f633e5STiwei Bie /** 1051e6f633e5STiwei Bie * virtqueue_disable_cb - disable callbacks 1052e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1053e6f633e5STiwei Bie * 1054e6f633e5STiwei Bie * Note that this is not necessarily synchronous, hence unreliable and only 1055e6f633e5STiwei Bie * useful as an optimization. 1056e6f633e5STiwei Bie * 1057e6f633e5STiwei Bie * Unlike other operations, this need not be serialized. 1058e6f633e5STiwei Bie */ 1059e6f633e5STiwei Bie void virtqueue_disable_cb(struct virtqueue *_vq) 1060e6f633e5STiwei Bie { 1061e6f633e5STiwei Bie virtqueue_disable_cb_split(_vq); 1062e6f633e5STiwei Bie } 1063e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 1064e6f633e5STiwei Bie 1065e6f633e5STiwei Bie /** 1066e6f633e5STiwei Bie * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 1067e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1068e6f633e5STiwei Bie * 1069e6f633e5STiwei Bie * This re-enables callbacks; it returns current queue state 1070e6f633e5STiwei Bie * in an opaque unsigned value. This value should be later tested by 1071e6f633e5STiwei Bie * virtqueue_poll, to detect a possible race between the driver checking for 1072e6f633e5STiwei Bie * more work, and enabling callbacks. 1073e6f633e5STiwei Bie * 1074e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1075e6f633e5STiwei Bie * operations at the same time (except where noted). 1076e6f633e5STiwei Bie */ 1077e6f633e5STiwei Bie unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) 1078e6f633e5STiwei Bie { 1079e6f633e5STiwei Bie return virtqueue_enable_cb_prepare_split(_vq); 1080e6f633e5STiwei Bie } 1081e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 1082e6f633e5STiwei Bie 1083e6f633e5STiwei Bie /** 1084e6f633e5STiwei Bie * virtqueue_poll - query pending used buffers 1085e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1086e6f633e5STiwei Bie * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 1087e6f633e5STiwei Bie * 1088e6f633e5STiwei Bie * Returns "true" if there are pending used buffers in the queue. 1089e6f633e5STiwei Bie * 1090e6f633e5STiwei Bie * This does not need to be serialized. 1091e6f633e5STiwei Bie */ 1092e6f633e5STiwei Bie bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) 1093e6f633e5STiwei Bie { 1094e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1095e6f633e5STiwei Bie 1096e6f633e5STiwei Bie virtio_mb(vq->weak_barriers); 1097e6f633e5STiwei Bie return virtqueue_poll_split(_vq, last_used_idx); 1098e6f633e5STiwei Bie } 1099e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_poll); 1100e6f633e5STiwei Bie 1101e6f633e5STiwei Bie /** 1102e6f633e5STiwei Bie * virtqueue_enable_cb - restart callbacks after disable_cb. 1103e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1104e6f633e5STiwei Bie * 1105e6f633e5STiwei Bie * This re-enables callbacks; it returns "false" if there are pending 1106e6f633e5STiwei Bie * buffers in the queue, to detect a possible race between the driver 1107e6f633e5STiwei Bie * checking for more work, and enabling callbacks. 1108e6f633e5STiwei Bie * 1109e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1110e6f633e5STiwei Bie * operations at the same time (except where noted). 1111e6f633e5STiwei Bie */ 1112e6f633e5STiwei Bie bool virtqueue_enable_cb(struct virtqueue *_vq) 1113e6f633e5STiwei Bie { 1114e6f633e5STiwei Bie unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); 1115e6f633e5STiwei Bie 1116e6f633e5STiwei Bie return !virtqueue_poll(_vq, last_used_idx); 1117e6f633e5STiwei Bie } 1118e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 1119e6f633e5STiwei Bie 1120e6f633e5STiwei Bie /** 1121e6f633e5STiwei Bie * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 1122e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1123e6f633e5STiwei Bie * 1124e6f633e5STiwei Bie * This re-enables callbacks but hints to the other side to delay 1125e6f633e5STiwei Bie * interrupts until most of the available buffers have been processed; 1126e6f633e5STiwei Bie * it returns "false" if there are many pending buffers in the queue, 1127e6f633e5STiwei Bie * to detect a possible race between the driver checking for more work, 1128e6f633e5STiwei Bie * and enabling callbacks. 1129e6f633e5STiwei Bie * 1130e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1131e6f633e5STiwei Bie * operations at the same time (except where noted). 1132e6f633e5STiwei Bie */ 1133e6f633e5STiwei Bie bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 1134e6f633e5STiwei Bie { 1135e6f633e5STiwei Bie return virtqueue_enable_cb_delayed_split(_vq); 1136e6f633e5STiwei Bie } 1137e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 1138e6f633e5STiwei Bie 1139138fd251STiwei Bie /** 1140138fd251STiwei Bie * virtqueue_detach_unused_buf - detach first unused buffer 1141138fd251STiwei Bie * @vq: the struct virtqueue we're talking about. 1142138fd251STiwei Bie * 1143138fd251STiwei Bie * Returns NULL or the "data" token handed to virtqueue_add_*(). 1144138fd251STiwei Bie * This is not valid on an active queue; it is useful only for device 1145138fd251STiwei Bie * shutdown. 1146138fd251STiwei Bie */ 1147138fd251STiwei Bie void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 1148138fd251STiwei Bie { 1149138fd251STiwei Bie return virtqueue_detach_unused_buf_split(_vq); 1150138fd251STiwei Bie } 11517c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 1152c021eac4SShirley Ma 1153138fd251STiwei Bie static inline bool more_used(const struct vring_virtqueue *vq) 1154138fd251STiwei Bie { 1155138fd251STiwei Bie return more_used_split(vq); 1156138fd251STiwei Bie } 1157138fd251STiwei Bie 11580a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 11590a8a69ddSRusty Russell { 11600a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 11610a8a69ddSRusty Russell 11620a8a69ddSRusty Russell if (!more_used(vq)) { 11630a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 11640a8a69ddSRusty Russell return IRQ_NONE; 11650a8a69ddSRusty Russell } 11660a8a69ddSRusty Russell 11670a8a69ddSRusty Russell if (unlikely(vq->broken)) 11680a8a69ddSRusty Russell return IRQ_HANDLED; 11690a8a69ddSRusty Russell 11700a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 117118445c4dSRusty Russell if (vq->vq.callback) 117218445c4dSRusty Russell vq->vq.callback(&vq->vq); 11730a8a69ddSRusty Russell 11740a8a69ddSRusty Russell return IRQ_HANDLED; 11750a8a69ddSRusty Russell } 1176c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 11770a8a69ddSRusty Russell 11782a2d1382SAndy Lutomirski struct virtqueue *__vring_new_virtqueue(unsigned int index, 11792a2d1382SAndy Lutomirski struct vring vring, 11800a8a69ddSRusty Russell struct virtio_device *vdev, 11817b21e34fSRusty Russell bool weak_barriers, 1182f94682ddSMichael S. Tsirkin bool context, 118346f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *), 11849499f5e7SRusty Russell void (*callback)(struct virtqueue *), 11859499f5e7SRusty Russell const char *name) 11860a8a69ddSRusty Russell { 11870a8a69ddSRusty Russell unsigned int i; 11882a2d1382SAndy Lutomirski struct vring_virtqueue *vq; 11890a8a69ddSRusty Russell 1190cbeedb72STiwei Bie vq = kmalloc(sizeof(*vq), GFP_KERNEL); 11910a8a69ddSRusty Russell if (!vq) 11920a8a69ddSRusty Russell return NULL; 11930a8a69ddSRusty Russell 11940a8a69ddSRusty Russell vq->vq.callback = callback; 11950a8a69ddSRusty Russell vq->vq.vdev = vdev; 11969499f5e7SRusty Russell vq->vq.name = name; 11972a2d1382SAndy Lutomirski vq->vq.num_free = vring.num; 119806ca287dSRusty Russell vq->vq.index = index; 11992a2d1382SAndy Lutomirski vq->we_own_ring = false; 12000a8a69ddSRusty Russell vq->notify = notify; 12017b21e34fSRusty Russell vq->weak_barriers = weak_barriers; 12020a8a69ddSRusty Russell vq->broken = false; 12030a8a69ddSRusty Russell vq->last_used_idx = 0; 12040a8a69ddSRusty Russell vq->num_added = 0; 12059499f5e7SRusty Russell list_add_tail(&vq->vq.list, &vdev->vqs); 12060a8a69ddSRusty Russell #ifdef DEBUG 12070a8a69ddSRusty Russell vq->in_use = false; 1208e93300b1SRusty Russell vq->last_add_time_valid = false; 12090a8a69ddSRusty Russell #endif 12100a8a69ddSRusty Russell 12115a08b04fSMichael S. Tsirkin vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 12125a08b04fSMichael S. Tsirkin !context; 1213a5c262c5SMichael S. Tsirkin vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 12149fa29b9dSMark McLoughlin 1215*d79dca75STiwei Bie vq->split.queue_dma_addr = 0; 1216*d79dca75STiwei Bie vq->split.queue_size_in_bytes = 0; 1217*d79dca75STiwei Bie 1218e593bf97STiwei Bie vq->split.vring = vring; 1219e593bf97STiwei Bie vq->split.avail_flags_shadow = 0; 1220e593bf97STiwei Bie vq->split.avail_idx_shadow = 0; 1221e593bf97STiwei Bie 12220a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 1223f277ec42SVenkatesh Srinivas if (!callback) { 1224e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 12250ea1e4a6SLadi Prosek if (!vq->event) 1226e593bf97STiwei Bie vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 1227e593bf97STiwei Bie vq->split.avail_flags_shadow); 1228f277ec42SVenkatesh Srinivas } 12290a8a69ddSRusty Russell 1230cbeedb72STiwei Bie vq->split.desc_state = kmalloc_array(vring.num, 1231cbeedb72STiwei Bie sizeof(struct vring_desc_state_split), GFP_KERNEL); 1232cbeedb72STiwei Bie if (!vq->split.desc_state) { 1233cbeedb72STiwei Bie kfree(vq); 1234cbeedb72STiwei Bie return NULL; 1235cbeedb72STiwei Bie } 1236cbeedb72STiwei Bie 12370a8a69ddSRusty Russell /* Put everything in free lists. */ 12380a8a69ddSRusty Russell vq->free_head = 0; 12392a2d1382SAndy Lutomirski for (i = 0; i < vring.num-1; i++) 1240e593bf97STiwei Bie vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); 1241cbeedb72STiwei Bie memset(vq->split.desc_state, 0, vring.num * 1242cbeedb72STiwei Bie sizeof(struct vring_desc_state_split)); 12430a8a69ddSRusty Russell 12440a8a69ddSRusty Russell return &vq->vq; 12450a8a69ddSRusty Russell } 12462a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 12472a2d1382SAndy Lutomirski 12482a2d1382SAndy Lutomirski struct virtqueue *vring_create_virtqueue( 12492a2d1382SAndy Lutomirski unsigned int index, 12502a2d1382SAndy Lutomirski unsigned int num, 12512a2d1382SAndy Lutomirski unsigned int vring_align, 12522a2d1382SAndy Lutomirski struct virtio_device *vdev, 12532a2d1382SAndy Lutomirski bool weak_barriers, 12542a2d1382SAndy Lutomirski bool may_reduce_num, 1255f94682ddSMichael S. Tsirkin bool context, 12562a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *), 12572a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *), 12582a2d1382SAndy Lutomirski const char *name) 12592a2d1382SAndy Lutomirski { 1260*d79dca75STiwei Bie return vring_create_virtqueue_split(index, num, vring_align, 1261*d79dca75STiwei Bie vdev, weak_barriers, may_reduce_num, 1262*d79dca75STiwei Bie context, notify, callback, name); 12632a2d1382SAndy Lutomirski } 12642a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(vring_create_virtqueue); 12652a2d1382SAndy Lutomirski 12662a2d1382SAndy Lutomirski struct virtqueue *vring_new_virtqueue(unsigned int index, 12672a2d1382SAndy Lutomirski unsigned int num, 12682a2d1382SAndy Lutomirski unsigned int vring_align, 12692a2d1382SAndy Lutomirski struct virtio_device *vdev, 12702a2d1382SAndy Lutomirski bool weak_barriers, 1271f94682ddSMichael S. Tsirkin bool context, 12722a2d1382SAndy Lutomirski void *pages, 12732a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *vq), 12742a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *vq), 12752a2d1382SAndy Lutomirski const char *name) 12762a2d1382SAndy Lutomirski { 12772a2d1382SAndy Lutomirski struct vring vring; 12782a2d1382SAndy Lutomirski vring_init(&vring, num, pages, vring_align); 1279f94682ddSMichael S. Tsirkin return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 12802a2d1382SAndy Lutomirski notify, callback, name); 12812a2d1382SAndy Lutomirski } 1282c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 12830a8a69ddSRusty Russell 12842a2d1382SAndy Lutomirski void vring_del_virtqueue(struct virtqueue *_vq) 12850a8a69ddSRusty Russell { 12862a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq); 12872a2d1382SAndy Lutomirski 12882a2d1382SAndy Lutomirski if (vq->we_own_ring) { 1289*d79dca75STiwei Bie vring_free_queue(vq->vq.vdev, 1290*d79dca75STiwei Bie vq->split.queue_size_in_bytes, 1291*d79dca75STiwei Bie vq->split.vring.desc, 1292*d79dca75STiwei Bie vq->split.queue_dma_addr); 1293cbeedb72STiwei Bie kfree(vq->split.desc_state); 12942a2d1382SAndy Lutomirski } 12952a2d1382SAndy Lutomirski list_del(&_vq->list); 12962a2d1382SAndy Lutomirski kfree(vq); 12970a8a69ddSRusty Russell } 1298c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 12990a8a69ddSRusty Russell 1300e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 1301e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 1302e34f8725SRusty Russell { 1303e34f8725SRusty Russell unsigned int i; 1304e34f8725SRusty Russell 1305e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 1306e34f8725SRusty Russell switch (i) { 13079fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC: 13089fa29b9dSMark McLoughlin break; 1309a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX: 1310a5c262c5SMichael S. Tsirkin break; 1311747ae34aSMichael S. Tsirkin case VIRTIO_F_VERSION_1: 1312747ae34aSMichael S. Tsirkin break; 13131a937693SMichael S. Tsirkin case VIRTIO_F_IOMMU_PLATFORM: 13141a937693SMichael S. Tsirkin break; 1315e34f8725SRusty Russell default: 1316e34f8725SRusty Russell /* We don't understand this bit. */ 1317e16e12beSMichael S. Tsirkin __virtio_clear_bit(vdev, i); 1318e34f8725SRusty Russell } 1319e34f8725SRusty Russell } 1320e34f8725SRusty Russell } 1321e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features); 1322e34f8725SRusty Russell 13235dfc1762SRusty Russell /** 13245dfc1762SRusty Russell * virtqueue_get_vring_size - return the size of the virtqueue's vring 13255dfc1762SRusty Russell * @vq: the struct virtqueue containing the vring of interest. 13265dfc1762SRusty Russell * 13275dfc1762SRusty Russell * Returns the size of the vring. This is mainly used for boasting to 13285dfc1762SRusty Russell * userspace. Unlike other operations, this need not be serialized. 13295dfc1762SRusty Russell */ 13308f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 13318f9f4668SRick Jones { 13328f9f4668SRick Jones 13338f9f4668SRick Jones struct vring_virtqueue *vq = to_vvq(_vq); 13348f9f4668SRick Jones 1335e593bf97STiwei Bie return vq->split.vring.num; 13368f9f4668SRick Jones } 13378f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 13388f9f4668SRick Jones 1339b3b32c94SHeinz Graalfs bool virtqueue_is_broken(struct virtqueue *_vq) 1340b3b32c94SHeinz Graalfs { 1341b3b32c94SHeinz Graalfs struct vring_virtqueue *vq = to_vvq(_vq); 1342b3b32c94SHeinz Graalfs 1343b3b32c94SHeinz Graalfs return vq->broken; 1344b3b32c94SHeinz Graalfs } 1345b3b32c94SHeinz Graalfs EXPORT_SYMBOL_GPL(virtqueue_is_broken); 1346b3b32c94SHeinz Graalfs 1347e2dcdfe9SRusty Russell /* 1348e2dcdfe9SRusty Russell * This should prevent the device from being used, allowing drivers to 1349e2dcdfe9SRusty Russell * recover. You may need to grab appropriate locks to flush. 1350e2dcdfe9SRusty Russell */ 1351e2dcdfe9SRusty Russell void virtio_break_device(struct virtio_device *dev) 1352e2dcdfe9SRusty Russell { 1353e2dcdfe9SRusty Russell struct virtqueue *_vq; 1354e2dcdfe9SRusty Russell 1355e2dcdfe9SRusty Russell list_for_each_entry(_vq, &dev->vqs, list) { 1356e2dcdfe9SRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 1357e2dcdfe9SRusty Russell vq->broken = true; 1358e2dcdfe9SRusty Russell } 1359e2dcdfe9SRusty Russell } 1360e2dcdfe9SRusty Russell EXPORT_SYMBOL_GPL(virtio_break_device); 1361e2dcdfe9SRusty Russell 13622a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 136389062652SCornelia Huck { 136489062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq); 136589062652SCornelia Huck 13662a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 136789062652SCornelia Huck 1368*d79dca75STiwei Bie return vq->split.queue_dma_addr; 13692a2d1382SAndy Lutomirski } 13702a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 13712a2d1382SAndy Lutomirski 13722a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 137389062652SCornelia Huck { 137489062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq); 137589062652SCornelia Huck 13762a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 13772a2d1382SAndy Lutomirski 1378*d79dca75STiwei Bie return vq->split.queue_dma_addr + 1379e593bf97STiwei Bie ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 138089062652SCornelia Huck } 13812a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 13822a2d1382SAndy Lutomirski 13832a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 13842a2d1382SAndy Lutomirski { 13852a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq); 13862a2d1382SAndy Lutomirski 13872a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 13882a2d1382SAndy Lutomirski 1389*d79dca75STiwei Bie return vq->split.queue_dma_addr + 1390e593bf97STiwei Bie ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 13912a2d1382SAndy Lutomirski } 13922a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 13932a2d1382SAndy Lutomirski 13942a2d1382SAndy Lutomirski const struct vring *virtqueue_get_vring(struct virtqueue *vq) 13952a2d1382SAndy Lutomirski { 1396e593bf97STiwei Bie return &to_vvq(vq)->split.vring; 13972a2d1382SAndy Lutomirski } 13982a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_vring); 139989062652SCornelia Huck 1400c6fd4701SRusty Russell MODULE_LICENSE("GPL"); 1401