1fd534e9bSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 20a8a69ddSRusty Russell /* Virtio ring implementation. 30a8a69ddSRusty Russell * 40a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 50a8a69ddSRusty Russell */ 60a8a69ddSRusty Russell #include <linux/virtio.h> 70a8a69ddSRusty Russell #include <linux/virtio_ring.h> 8e34f8725SRusty Russell #include <linux/virtio_config.h> 90a8a69ddSRusty Russell #include <linux/device.h> 105a0e3ad6STejun Heo #include <linux/slab.h> 11b5a2c4f1SPaul Gortmaker #include <linux/module.h> 12e93300b1SRusty Russell #include <linux/hrtimer.h> 13780bc790SAndy Lutomirski #include <linux/dma-mapping.h> 14f8ce7263SMichael S. Tsirkin #include <linux/spinlock.h> 1578fe3987SAndy Lutomirski #include <xen/xen.h> 160a8a69ddSRusty Russell 170a8a69ddSRusty Russell #ifdef DEBUG 180a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 199499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 209499f5e7SRusty Russell do { \ 219499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 229499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 239499f5e7SRusty Russell BUG(); \ 249499f5e7SRusty Russell } while (0) 25c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 263a35ce7dSRoel Kluin #define START_USE(_vq) \ 27c5f841f1SRusty Russell do { \ 28c5f841f1SRusty Russell if ((_vq)->in_use) \ 299499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 309499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 31c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 32c5f841f1SRusty Russell } while (0) 333a35ce7dSRoel Kluin #define END_USE(_vq) \ 3497a545abSRusty Russell do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 354d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(_vq) \ 364d6a105eSTiwei Bie do { \ 374d6a105eSTiwei Bie ktime_t now = ktime_get(); \ 384d6a105eSTiwei Bie \ 394d6a105eSTiwei Bie /* No kick or get, with .1 second between? Warn. 
*/ \ 404d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) \ 414d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(now, \ 424d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 434d6a105eSTiwei Bie (_vq)->last_add_time = now; \ 444d6a105eSTiwei Bie (_vq)->last_add_time_valid = true; \ 454d6a105eSTiwei Bie } while (0) 464d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(_vq) \ 474d6a105eSTiwei Bie do { \ 484d6a105eSTiwei Bie if ((_vq)->last_add_time_valid) { \ 494d6a105eSTiwei Bie WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \ 504d6a105eSTiwei Bie (_vq)->last_add_time)) > 100); \ 514d6a105eSTiwei Bie } \ 524d6a105eSTiwei Bie } while (0) 534d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(_vq) \ 544d6a105eSTiwei Bie ((_vq)->last_add_time_valid = false) 550a8a69ddSRusty Russell #else 569499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 579499f5e7SRusty Russell do { \ 589499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 599499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 609499f5e7SRusty Russell (_vq)->broken = true; \ 619499f5e7SRusty Russell } while (0) 620a8a69ddSRusty Russell #define START_USE(vq) 630a8a69ddSRusty Russell #define END_USE(vq) 644d6a105eSTiwei Bie #define LAST_ADD_TIME_UPDATE(vq) 654d6a105eSTiwei Bie #define LAST_ADD_TIME_CHECK(vq) 664d6a105eSTiwei Bie #define LAST_ADD_TIME_INVALID(vq) 670a8a69ddSRusty Russell #endif 680a8a69ddSRusty Russell 69cbeedb72STiwei Bie struct vring_desc_state_split { 70780bc790SAndy Lutomirski void *data; /* Data for callback. */ 71780bc790SAndy Lutomirski struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ 72780bc790SAndy Lutomirski }; 73780bc790SAndy Lutomirski 741ce9e605STiwei Bie struct vring_desc_state_packed { 751ce9e605STiwei Bie void *data; /* Data for callback. */ 761ce9e605STiwei Bie struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ 771ce9e605STiwei Bie u16 num; /* Descriptor list length. */ 781ce9e605STiwei Bie u16 last; /* The last desc state in a list. 
*/ 791ce9e605STiwei Bie }; 801ce9e605STiwei Bie 811f28750fSJason Wang struct vring_desc_extra { 82ef5c366fSJason Wang dma_addr_t addr; /* Descriptor DMA addr. */ 83ef5c366fSJason Wang u32 len; /* Descriptor length. */ 841ce9e605STiwei Bie u16 flags; /* Descriptor flags. */ 85aeef9b47SJason Wang u16 next; /* The next desc state in a list. */ 861ce9e605STiwei Bie }; 871ce9e605STiwei Bie 88d76136e4SXuan Zhuo struct vring_virtqueue_split { 89d76136e4SXuan Zhuo /* Actual memory layout for this queue. */ 90d76136e4SXuan Zhuo struct vring vring; 91d76136e4SXuan Zhuo 92d76136e4SXuan Zhuo /* Last written value to avail->flags */ 93d76136e4SXuan Zhuo u16 avail_flags_shadow; 94d76136e4SXuan Zhuo 95d76136e4SXuan Zhuo /* 96d76136e4SXuan Zhuo * Last written value to avail->idx in 97d76136e4SXuan Zhuo * guest byte order. 98d76136e4SXuan Zhuo */ 99d76136e4SXuan Zhuo u16 avail_idx_shadow; 100d76136e4SXuan Zhuo 101d76136e4SXuan Zhuo /* Per-descriptor state. */ 102d76136e4SXuan Zhuo struct vring_desc_state_split *desc_state; 103d76136e4SXuan Zhuo struct vring_desc_extra *desc_extra; 104d76136e4SXuan Zhuo 105d76136e4SXuan Zhuo /* DMA address and size information */ 106d76136e4SXuan Zhuo dma_addr_t queue_dma_addr; 107d76136e4SXuan Zhuo size_t queue_size_in_bytes; 108d76136e4SXuan Zhuo }; 109d76136e4SXuan Zhuo 110d76136e4SXuan Zhuo struct vring_virtqueue_packed { 111d76136e4SXuan Zhuo /* Actual memory layout for this queue. */ 112d76136e4SXuan Zhuo struct { 113d76136e4SXuan Zhuo unsigned int num; 114d76136e4SXuan Zhuo struct vring_packed_desc *desc; 115d76136e4SXuan Zhuo struct vring_packed_desc_event *driver; 116d76136e4SXuan Zhuo struct vring_packed_desc_event *device; 117d76136e4SXuan Zhuo } vring; 118d76136e4SXuan Zhuo 119d76136e4SXuan Zhuo /* Driver ring wrap counter. */ 120d76136e4SXuan Zhuo bool avail_wrap_counter; 121d76136e4SXuan Zhuo 122d76136e4SXuan Zhuo /* Avail used flags. 
*/ 123d76136e4SXuan Zhuo u16 avail_used_flags; 124d76136e4SXuan Zhuo 125d76136e4SXuan Zhuo /* Index of the next avail descriptor. */ 126d76136e4SXuan Zhuo u16 next_avail_idx; 127d76136e4SXuan Zhuo 128d76136e4SXuan Zhuo /* 129d76136e4SXuan Zhuo * Last written value to driver->flags in 130d76136e4SXuan Zhuo * guest byte order. 131d76136e4SXuan Zhuo */ 132d76136e4SXuan Zhuo u16 event_flags_shadow; 133d76136e4SXuan Zhuo 134d76136e4SXuan Zhuo /* Per-descriptor state. */ 135d76136e4SXuan Zhuo struct vring_desc_state_packed *desc_state; 136d76136e4SXuan Zhuo struct vring_desc_extra *desc_extra; 137d76136e4SXuan Zhuo 138d76136e4SXuan Zhuo /* DMA address and size information */ 139d76136e4SXuan Zhuo dma_addr_t ring_dma_addr; 140d76136e4SXuan Zhuo dma_addr_t driver_event_dma_addr; 141d76136e4SXuan Zhuo dma_addr_t device_event_dma_addr; 142d76136e4SXuan Zhuo size_t ring_size_in_bytes; 143d76136e4SXuan Zhuo size_t event_size_in_bytes; 144d76136e4SXuan Zhuo }; 145d76136e4SXuan Zhuo 14643b4f721SMichael S. Tsirkin struct vring_virtqueue { 1470a8a69ddSRusty Russell struct virtqueue vq; 1480a8a69ddSRusty Russell 1491ce9e605STiwei Bie /* Is this a packed ring? */ 1501ce9e605STiwei Bie bool packed_ring; 1511ce9e605STiwei Bie 152fb3fba6bSTiwei Bie /* Is DMA API used? */ 153fb3fba6bSTiwei Bie bool use_dma_api; 154fb3fba6bSTiwei Bie 1557b21e34fSRusty Russell /* Can we use weak barriers? */ 1567b21e34fSRusty Russell bool weak_barriers; 1577b21e34fSRusty Russell 1580a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 1590a8a69ddSRusty Russell bool broken; 1600a8a69ddSRusty Russell 1619fa29b9dSMark McLoughlin /* Host supports indirect buffers */ 1629fa29b9dSMark McLoughlin bool indirect; 1639fa29b9dSMark McLoughlin 164a5c262c5SMichael S. Tsirkin /* Host publishes avail event idx */ 165a5c262c5SMichael S. Tsirkin bool event; 166a5c262c5SMichael S. Tsirkin 1670a8a69ddSRusty Russell /* Head of free buffer list. 
*/ 1680a8a69ddSRusty Russell unsigned int free_head; 1690a8a69ddSRusty Russell /* Number we've added since last sync. */ 1700a8a69ddSRusty Russell unsigned int num_added; 1710a8a69ddSRusty Russell 172a7722890Shuangjie.albert /* Last used index we've seen. 173a7722890Shuangjie.albert * for split ring, it just contains last used index 174a7722890Shuangjie.albert * for packed ring: 175a7722890Shuangjie.albert * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. 176a7722890Shuangjie.albert * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. 177a7722890Shuangjie.albert */ 1781bc4953eSAnthony Liguori u16 last_used_idx; 1790a8a69ddSRusty Russell 1808d622d21SMichael S. Tsirkin /* Hint for event idx: already triggered no need to disable. */ 1818d622d21SMichael S. Tsirkin bool event_triggered; 1828d622d21SMichael S. Tsirkin 1831ce9e605STiwei Bie union { 1841ce9e605STiwei Bie /* Available for split ring */ 185d76136e4SXuan Zhuo struct vring_virtqueue_split split; 186f277ec42SVenkatesh Srinivas 1871ce9e605STiwei Bie /* Available for packed ring */ 188d76136e4SXuan Zhuo struct vring_virtqueue_packed packed; 1891ce9e605STiwei Bie }; 1901ce9e605STiwei Bie 1910a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 19246f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *vq); 1930a8a69ddSRusty Russell 1942a2d1382SAndy Lutomirski /* DMA, allocation, and size information */ 1952a2d1382SAndy Lutomirski bool we_own_ring; 1962a2d1382SAndy Lutomirski 1970a8a69ddSRusty Russell #ifdef DEBUG 1980a8a69ddSRusty Russell /* They're supposed to lock for us. */ 1990a8a69ddSRusty Russell unsigned int in_use; 200e93300b1SRusty Russell 201e93300b1SRusty Russell /* Figure out if their kicks are too delayed. 
*/ 202e93300b1SRusty Russell bool last_add_time_valid; 203e93300b1SRusty Russell ktime_t last_add_time; 2040a8a69ddSRusty Russell #endif 2050a8a69ddSRusty Russell }; 2060a8a69ddSRusty Russell 20707d9629dSXuan Zhuo static struct virtqueue *__vring_new_virtqueue(unsigned int index, 208cd4c812aSXuan Zhuo struct vring_virtqueue_split *vring_split, 20907d9629dSXuan Zhuo struct virtio_device *vdev, 21007d9629dSXuan Zhuo bool weak_barriers, 21107d9629dSXuan Zhuo bool context, 21207d9629dSXuan Zhuo bool (*notify)(struct virtqueue *), 21307d9629dSXuan Zhuo void (*callback)(struct virtqueue *), 21407d9629dSXuan Zhuo const char *name); 215*a2b36c8dSXuan Zhuo static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num); 216e6f633e5STiwei Bie 217e6f633e5STiwei Bie /* 218e6f633e5STiwei Bie * Helpers. 219e6f633e5STiwei Bie */ 220e6f633e5STiwei Bie 2210a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 2220a8a69ddSRusty Russell 22335c51e09SXianting Tian static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq, 2242f18c2d1STiwei Bie unsigned int total_sg) 2252f18c2d1STiwei Bie { 2262f18c2d1STiwei Bie /* 2272f18c2d1STiwei Bie * If the host supports indirect descriptor tables, and we have multiple 2282f18c2d1STiwei Bie * buffers, then go indirect. FIXME: tune this threshold 2292f18c2d1STiwei Bie */ 2302f18c2d1STiwei Bie return (vq->indirect && total_sg > 1 && vq->vq.num_free); 2312f18c2d1STiwei Bie } 2322f18c2d1STiwei Bie 233d26c96c8SAndy Lutomirski /* 2341a937693SMichael S. Tsirkin * Modern virtio devices have feature bits to specify whether they need a 2351a937693SMichael S. Tsirkin * quirk and bypass the IOMMU. If not there, just use the DMA API. 2361a937693SMichael S. Tsirkin * 2371a937693SMichael S. Tsirkin * If there, the interaction between virtio and DMA API is messy. 
238d26c96c8SAndy Lutomirski * 239d26c96c8SAndy Lutomirski * On most systems with virtio, physical addresses match bus addresses, 240d26c96c8SAndy Lutomirski * and it doesn't particularly matter whether we use the DMA API. 241d26c96c8SAndy Lutomirski * 242d26c96c8SAndy Lutomirski * On some systems, including Xen and any system with a physical device 243d26c96c8SAndy Lutomirski * that speaks virtio behind a physical IOMMU, we must use the DMA API 244d26c96c8SAndy Lutomirski * for virtio DMA to work at all. 245d26c96c8SAndy Lutomirski * 246d26c96c8SAndy Lutomirski * On other systems, including SPARC and PPC64, virtio-pci devices are 247d26c96c8SAndy Lutomirski * enumerated as though they are behind an IOMMU, but the virtio host 248d26c96c8SAndy Lutomirski * ignores the IOMMU, so we must either pretend that the IOMMU isn't 249d26c96c8SAndy Lutomirski * there or somehow map everything as the identity. 250d26c96c8SAndy Lutomirski * 251d26c96c8SAndy Lutomirski * For the time being, we preserve historic behavior and bypass the DMA 252d26c96c8SAndy Lutomirski * API. 2531a937693SMichael S. Tsirkin * 2541a937693SMichael S. Tsirkin * TODO: install a per-device DMA ops structure that does the right thing 2551a937693SMichael S. Tsirkin * taking into account all the above quirks, and use the DMA API 2561a937693SMichael S. Tsirkin * unconditionally on data path. 257d26c96c8SAndy Lutomirski */ 258d26c96c8SAndy Lutomirski 259d26c96c8SAndy Lutomirski static bool vring_use_dma_api(struct virtio_device *vdev) 260d26c96c8SAndy Lutomirski { 26124b6842aSMichael S. Tsirkin if (!virtio_has_dma_quirk(vdev)) 2621a937693SMichael S. Tsirkin return true; 2631a937693SMichael S. Tsirkin 2641a937693SMichael S. Tsirkin /* Otherwise, we are left to guess. */ 26578fe3987SAndy Lutomirski /* 26678fe3987SAndy Lutomirski * In theory, it's possible to have a buggy QEMU-supposed 26778fe3987SAndy Lutomirski * emulated Q35 IOMMU and Xen enabled at the same time. 
On 26878fe3987SAndy Lutomirski * such a configuration, virtio has never worked and will 26978fe3987SAndy Lutomirski * not work without an even larger kludge. Instead, enable 27078fe3987SAndy Lutomirski * the DMA API if we're a Xen guest, which at least allows 27178fe3987SAndy Lutomirski * all of the sensible Xen configurations to work correctly. 27278fe3987SAndy Lutomirski */ 27378fe3987SAndy Lutomirski if (xen_domain()) 27478fe3987SAndy Lutomirski return true; 27578fe3987SAndy Lutomirski 276d26c96c8SAndy Lutomirski return false; 277d26c96c8SAndy Lutomirski } 278d26c96c8SAndy Lutomirski 279e6d6dd6cSJoerg Roedel size_t virtio_max_dma_size(struct virtio_device *vdev) 280e6d6dd6cSJoerg Roedel { 281e6d6dd6cSJoerg Roedel size_t max_segment_size = SIZE_MAX; 282e6d6dd6cSJoerg Roedel 283e6d6dd6cSJoerg Roedel if (vring_use_dma_api(vdev)) 284817fc978SWill Deacon max_segment_size = dma_max_mapping_size(vdev->dev.parent); 285e6d6dd6cSJoerg Roedel 286e6d6dd6cSJoerg Roedel return max_segment_size; 287e6d6dd6cSJoerg Roedel } 288e6d6dd6cSJoerg Roedel EXPORT_SYMBOL_GPL(virtio_max_dma_size); 289e6d6dd6cSJoerg Roedel 290d79dca75STiwei Bie static void *vring_alloc_queue(struct virtio_device *vdev, size_t size, 291d79dca75STiwei Bie dma_addr_t *dma_handle, gfp_t flag) 292d79dca75STiwei Bie { 293d79dca75STiwei Bie if (vring_use_dma_api(vdev)) { 294d79dca75STiwei Bie return dma_alloc_coherent(vdev->dev.parent, size, 295d79dca75STiwei Bie dma_handle, flag); 296d79dca75STiwei Bie } else { 297d79dca75STiwei Bie void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag); 298d79dca75STiwei Bie 299d79dca75STiwei Bie if (queue) { 300d79dca75STiwei Bie phys_addr_t phys_addr = virt_to_phys(queue); 301d79dca75STiwei Bie *dma_handle = (dma_addr_t)phys_addr; 302d79dca75STiwei Bie 303d79dca75STiwei Bie /* 304d79dca75STiwei Bie * Sanity check: make sure we didn't truncate 305d79dca75STiwei Bie * the address. 
The only arches I can find that 306d79dca75STiwei Bie * have 64-bit phys_addr_t but 32-bit dma_addr_t 307d79dca75STiwei Bie * are certain non-highmem MIPS and x86 308d79dca75STiwei Bie * configurations, but these configurations 309d79dca75STiwei Bie * should never allocate physical pages above 32 310d79dca75STiwei Bie * bits, so this is fine. Just in case, throw a 311d79dca75STiwei Bie * warning and abort if we end up with an 312d79dca75STiwei Bie * unrepresentable address. 313d79dca75STiwei Bie */ 314d79dca75STiwei Bie if (WARN_ON_ONCE(*dma_handle != phys_addr)) { 315d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 316d79dca75STiwei Bie return NULL; 317d79dca75STiwei Bie } 318d79dca75STiwei Bie } 319d79dca75STiwei Bie return queue; 320d79dca75STiwei Bie } 321d79dca75STiwei Bie } 322d79dca75STiwei Bie 323d79dca75STiwei Bie static void vring_free_queue(struct virtio_device *vdev, size_t size, 324d79dca75STiwei Bie void *queue, dma_addr_t dma_handle) 325d79dca75STiwei Bie { 326d79dca75STiwei Bie if (vring_use_dma_api(vdev)) 327d79dca75STiwei Bie dma_free_coherent(vdev->dev.parent, size, queue, dma_handle); 328d79dca75STiwei Bie else 329d79dca75STiwei Bie free_pages_exact(queue, PAGE_ALIGN(size)); 330d79dca75STiwei Bie } 331d79dca75STiwei Bie 332780bc790SAndy Lutomirski /* 333780bc790SAndy Lutomirski * The DMA ops on various arches are rather gnarly right now, and 334780bc790SAndy Lutomirski * making all of the arch DMA ops work on the vring device itself 335780bc790SAndy Lutomirski * is a mess. For now, we use the parent device for DMA ops. 336780bc790SAndy Lutomirski */ 33775bfa81bSMichael S. Tsirkin static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) 338780bc790SAndy Lutomirski { 339780bc790SAndy Lutomirski return vq->vq.vdev->dev.parent; 340780bc790SAndy Lutomirski } 341780bc790SAndy Lutomirski 342780bc790SAndy Lutomirski /* Map one sg entry. 
*/ 343780bc790SAndy Lutomirski static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq, 344780bc790SAndy Lutomirski struct scatterlist *sg, 345780bc790SAndy Lutomirski enum dma_data_direction direction) 346780bc790SAndy Lutomirski { 347fb3fba6bSTiwei Bie if (!vq->use_dma_api) 348780bc790SAndy Lutomirski return (dma_addr_t)sg_phys(sg); 349780bc790SAndy Lutomirski 350780bc790SAndy Lutomirski /* 351780bc790SAndy Lutomirski * We can't use dma_map_sg, because we don't use scatterlists in 352780bc790SAndy Lutomirski * the way it expects (we don't guarantee that the scatterlist 353780bc790SAndy Lutomirski * will exist for the lifetime of the mapping). 354780bc790SAndy Lutomirski */ 355780bc790SAndy Lutomirski return dma_map_page(vring_dma_dev(vq), 356780bc790SAndy Lutomirski sg_page(sg), sg->offset, sg->length, 357780bc790SAndy Lutomirski direction); 358780bc790SAndy Lutomirski } 359780bc790SAndy Lutomirski 360780bc790SAndy Lutomirski static dma_addr_t vring_map_single(const struct vring_virtqueue *vq, 361780bc790SAndy Lutomirski void *cpu_addr, size_t size, 362780bc790SAndy Lutomirski enum dma_data_direction direction) 363780bc790SAndy Lutomirski { 364fb3fba6bSTiwei Bie if (!vq->use_dma_api) 365780bc790SAndy Lutomirski return (dma_addr_t)virt_to_phys(cpu_addr); 366780bc790SAndy Lutomirski 367780bc790SAndy Lutomirski return dma_map_single(vring_dma_dev(vq), 368780bc790SAndy Lutomirski cpu_addr, size, direction); 369780bc790SAndy Lutomirski } 370780bc790SAndy Lutomirski 371e6f633e5STiwei Bie static int vring_mapping_error(const struct vring_virtqueue *vq, 372e6f633e5STiwei Bie dma_addr_t addr) 373e6f633e5STiwei Bie { 374fb3fba6bSTiwei Bie if (!vq->use_dma_api) 375e6f633e5STiwei Bie return 0; 376e6f633e5STiwei Bie 377e6f633e5STiwei Bie return dma_mapping_error(vring_dma_dev(vq), addr); 378e6f633e5STiwei Bie } 379e6f633e5STiwei Bie 3803a897128SXuan Zhuo static void virtqueue_init(struct vring_virtqueue *vq, u32 num) 3813a897128SXuan Zhuo { 3823a897128SXuan Zhuo 
vq->vq.num_free = num; 3833a897128SXuan Zhuo 3843a897128SXuan Zhuo if (vq->packed_ring) 3853a897128SXuan Zhuo vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 3863a897128SXuan Zhuo else 3873a897128SXuan Zhuo vq->last_used_idx = 0; 3883a897128SXuan Zhuo 3893a897128SXuan Zhuo vq->event_triggered = false; 3903a897128SXuan Zhuo vq->num_added = 0; 3913a897128SXuan Zhuo 3923a897128SXuan Zhuo #ifdef DEBUG 3933a897128SXuan Zhuo vq->in_use = false; 3943a897128SXuan Zhuo vq->last_add_time_valid = false; 3953a897128SXuan Zhuo #endif 3963a897128SXuan Zhuo } 3973a897128SXuan Zhuo 398e6f633e5STiwei Bie 399e6f633e5STiwei Bie /* 400e6f633e5STiwei Bie * Split ring specific functions - *_split(). 401e6f633e5STiwei Bie */ 402e6f633e5STiwei Bie 40372b5e895SJason Wang static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, 404780bc790SAndy Lutomirski struct vring_desc *desc) 405780bc790SAndy Lutomirski { 406780bc790SAndy Lutomirski u16 flags; 407780bc790SAndy Lutomirski 408fb3fba6bSTiwei Bie if (!vq->use_dma_api) 409780bc790SAndy Lutomirski return; 410780bc790SAndy Lutomirski 411780bc790SAndy Lutomirski flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); 412780bc790SAndy Lutomirski 413780bc790SAndy Lutomirski dma_unmap_page(vring_dma_dev(vq), 414780bc790SAndy Lutomirski virtio64_to_cpu(vq->vq.vdev, desc->addr), 415780bc790SAndy Lutomirski virtio32_to_cpu(vq->vq.vdev, desc->len), 416780bc790SAndy Lutomirski (flags & VRING_DESC_F_WRITE) ? 
417780bc790SAndy Lutomirski DMA_FROM_DEVICE : DMA_TO_DEVICE); 418780bc790SAndy Lutomirski } 419780bc790SAndy Lutomirski 42072b5e895SJason Wang static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, 42172b5e895SJason Wang unsigned int i) 42272b5e895SJason Wang { 42372b5e895SJason Wang struct vring_desc_extra *extra = vq->split.desc_extra; 42472b5e895SJason Wang u16 flags; 42572b5e895SJason Wang 42672b5e895SJason Wang if (!vq->use_dma_api) 42772b5e895SJason Wang goto out; 42872b5e895SJason Wang 42972b5e895SJason Wang flags = extra[i].flags; 43072b5e895SJason Wang 43172b5e895SJason Wang if (flags & VRING_DESC_F_INDIRECT) { 43272b5e895SJason Wang dma_unmap_single(vring_dma_dev(vq), 43372b5e895SJason Wang extra[i].addr, 43472b5e895SJason Wang extra[i].len, 43572b5e895SJason Wang (flags & VRING_DESC_F_WRITE) ? 43672b5e895SJason Wang DMA_FROM_DEVICE : DMA_TO_DEVICE); 43772b5e895SJason Wang } else { 43872b5e895SJason Wang dma_unmap_page(vring_dma_dev(vq), 43972b5e895SJason Wang extra[i].addr, 44072b5e895SJason Wang extra[i].len, 44172b5e895SJason Wang (flags & VRING_DESC_F_WRITE) ? 44272b5e895SJason Wang DMA_FROM_DEVICE : DMA_TO_DEVICE); 44372b5e895SJason Wang } 44472b5e895SJason Wang 44572b5e895SJason Wang out: 44672b5e895SJason Wang return extra[i].next; 44772b5e895SJason Wang } 44872b5e895SJason Wang 449138fd251STiwei Bie static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, 450138fd251STiwei Bie unsigned int total_sg, 451138fd251STiwei Bie gfp_t gfp) 4529fa29b9dSMark McLoughlin { 4539fa29b9dSMark McLoughlin struct vring_desc *desc; 454b25bd251SRusty Russell unsigned int i; 4559fa29b9dSMark McLoughlin 456b92b1b89SWill Deacon /* 457b92b1b89SWill Deacon * We require lowmem mappings for the descriptors because 458b92b1b89SWill Deacon * otherwise virt_to_phys will give us bogus addresses in the 459b92b1b89SWill Deacon * virtqueue. 
460b92b1b89SWill Deacon */ 46182107539SMichal Hocko gfp &= ~__GFP_HIGHMEM; 462b92b1b89SWill Deacon 4636da2ec56SKees Cook desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp); 4649fa29b9dSMark McLoughlin if (!desc) 465b25bd251SRusty Russell return NULL; 4669fa29b9dSMark McLoughlin 467b25bd251SRusty Russell for (i = 0; i < total_sg; i++) 46800e6f3d9SMichael S. Tsirkin desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1); 469b25bd251SRusty Russell return desc; 4709fa29b9dSMark McLoughlin } 4719fa29b9dSMark McLoughlin 472fe4c3862SJason Wang static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, 473fe4c3862SJason Wang struct vring_desc *desc, 474fe4c3862SJason Wang unsigned int i, 475fe4c3862SJason Wang dma_addr_t addr, 476fe4c3862SJason Wang unsigned int len, 47772b5e895SJason Wang u16 flags, 47872b5e895SJason Wang bool indirect) 479fe4c3862SJason Wang { 48072b5e895SJason Wang struct vring_virtqueue *vring = to_vvq(vq); 48172b5e895SJason Wang struct vring_desc_extra *extra = vring->split.desc_extra; 48272b5e895SJason Wang u16 next; 48372b5e895SJason Wang 484fe4c3862SJason Wang desc[i].flags = cpu_to_virtio16(vq->vdev, flags); 485fe4c3862SJason Wang desc[i].addr = cpu_to_virtio64(vq->vdev, addr); 486fe4c3862SJason Wang desc[i].len = cpu_to_virtio32(vq->vdev, len); 487fe4c3862SJason Wang 48872b5e895SJason Wang if (!indirect) { 48972b5e895SJason Wang next = extra[i].next; 49072b5e895SJason Wang desc[i].next = cpu_to_virtio16(vq->vdev, next); 49172b5e895SJason Wang 49272b5e895SJason Wang extra[i].addr = addr; 49372b5e895SJason Wang extra[i].len = len; 49472b5e895SJason Wang extra[i].flags = flags; 49572b5e895SJason Wang } else 49672b5e895SJason Wang next = virtio16_to_cpu(vq->vdev, desc[i].next); 49772b5e895SJason Wang 49872b5e895SJason Wang return next; 499fe4c3862SJason Wang } 500fe4c3862SJason Wang 501138fd251STiwei Bie static inline int virtqueue_add_split(struct virtqueue *_vq, 50213816c76SRusty Russell struct scatterlist *sgs[], 
503eeebf9b1SRusty Russell unsigned int total_sg, 50413816c76SRusty Russell unsigned int out_sgs, 50513816c76SRusty Russell unsigned int in_sgs, 506bbd603efSMichael S. Tsirkin void *data, 5075a08b04fSMichael S. Tsirkin void *ctx, 508bbd603efSMichael S. Tsirkin gfp_t gfp) 5090a8a69ddSRusty Russell { 5100a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 51113816c76SRusty Russell struct scatterlist *sg; 512b25bd251SRusty Russell struct vring_desc *desc; 5133f649ab7SKees Cook unsigned int i, n, avail, descs_used, prev, err_idx; 5141fe9b6feSMichael S. Tsirkin int head; 515b25bd251SRusty Russell bool indirect; 5160a8a69ddSRusty Russell 5179fa29b9dSMark McLoughlin START_USE(vq); 5189fa29b9dSMark McLoughlin 5190a8a69ddSRusty Russell BUG_ON(data == NULL); 5205a08b04fSMichael S. Tsirkin BUG_ON(ctx && vq->indirect); 5219fa29b9dSMark McLoughlin 52270670444SRusty Russell if (unlikely(vq->broken)) { 52370670444SRusty Russell END_USE(vq); 52470670444SRusty Russell return -EIO; 52570670444SRusty Russell } 52670670444SRusty Russell 5274d6a105eSTiwei Bie LAST_ADD_TIME_UPDATE(vq); 528e93300b1SRusty Russell 52913816c76SRusty Russell BUG_ON(total_sg == 0); 5300a8a69ddSRusty Russell 531b25bd251SRusty Russell head = vq->free_head; 532b25bd251SRusty Russell 53335c51e09SXianting Tian if (virtqueue_use_indirect(vq, total_sg)) 534138fd251STiwei Bie desc = alloc_indirect_split(_vq, total_sg, gfp); 53544ed8089SRichard W.M. Jones else { 536b25bd251SRusty Russell desc = NULL; 537e593bf97STiwei Bie WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect); 53844ed8089SRichard W.M. Jones } 539b25bd251SRusty Russell 540b25bd251SRusty Russell if (desc) { 541b25bd251SRusty Russell /* Use a single buffer which doesn't continue */ 542780bc790SAndy Lutomirski indirect = true; 543b25bd251SRusty Russell /* Set up rest to use this indirect table. 
*/ 544b25bd251SRusty Russell i = 0; 545b25bd251SRusty Russell descs_used = 1; 546b25bd251SRusty Russell } else { 547780bc790SAndy Lutomirski indirect = false; 548e593bf97STiwei Bie desc = vq->split.vring.desc; 549b25bd251SRusty Russell i = head; 550b25bd251SRusty Russell descs_used = total_sg; 551b25bd251SRusty Russell } 552b25bd251SRusty Russell /* Fail with -ENOSPC when the ring has fewer free descriptors than this request needs. */ 553b4b4ff73SXianting Tian if (unlikely(vq->vq.num_free < descs_used)) { 5540a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 555b25bd251SRusty Russell descs_used, vq->vq.num_free); 55644653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 55744653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 55844653eaeSRusty Russell * host should service the ring ASAP. */ 55913816c76SRusty Russell if (out_sgs) 560426e3e0aSRusty Russell vq->notify(&vq->vq); 56158625edfSWei Yongjun if (indirect) 56258625edfSWei Yongjun kfree(desc); 5630a8a69ddSRusty Russell END_USE(vq); 5640a8a69ddSRusty Russell return -ENOSPC; 5650a8a69ddSRusty Russell } 5660a8a69ddSRusty Russell /* Device-readable (out) scatterlists come first, mapped DMA_TO_DEVICE. */ 56713816c76SRusty Russell for (n = 0; n < out_sgs; n++) { 568eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 569780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE); 570780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 571780bc790SAndy Lutomirski goto unmap_release; 572780bc790SAndy Lutomirski 5730a8a69ddSRusty Russell prev = i; 57472b5e895SJason Wang /* Note that we trust indirect descriptor 57572b5e895SJason Wang * table since it uses streaming DMA mapping. 
57672b5e895SJason Wang */ 577fe4c3862SJason Wang i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, 57872b5e895SJason Wang VRING_DESC_F_NEXT, 57972b5e895SJason Wang indirect); 5800a8a69ddSRusty Russell } 58113816c76SRusty Russell } /* Device-writable (in) scatterlists follow, mapped DMA_FROM_DEVICE and flagged VRING_DESC_F_WRITE. */ 58213816c76SRusty Russell for (; n < (out_sgs + in_sgs); n++) { 583eeebf9b1SRusty Russell for (sg = sgs[n]; sg; sg = sg_next(sg)) { 584780bc790SAndy Lutomirski dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE); 585780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 586780bc790SAndy Lutomirski goto unmap_release; 587780bc790SAndy Lutomirski 5880a8a69ddSRusty Russell prev = i; 58972b5e895SJason Wang /* Note that we trust indirect descriptor 59072b5e895SJason Wang * table since it uses streaming DMA mapping. 59172b5e895SJason Wang */ 592fe4c3862SJason Wang i = virtqueue_add_desc_split(_vq, desc, i, addr, 593fe4c3862SJason Wang sg->length, 594fe4c3862SJason Wang VRING_DESC_F_NEXT | 59572b5e895SJason Wang VRING_DESC_F_WRITE, 59672b5e895SJason Wang indirect); 59713816c76SRusty Russell } 5980a8a69ddSRusty Russell } 5990a8a69ddSRusty Russell /* Last one doesn't continue. */ 60000e6f3d9SMichael S. Tsirkin desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); 60172b5e895SJason Wang if (!indirect && vq->use_dma_api) 602890d3356SVincent Whitchurch vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= 60372b5e895SJason Wang ~VRING_DESC_F_NEXT; 6040a8a69ddSRusty Russell 605780bc790SAndy Lutomirski if (indirect) { 606780bc790SAndy Lutomirski /* Now that the indirect table is filled in, map it. 
*/ 607780bc790SAndy Lutomirski dma_addr_t addr = vring_map_single( 608780bc790SAndy Lutomirski vq, desc, total_sg * sizeof(struct vring_desc), 609780bc790SAndy Lutomirski DMA_TO_DEVICE); 610780bc790SAndy Lutomirski if (vring_mapping_error(vq, addr)) 611780bc790SAndy Lutomirski goto unmap_release; 612780bc790SAndy Lutomirski 613fe4c3862SJason Wang virtqueue_add_desc_split(_vq, vq->split.vring.desc, 614fe4c3862SJason Wang head, addr, 615fe4c3862SJason Wang total_sg * sizeof(struct vring_desc), 61672b5e895SJason Wang VRING_DESC_F_INDIRECT, 61772b5e895SJason Wang false); 618780bc790SAndy Lutomirski } 619780bc790SAndy Lutomirski 620780bc790SAndy Lutomirski /* We're using some buffers from the free list. */ 621780bc790SAndy Lutomirski vq->vq.num_free -= descs_used; 622780bc790SAndy Lutomirski 6230a8a69ddSRusty Russell /* Update free pointer */ 624b25bd251SRusty Russell if (indirect) 62572b5e895SJason Wang vq->free_head = vq->split.desc_extra[head].next; 626b25bd251SRusty Russell else 6270a8a69ddSRusty Russell vq->free_head = i; 6280a8a69ddSRusty Russell 629780bc790SAndy Lutomirski /* Store token and indirect buffer state. */ 630cbeedb72STiwei Bie vq->split.desc_state[head].data = data; 631780bc790SAndy Lutomirski if (indirect) 632cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = desc; 63387646a34SJason Wang else 634cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = ctx; 6350a8a69ddSRusty Russell 6360a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 6373b720b8cSRusty Russell * do sync). */ 638e593bf97STiwei Bie avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1); 639e593bf97STiwei Bie vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head); 6400a8a69ddSRusty Russell 641ee7cd898SRusty Russell /* Descriptors and available array need to be set before we expose the 642ee7cd898SRusty Russell * new available array entries. 
*/ 643a9a0fef7SRusty Russell virtio_wmb(vq->weak_barriers); 644e593bf97STiwei Bie vq->split.avail_idx_shadow++; 645e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 646e593bf97STiwei Bie vq->split.avail_idx_shadow); 647ee7cd898SRusty Russell vq->num_added++; 648ee7cd898SRusty Russell 6495e05bf58STetsuo Handa pr_debug("Added buffer head %i to %p\n", head, vq); 6505e05bf58STetsuo Handa END_USE(vq); 6515e05bf58STetsuo Handa 652ee7cd898SRusty Russell /* This is very unlikely, but theoretically possible. Kick 653ee7cd898SRusty Russell * just in case. */ 654ee7cd898SRusty Russell if (unlikely(vq->num_added == (1 << 16) - 1)) 655ee7cd898SRusty Russell virtqueue_kick(_vq); 656ee7cd898SRusty Russell 65798e8c6bcSRusty Russell return 0; 658780bc790SAndy Lutomirski 659780bc790SAndy Lutomirski unmap_release: 660780bc790SAndy Lutomirski err_idx = i; 661cf8f1696SMatthias Lange 662cf8f1696SMatthias Lange if (indirect) 663cf8f1696SMatthias Lange i = 0; 664cf8f1696SMatthias Lange else 665780bc790SAndy Lutomirski i = head; 666780bc790SAndy Lutomirski 667780bc790SAndy Lutomirski for (n = 0; n < total_sg; n++) { 668780bc790SAndy Lutomirski if (i == err_idx) 669780bc790SAndy Lutomirski break; 67072b5e895SJason Wang if (indirect) { 67172b5e895SJason Wang vring_unmap_one_split_indirect(vq, &desc[i]); 672cf8f1696SMatthias Lange i = virtio16_to_cpu(_vq->vdev, desc[i].next); 67372b5e895SJason Wang } else 67472b5e895SJason Wang i = vring_unmap_one_split(vq, i); 675780bc790SAndy Lutomirski } 676780bc790SAndy Lutomirski 677780bc790SAndy Lutomirski if (indirect) 678780bc790SAndy Lutomirski kfree(desc); 679780bc790SAndy Lutomirski 6803cc36f6eSMichael S. Tsirkin END_USE(vq); 681f7728002SHalil Pasic return -ENOMEM; 6820a8a69ddSRusty Russell } 68313816c76SRusty Russell 684138fd251STiwei Bie static bool virtqueue_kick_prepare_split(struct virtqueue *_vq) 6850a8a69ddSRusty Russell { 6860a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 687a5c262c5SMichael S. 
Tsirkin u16 new, old; 68841f0377fSRusty Russell bool needs_kick; 68941f0377fSRusty Russell 6900a8a69ddSRusty Russell START_USE(vq); 691a72caae2SJason Wang /* We need to expose available array entries before checking avail 692a72caae2SJason Wang * event. */ 693a9a0fef7SRusty Russell virtio_mb(vq->weak_barriers); 6940a8a69ddSRusty Russell 695e593bf97STiwei Bie old = vq->split.avail_idx_shadow - vq->num_added; 696e593bf97STiwei Bie new = vq->split.avail_idx_shadow; 6970a8a69ddSRusty Russell vq->num_added = 0; 6980a8a69ddSRusty Russell 6994d6a105eSTiwei Bie LAST_ADD_TIME_CHECK(vq); 7004d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 701e93300b1SRusty Russell 70241f0377fSRusty Russell if (vq->event) { 703e593bf97STiwei Bie needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev, 704e593bf97STiwei Bie vring_avail_event(&vq->split.vring)), 70541f0377fSRusty Russell new, old); 70641f0377fSRusty Russell } else { 707e593bf97STiwei Bie needs_kick = !(vq->split.vring.used->flags & 708e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 709e593bf97STiwei Bie VRING_USED_F_NO_NOTIFY)); 71041f0377fSRusty Russell } 7110a8a69ddSRusty Russell END_USE(vq); 71241f0377fSRusty Russell return needs_kick; 71341f0377fSRusty Russell } 714138fd251STiwei Bie 715138fd251STiwei Bie static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, 7165a08b04fSMichael S. Tsirkin void **ctx) 7170a8a69ddSRusty Russell { 718780bc790SAndy Lutomirski unsigned int i, j; 719c60923cbSGonglei __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); 7200a8a69ddSRusty Russell 7210a8a69ddSRusty Russell /* Clear data ptr. 
*/ 722cbeedb72STiwei Bie vq->split.desc_state[head].data = NULL; 7230a8a69ddSRusty Russell 724780bc790SAndy Lutomirski /* Put back on free list: unmap first-level descriptors and find end */ 7250a8a69ddSRusty Russell i = head; 7269fa29b9dSMark McLoughlin 727e593bf97STiwei Bie while (vq->split.vring.desc[i].flags & nextflag) { 72872b5e895SJason Wang vring_unmap_one_split(vq, i); 72972b5e895SJason Wang i = vq->split.desc_extra[i].next; 73006ca287dSRusty Russell vq->vq.num_free++; 7310a8a69ddSRusty Russell } 7320a8a69ddSRusty Russell 73372b5e895SJason Wang vring_unmap_one_split(vq, i); 73472b5e895SJason Wang vq->split.desc_extra[i].next = vq->free_head; 7350a8a69ddSRusty Russell vq->free_head = head; 736780bc790SAndy Lutomirski 7370a8a69ddSRusty Russell /* Plus final descriptor */ 73806ca287dSRusty Russell vq->vq.num_free++; 739780bc790SAndy Lutomirski 7405a08b04fSMichael S. Tsirkin if (vq->indirect) { 741cbeedb72STiwei Bie struct vring_desc *indir_desc = 742cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc; 7435a08b04fSMichael S. Tsirkin u32 len; 7445a08b04fSMichael S. Tsirkin 7455a08b04fSMichael S. Tsirkin /* Free the indirect table, if any, now that it's unmapped. */ 7465a08b04fSMichael S. Tsirkin if (!indir_desc) 7475a08b04fSMichael S. Tsirkin return; 7485a08b04fSMichael S. Tsirkin 74972b5e895SJason Wang len = vq->split.desc_extra[head].len; 750780bc790SAndy Lutomirski 75172b5e895SJason Wang BUG_ON(!(vq->split.desc_extra[head].flags & 75272b5e895SJason Wang VRING_DESC_F_INDIRECT)); 753780bc790SAndy Lutomirski BUG_ON(len == 0 || len % sizeof(struct vring_desc)); 754780bc790SAndy Lutomirski 755780bc790SAndy Lutomirski for (j = 0; j < len / sizeof(struct vring_desc); j++) 75672b5e895SJason Wang vring_unmap_one_split_indirect(vq, &indir_desc[j]); 757780bc790SAndy Lutomirski 7585a08b04fSMichael S. Tsirkin kfree(indir_desc); 759cbeedb72STiwei Bie vq->split.desc_state[head].indir_desc = NULL; 7605a08b04fSMichael S. 
Tsirkin } else if (ctx) { 761cbeedb72STiwei Bie *ctx = vq->split.desc_state[head].indir_desc; 762780bc790SAndy Lutomirski } 7630a8a69ddSRusty Russell } 7640a8a69ddSRusty Russell 765138fd251STiwei Bie static inline bool more_used_split(const struct vring_virtqueue *vq) 7660a8a69ddSRusty Russell { 767e593bf97STiwei Bie return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, 768e593bf97STiwei Bie vq->split.vring.used->idx); 7690a8a69ddSRusty Russell } 7700a8a69ddSRusty Russell 771138fd251STiwei Bie static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq, 772138fd251STiwei Bie unsigned int *len, 7735a08b04fSMichael S. Tsirkin void **ctx) 7740a8a69ddSRusty Russell { 7750a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 7760a8a69ddSRusty Russell void *ret; 7770a8a69ddSRusty Russell unsigned int i; 7783b720b8cSRusty Russell u16 last_used; 7790a8a69ddSRusty Russell 7800a8a69ddSRusty Russell START_USE(vq); 7810a8a69ddSRusty Russell 7825ef82752SRusty Russell if (unlikely(vq->broken)) { 7835ef82752SRusty Russell END_USE(vq); 7845ef82752SRusty Russell return NULL; 7855ef82752SRusty Russell } 7865ef82752SRusty Russell 787138fd251STiwei Bie if (!more_used_split(vq)) { 7880a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 7890a8a69ddSRusty Russell END_USE(vq); 7900a8a69ddSRusty Russell return NULL; 7910a8a69ddSRusty Russell } 7920a8a69ddSRusty Russell 7932d61ba95SMichael S. Tsirkin /* Only get used array entries after they have been exposed by host. */ 794a9a0fef7SRusty Russell virtio_rmb(vq->weak_barriers); 7952d61ba95SMichael S. 
Tsirkin 796e593bf97STiwei Bie last_used = (vq->last_used_idx & (vq->split.vring.num - 1)); 797e593bf97STiwei Bie i = virtio32_to_cpu(_vq->vdev, 798e593bf97STiwei Bie vq->split.vring.used->ring[last_used].id); 799e593bf97STiwei Bie *len = virtio32_to_cpu(_vq->vdev, 800e593bf97STiwei Bie vq->split.vring.used->ring[last_used].len); 8010a8a69ddSRusty Russell 802e593bf97STiwei Bie if (unlikely(i >= vq->split.vring.num)) { 8030a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 8040a8a69ddSRusty Russell return NULL; 8050a8a69ddSRusty Russell } 806cbeedb72STiwei Bie if (unlikely(!vq->split.desc_state[i].data)) { 8070a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 8080a8a69ddSRusty Russell return NULL; 8090a8a69ddSRusty Russell } 8100a8a69ddSRusty Russell 811138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. */ 812cbeedb72STiwei Bie ret = vq->split.desc_state[i].data; 813138fd251STiwei Bie detach_buf_split(vq, i, ctx); 8140a8a69ddSRusty Russell vq->last_used_idx++; 815a5c262c5SMichael S. Tsirkin /* If we expect an interrupt for the next entry, tell host 816a5c262c5SMichael S. Tsirkin * by writing event index and flush out the write before 817a5c262c5SMichael S. Tsirkin * the read in the next get_buf call. */ 818e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) 819788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 820e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 821788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx)); 822a5c262c5SMichael S. 
Tsirkin 8234d6a105eSTiwei Bie LAST_ADD_TIME_INVALID(vq); 824e93300b1SRusty Russell 8250a8a69ddSRusty Russell END_USE(vq); 8260a8a69ddSRusty Russell return ret; 8270a8a69ddSRusty Russell } 828138fd251STiwei Bie 829138fd251STiwei Bie static void virtqueue_disable_cb_split(struct virtqueue *_vq) 830138fd251STiwei Bie { 831138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 832138fd251STiwei Bie 833e593bf97STiwei Bie if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { 834e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 8358d622d21SMichael S. Tsirkin if (vq->event) 8368d622d21SMichael S. Tsirkin /* TODO: this is a hack. Figure out a cleaner value to write. */ 8378d622d21SMichael S. Tsirkin vring_used_event(&vq->split.vring) = 0x0; 8388d622d21SMichael S. Tsirkin else 839e593bf97STiwei Bie vq->split.vring.avail->flags = 840e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 841e593bf97STiwei Bie vq->split.avail_flags_shadow); 842138fd251STiwei Bie } 843138fd251STiwei Bie } 844138fd251STiwei Bie 84531532340SSolomon Tan static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq) 846cc229884SMichael S. Tsirkin { 847cc229884SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 848cc229884SMichael S. Tsirkin u16 last_used_idx; 849cc229884SMichael S. Tsirkin 850cc229884SMichael S. Tsirkin START_USE(vq); 851cc229884SMichael S. Tsirkin 852cc229884SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 853cc229884SMichael S. Tsirkin * more to do. */ 854cc229884SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 855cc229884SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 856cc229884SMichael S. Tsirkin * entry. Always do both to keep code simple. 
*/ 857e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 858e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 8590ea1e4a6SLadi Prosek if (!vq->event) 860e593bf97STiwei Bie vq->split.vring.avail->flags = 861e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 862e593bf97STiwei Bie vq->split.avail_flags_shadow); 863f277ec42SVenkatesh Srinivas } 864e593bf97STiwei Bie vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev, 865e593bf97STiwei Bie last_used_idx = vq->last_used_idx); 866cc229884SMichael S. Tsirkin END_USE(vq); 867cc229884SMichael S. Tsirkin return last_used_idx; 868cc229884SMichael S. Tsirkin } 869138fd251STiwei Bie 87031532340SSolomon Tan static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx) 871138fd251STiwei Bie { 872138fd251STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 873138fd251STiwei Bie 874138fd251STiwei Bie return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, 875e593bf97STiwei Bie vq->split.vring.used->idx); 876138fd251STiwei Bie } 877138fd251STiwei Bie 878138fd251STiwei Bie static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq) 8797ab358c2SMichael S. Tsirkin { 8807ab358c2SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 8817ab358c2SMichael S. Tsirkin u16 bufs; 8827ab358c2SMichael S. Tsirkin 8837ab358c2SMichael S. Tsirkin START_USE(vq); 8847ab358c2SMichael S. Tsirkin 8857ab358c2SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 8867ab358c2SMichael S. Tsirkin * more to do. */ 8877ab358c2SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 8887ab358c2SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 8890ea1e4a6SLadi Prosek * entry. Always update the event index to keep code simple. 
*/ 890e593bf97STiwei Bie if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { 891e593bf97STiwei Bie vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; 8920ea1e4a6SLadi Prosek if (!vq->event) 893e593bf97STiwei Bie vq->split.vring.avail->flags = 894e593bf97STiwei Bie cpu_to_virtio16(_vq->vdev, 895e593bf97STiwei Bie vq->split.avail_flags_shadow); 896f277ec42SVenkatesh Srinivas } 8977ab358c2SMichael S. Tsirkin /* TODO: tune this threshold */ 898e593bf97STiwei Bie bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4; 899788e5b3aSMichael S. Tsirkin 900788e5b3aSMichael S. Tsirkin virtio_store_mb(vq->weak_barriers, 901e593bf97STiwei Bie &vring_used_event(&vq->split.vring), 902788e5b3aSMichael S. Tsirkin cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs)); 903788e5b3aSMichael S. Tsirkin 904e593bf97STiwei Bie if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx) 905e593bf97STiwei Bie - vq->last_used_idx) > bufs)) { 9067ab358c2SMichael S. Tsirkin END_USE(vq); 9077ab358c2SMichael S. Tsirkin return false; 9087ab358c2SMichael S. Tsirkin } 9097ab358c2SMichael S. Tsirkin 9107ab358c2SMichael S. Tsirkin END_USE(vq); 9117ab358c2SMichael S. Tsirkin return true; 9127ab358c2SMichael S. Tsirkin } 9137ab358c2SMichael S. Tsirkin 914138fd251STiwei Bie static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq) 915c021eac4SShirley Ma { 916c021eac4SShirley Ma struct vring_virtqueue *vq = to_vvq(_vq); 917c021eac4SShirley Ma unsigned int i; 918c021eac4SShirley Ma void *buf; 919c021eac4SShirley Ma 920c021eac4SShirley Ma START_USE(vq); 921c021eac4SShirley Ma 922e593bf97STiwei Bie for (i = 0; i < vq->split.vring.num; i++) { 923cbeedb72STiwei Bie if (!vq->split.desc_state[i].data) 924c021eac4SShirley Ma continue; 925138fd251STiwei Bie /* detach_buf_split clears data, so grab it now. 
*/ 926cbeedb72STiwei Bie buf = vq->split.desc_state[i].data; 927138fd251STiwei Bie detach_buf_split(vq, i, NULL); 928e593bf97STiwei Bie vq->split.avail_idx_shadow--; 929e593bf97STiwei Bie vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev, 930e593bf97STiwei Bie vq->split.avail_idx_shadow); 931c021eac4SShirley Ma END_USE(vq); 932c021eac4SShirley Ma return buf; 933c021eac4SShirley Ma } 934c021eac4SShirley Ma /* That should have freed everything. */ 935e593bf97STiwei Bie BUG_ON(vq->vq.num_free != vq->split.vring.num); 936c021eac4SShirley Ma 937c021eac4SShirley Ma END_USE(vq); 938c021eac4SShirley Ma return NULL; 939c021eac4SShirley Ma } 940138fd251STiwei Bie 941*a2b36c8dSXuan Zhuo static int vring_alloc_state_extra_split(struct vring_virtqueue_split *vring_split) 942*a2b36c8dSXuan Zhuo { 943*a2b36c8dSXuan Zhuo struct vring_desc_state_split *state; 944*a2b36c8dSXuan Zhuo struct vring_desc_extra *extra; 945*a2b36c8dSXuan Zhuo u32 num = vring_split->vring.num; 946*a2b36c8dSXuan Zhuo 947*a2b36c8dSXuan Zhuo state = kmalloc_array(num, sizeof(struct vring_desc_state_split), GFP_KERNEL); 948*a2b36c8dSXuan Zhuo if (!state) 949*a2b36c8dSXuan Zhuo goto err_state; 950*a2b36c8dSXuan Zhuo 951*a2b36c8dSXuan Zhuo extra = vring_alloc_desc_extra(num); 952*a2b36c8dSXuan Zhuo if (!extra) 953*a2b36c8dSXuan Zhuo goto err_extra; 954*a2b36c8dSXuan Zhuo 955*a2b36c8dSXuan Zhuo memset(state, 0, num * sizeof(struct vring_desc_state_split)); 956*a2b36c8dSXuan Zhuo 957*a2b36c8dSXuan Zhuo vring_split->desc_state = state; 958*a2b36c8dSXuan Zhuo vring_split->desc_extra = extra; 959*a2b36c8dSXuan Zhuo return 0; 960*a2b36c8dSXuan Zhuo 961*a2b36c8dSXuan Zhuo err_extra: 962*a2b36c8dSXuan Zhuo kfree(state); 963*a2b36c8dSXuan Zhuo err_state: 964*a2b36c8dSXuan Zhuo return -ENOMEM; 965*a2b36c8dSXuan Zhuo } 966*a2b36c8dSXuan Zhuo 96789f05d94SXuan Zhuo static void vring_free_split(struct vring_virtqueue_split *vring_split, 96889f05d94SXuan Zhuo struct virtio_device *vdev) 96989f05d94SXuan Zhuo { 
97089f05d94SXuan Zhuo vring_free_queue(vdev, vring_split->queue_size_in_bytes, 97189f05d94SXuan Zhuo vring_split->vring.desc, 97289f05d94SXuan Zhuo vring_split->queue_dma_addr); 97389f05d94SXuan Zhuo 97489f05d94SXuan Zhuo kfree(vring_split->desc_state); 97589f05d94SXuan Zhuo kfree(vring_split->desc_extra); 97689f05d94SXuan Zhuo } 97789f05d94SXuan Zhuo 978c2d87fe6SXuan Zhuo static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split, 979c2d87fe6SXuan Zhuo struct virtio_device *vdev, 980c2d87fe6SXuan Zhuo u32 num, 981c2d87fe6SXuan Zhuo unsigned int vring_align, 982c2d87fe6SXuan Zhuo bool may_reduce_num) 983c2d87fe6SXuan Zhuo { 984c2d87fe6SXuan Zhuo void *queue = NULL; 985c2d87fe6SXuan Zhuo dma_addr_t dma_addr; 986c2d87fe6SXuan Zhuo 987c2d87fe6SXuan Zhuo /* We assume num is a power of 2. */ 988c2d87fe6SXuan Zhuo if (num & (num - 1)) { 989c2d87fe6SXuan Zhuo dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 990c2d87fe6SXuan Zhuo return -EINVAL; 991c2d87fe6SXuan Zhuo } 992c2d87fe6SXuan Zhuo 993c2d87fe6SXuan Zhuo /* TODO: allocate each queue chunk individually */ 994c2d87fe6SXuan Zhuo for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { 995c2d87fe6SXuan Zhuo queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 996c2d87fe6SXuan Zhuo &dma_addr, 997c2d87fe6SXuan Zhuo GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); 998c2d87fe6SXuan Zhuo if (queue) 999c2d87fe6SXuan Zhuo break; 1000c2d87fe6SXuan Zhuo if (!may_reduce_num) 1001c2d87fe6SXuan Zhuo return -ENOMEM; 1002c2d87fe6SXuan Zhuo } 1003c2d87fe6SXuan Zhuo 1004c2d87fe6SXuan Zhuo if (!num) 1005c2d87fe6SXuan Zhuo return -ENOMEM; 1006c2d87fe6SXuan Zhuo 1007c2d87fe6SXuan Zhuo if (!queue) { 1008c2d87fe6SXuan Zhuo /* Try to get a single page. You are my only hope! 
*/ 1009c2d87fe6SXuan Zhuo queue = vring_alloc_queue(vdev, vring_size(num, vring_align), 1010c2d87fe6SXuan Zhuo &dma_addr, GFP_KERNEL|__GFP_ZERO); 1011c2d87fe6SXuan Zhuo } 1012c2d87fe6SXuan Zhuo if (!queue) 1013c2d87fe6SXuan Zhuo return -ENOMEM; 1014c2d87fe6SXuan Zhuo 1015c2d87fe6SXuan Zhuo vring_init(&vring_split->vring, num, queue, vring_align); 1016c2d87fe6SXuan Zhuo 1017c2d87fe6SXuan Zhuo vring_split->queue_dma_addr = dma_addr; 1018c2d87fe6SXuan Zhuo vring_split->queue_size_in_bytes = vring_size(num, vring_align); 1019c2d87fe6SXuan Zhuo 1020c2d87fe6SXuan Zhuo return 0; 1021c2d87fe6SXuan Zhuo } 1022c2d87fe6SXuan Zhuo 1023d79dca75STiwei Bie static struct virtqueue *vring_create_virtqueue_split( 1024d79dca75STiwei Bie unsigned int index, 1025d79dca75STiwei Bie unsigned int num, 1026d79dca75STiwei Bie unsigned int vring_align, 1027d79dca75STiwei Bie struct virtio_device *vdev, 1028d79dca75STiwei Bie bool weak_barriers, 1029d79dca75STiwei Bie bool may_reduce_num, 1030d79dca75STiwei Bie bool context, 1031d79dca75STiwei Bie bool (*notify)(struct virtqueue *), 1032d79dca75STiwei Bie void (*callback)(struct virtqueue *), 1033d79dca75STiwei Bie const char *name) 1034d79dca75STiwei Bie { 1035cd4c812aSXuan Zhuo struct vring_virtqueue_split vring_split = {}; 1036d79dca75STiwei Bie struct virtqueue *vq; 1037c2d87fe6SXuan Zhuo int err; 1038d79dca75STiwei Bie 1039c2d87fe6SXuan Zhuo err = vring_alloc_queue_split(&vring_split, vdev, num, vring_align, 1040c2d87fe6SXuan Zhuo may_reduce_num); 1041c2d87fe6SXuan Zhuo if (err) 1042d79dca75STiwei Bie return NULL; 1043d79dca75STiwei Bie 1044cd4c812aSXuan Zhuo vq = __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 1045cd4c812aSXuan Zhuo context, notify, callback, name); 1046d79dca75STiwei Bie if (!vq) { 1047c2d87fe6SXuan Zhuo vring_free_split(&vring_split, vdev); 1048d79dca75STiwei Bie return NULL; 1049d79dca75STiwei Bie } 1050d79dca75STiwei Bie 1051c2d87fe6SXuan Zhuo to_vvq(vq)->split.queue_dma_addr = 
vring_split.queue_dma_addr; 1052c2d87fe6SXuan Zhuo to_vvq(vq)->split.queue_size_in_bytes = vring_split.queue_size_in_bytes; 1053d79dca75STiwei Bie to_vvq(vq)->we_own_ring = true; 1054d79dca75STiwei Bie 1055d79dca75STiwei Bie return vq; 1056d79dca75STiwei Bie } 1057d79dca75STiwei Bie 1058e6f633e5STiwei Bie 1059e6f633e5STiwei Bie /* 10601ce9e605STiwei Bie * Packed ring specific functions - *_packed(). 10611ce9e605STiwei Bie */ 1062a7722890Shuangjie.albert static inline bool packed_used_wrap_counter(u16 last_used_idx) 1063a7722890Shuangjie.albert { 1064a7722890Shuangjie.albert return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1065a7722890Shuangjie.albert } 1066a7722890Shuangjie.albert 1067a7722890Shuangjie.albert static inline u16 packed_last_used(u16 last_used_idx) 1068a7722890Shuangjie.albert { 1069a7722890Shuangjie.albert return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); 1070a7722890Shuangjie.albert } 10711ce9e605STiwei Bie 1072d80dc15bSXuan Zhuo static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, 1073d80dc15bSXuan Zhuo struct vring_desc_extra *extra) 10741ce9e605STiwei Bie { 10751ce9e605STiwei Bie u16 flags; 10761ce9e605STiwei Bie 10771ce9e605STiwei Bie if (!vq->use_dma_api) 10781ce9e605STiwei Bie return; 10791ce9e605STiwei Bie 1080d80dc15bSXuan Zhuo flags = extra->flags; 10811ce9e605STiwei Bie 10821ce9e605STiwei Bie if (flags & VRING_DESC_F_INDIRECT) { 10831ce9e605STiwei Bie dma_unmap_single(vring_dma_dev(vq), 1084d80dc15bSXuan Zhuo extra->addr, extra->len, 10851ce9e605STiwei Bie (flags & VRING_DESC_F_WRITE) ? 10861ce9e605STiwei Bie DMA_FROM_DEVICE : DMA_TO_DEVICE); 10871ce9e605STiwei Bie } else { 10881ce9e605STiwei Bie dma_unmap_page(vring_dma_dev(vq), 1089d80dc15bSXuan Zhuo extra->addr, extra->len, 10901ce9e605STiwei Bie (flags & VRING_DESC_F_WRITE) ? 
10911ce9e605STiwei Bie DMA_FROM_DEVICE : DMA_TO_DEVICE); 10921ce9e605STiwei Bie } 10931ce9e605STiwei Bie } 10941ce9e605STiwei Bie 10951ce9e605STiwei Bie static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, 10961ce9e605STiwei Bie struct vring_packed_desc *desc) 10971ce9e605STiwei Bie { 10981ce9e605STiwei Bie u16 flags; 10991ce9e605STiwei Bie 11001ce9e605STiwei Bie if (!vq->use_dma_api) 11011ce9e605STiwei Bie return; 11021ce9e605STiwei Bie 11031ce9e605STiwei Bie flags = le16_to_cpu(desc->flags); 11041ce9e605STiwei Bie 11051ce9e605STiwei Bie dma_unmap_page(vring_dma_dev(vq), 11061ce9e605STiwei Bie le64_to_cpu(desc->addr), 11071ce9e605STiwei Bie le32_to_cpu(desc->len), 11081ce9e605STiwei Bie (flags & VRING_DESC_F_WRITE) ? 11091ce9e605STiwei Bie DMA_FROM_DEVICE : DMA_TO_DEVICE); 11101ce9e605STiwei Bie } 11111ce9e605STiwei Bie 11121ce9e605STiwei Bie static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, 11131ce9e605STiwei Bie gfp_t gfp) 11141ce9e605STiwei Bie { 11151ce9e605STiwei Bie struct vring_packed_desc *desc; 11161ce9e605STiwei Bie 11171ce9e605STiwei Bie /* 11181ce9e605STiwei Bie * We require lowmem mappings for the descriptors because 11191ce9e605STiwei Bie * otherwise virt_to_phys will give us bogus addresses in the 11201ce9e605STiwei Bie * virtqueue. 
11211ce9e605STiwei Bie */ 11221ce9e605STiwei Bie gfp &= ~__GFP_HIGHMEM; 11231ce9e605STiwei Bie 11241ce9e605STiwei Bie desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp); 11251ce9e605STiwei Bie 11261ce9e605STiwei Bie return desc; 11271ce9e605STiwei Bie } 11281ce9e605STiwei Bie 11291ce9e605STiwei Bie static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 11301ce9e605STiwei Bie struct scatterlist *sgs[], 11311ce9e605STiwei Bie unsigned int total_sg, 11321ce9e605STiwei Bie unsigned int out_sgs, 11331ce9e605STiwei Bie unsigned int in_sgs, 11341ce9e605STiwei Bie void *data, 11351ce9e605STiwei Bie gfp_t gfp) 11361ce9e605STiwei Bie { 11371ce9e605STiwei Bie struct vring_packed_desc *desc; 11381ce9e605STiwei Bie struct scatterlist *sg; 11391ce9e605STiwei Bie unsigned int i, n, err_idx; 11401ce9e605STiwei Bie u16 head, id; 11411ce9e605STiwei Bie dma_addr_t addr; 11421ce9e605STiwei Bie 11431ce9e605STiwei Bie head = vq->packed.next_avail_idx; 11441ce9e605STiwei Bie desc = alloc_indirect_packed(total_sg, gfp); 1145fc6d70f4SXuan Zhuo if (!desc) 1146fc6d70f4SXuan Zhuo return -ENOMEM; 11471ce9e605STiwei Bie 11481ce9e605STiwei Bie if (unlikely(vq->vq.num_free < 1)) { 11491ce9e605STiwei Bie pr_debug("Can't add buf len 1 - avail = 0\n"); 1150df0bfe75SYueHaibing kfree(desc); 11511ce9e605STiwei Bie END_USE(vq); 11521ce9e605STiwei Bie return -ENOSPC; 11531ce9e605STiwei Bie } 11541ce9e605STiwei Bie 11551ce9e605STiwei Bie i = 0; 11561ce9e605STiwei Bie id = vq->free_head; 11571ce9e605STiwei Bie BUG_ON(id == vq->packed.vring.num); 11581ce9e605STiwei Bie 11591ce9e605STiwei Bie for (n = 0; n < out_sgs + in_sgs; n++) { 11601ce9e605STiwei Bie for (sg = sgs[n]; sg; sg = sg_next(sg)) { 11611ce9e605STiwei Bie addr = vring_map_one_sg(vq, sg, n < out_sgs ? 
11621ce9e605STiwei Bie DMA_TO_DEVICE : DMA_FROM_DEVICE); 11631ce9e605STiwei Bie if (vring_mapping_error(vq, addr)) 11641ce9e605STiwei Bie goto unmap_release; 11651ce9e605STiwei Bie 11661ce9e605STiwei Bie desc[i].flags = cpu_to_le16(n < out_sgs ? 11671ce9e605STiwei Bie 0 : VRING_DESC_F_WRITE); 11681ce9e605STiwei Bie desc[i].addr = cpu_to_le64(addr); 11691ce9e605STiwei Bie desc[i].len = cpu_to_le32(sg->length); 11701ce9e605STiwei Bie i++; 11711ce9e605STiwei Bie } 11721ce9e605STiwei Bie } 11731ce9e605STiwei Bie 11741ce9e605STiwei Bie /* Now that the indirect table is filled in, map it. */ 11751ce9e605STiwei Bie addr = vring_map_single(vq, desc, 11761ce9e605STiwei Bie total_sg * sizeof(struct vring_packed_desc), 11771ce9e605STiwei Bie DMA_TO_DEVICE); 11781ce9e605STiwei Bie if (vring_mapping_error(vq, addr)) 11791ce9e605STiwei Bie goto unmap_release; 11801ce9e605STiwei Bie 11811ce9e605STiwei Bie vq->packed.vring.desc[head].addr = cpu_to_le64(addr); 11821ce9e605STiwei Bie vq->packed.vring.desc[head].len = cpu_to_le32(total_sg * 11831ce9e605STiwei Bie sizeof(struct vring_packed_desc)); 11841ce9e605STiwei Bie vq->packed.vring.desc[head].id = cpu_to_le16(id); 11851ce9e605STiwei Bie 11861ce9e605STiwei Bie if (vq->use_dma_api) { 11871ce9e605STiwei Bie vq->packed.desc_extra[id].addr = addr; 11881ce9e605STiwei Bie vq->packed.desc_extra[id].len = total_sg * 11891ce9e605STiwei Bie sizeof(struct vring_packed_desc); 11901ce9e605STiwei Bie vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | 11911ce9e605STiwei Bie vq->packed.avail_used_flags; 11921ce9e605STiwei Bie } 11931ce9e605STiwei Bie 11941ce9e605STiwei Bie /* 11951ce9e605STiwei Bie * A driver MUST NOT make the first descriptor in the list 11961ce9e605STiwei Bie * available before all subsequent descriptors comprising 11971ce9e605STiwei Bie * the list are made available. 
11981ce9e605STiwei Bie */ 11991ce9e605STiwei Bie virtio_wmb(vq->weak_barriers); 12001ce9e605STiwei Bie vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT | 12011ce9e605STiwei Bie vq->packed.avail_used_flags); 12021ce9e605STiwei Bie 12031ce9e605STiwei Bie /* We're using some buffers from the free list. */ 12041ce9e605STiwei Bie vq->vq.num_free -= 1; 12051ce9e605STiwei Bie 12061ce9e605STiwei Bie /* Update free pointer */ 12071ce9e605STiwei Bie n = head + 1; 12081ce9e605STiwei Bie if (n >= vq->packed.vring.num) { 12091ce9e605STiwei Bie n = 0; 12101ce9e605STiwei Bie vq->packed.avail_wrap_counter ^= 1; 12111ce9e605STiwei Bie vq->packed.avail_used_flags ^= 12121ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_AVAIL | 12131ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_USED; 12141ce9e605STiwei Bie } 12151ce9e605STiwei Bie vq->packed.next_avail_idx = n; 1216aeef9b47SJason Wang vq->free_head = vq->packed.desc_extra[id].next; 12171ce9e605STiwei Bie 12181ce9e605STiwei Bie /* Store token and indirect buffer state. 
*/ 12191ce9e605STiwei Bie vq->packed.desc_state[id].num = 1; 12201ce9e605STiwei Bie vq->packed.desc_state[id].data = data; 12211ce9e605STiwei Bie vq->packed.desc_state[id].indir_desc = desc; 12221ce9e605STiwei Bie vq->packed.desc_state[id].last = id; 12231ce9e605STiwei Bie 12241ce9e605STiwei Bie vq->num_added += 1; 12251ce9e605STiwei Bie 12261ce9e605STiwei Bie pr_debug("Added buffer head %i to %p\n", head, vq); 12271ce9e605STiwei Bie END_USE(vq); 12281ce9e605STiwei Bie 12291ce9e605STiwei Bie return 0; 12301ce9e605STiwei Bie 12311ce9e605STiwei Bie unmap_release: 12321ce9e605STiwei Bie err_idx = i; 12331ce9e605STiwei Bie 12341ce9e605STiwei Bie for (i = 0; i < err_idx; i++) 12351ce9e605STiwei Bie vring_unmap_desc_packed(vq, &desc[i]); 12361ce9e605STiwei Bie 12371ce9e605STiwei Bie kfree(desc); 12381ce9e605STiwei Bie 12391ce9e605STiwei Bie END_USE(vq); 1240f7728002SHalil Pasic return -ENOMEM; 12411ce9e605STiwei Bie } 12421ce9e605STiwei Bie 12431ce9e605STiwei Bie static inline int virtqueue_add_packed(struct virtqueue *_vq, 12441ce9e605STiwei Bie struct scatterlist *sgs[], 12451ce9e605STiwei Bie unsigned int total_sg, 12461ce9e605STiwei Bie unsigned int out_sgs, 12471ce9e605STiwei Bie unsigned int in_sgs, 12481ce9e605STiwei Bie void *data, 12491ce9e605STiwei Bie void *ctx, 12501ce9e605STiwei Bie gfp_t gfp) 12511ce9e605STiwei Bie { 12521ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 12531ce9e605STiwei Bie struct vring_packed_desc *desc; 12541ce9e605STiwei Bie struct scatterlist *sg; 12551ce9e605STiwei Bie unsigned int i, n, c, descs_used, err_idx; 12563f649ab7SKees Cook __le16 head_flags, flags; 12573f649ab7SKees Cook u16 head, id, prev, curr, avail_used_flags; 1258fc6d70f4SXuan Zhuo int err; 12591ce9e605STiwei Bie 12601ce9e605STiwei Bie START_USE(vq); 12611ce9e605STiwei Bie 12621ce9e605STiwei Bie BUG_ON(data == NULL); 12631ce9e605STiwei Bie BUG_ON(ctx && vq->indirect); 12641ce9e605STiwei Bie 12651ce9e605STiwei Bie if (unlikely(vq->broken)) { 
12661ce9e605STiwei Bie END_USE(vq); 12671ce9e605STiwei Bie return -EIO; 12681ce9e605STiwei Bie } 12691ce9e605STiwei Bie 12701ce9e605STiwei Bie LAST_ADD_TIME_UPDATE(vq); 12711ce9e605STiwei Bie 12721ce9e605STiwei Bie BUG_ON(total_sg == 0); 12731ce9e605STiwei Bie 127435c51e09SXianting Tian if (virtqueue_use_indirect(vq, total_sg)) { 1275fc6d70f4SXuan Zhuo err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1276fc6d70f4SXuan Zhuo in_sgs, data, gfp); 12771861ba62SMichael S. Tsirkin if (err != -ENOMEM) { 12781861ba62SMichael S. Tsirkin END_USE(vq); 1279fc6d70f4SXuan Zhuo return err; 12801861ba62SMichael S. Tsirkin } 1281fc6d70f4SXuan Zhuo 1282fc6d70f4SXuan Zhuo /* fall back on direct */ 1283fc6d70f4SXuan Zhuo } 12841ce9e605STiwei Bie 12851ce9e605STiwei Bie head = vq->packed.next_avail_idx; 12861ce9e605STiwei Bie avail_used_flags = vq->packed.avail_used_flags; 12871ce9e605STiwei Bie 12881ce9e605STiwei Bie WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 12891ce9e605STiwei Bie 12901ce9e605STiwei Bie desc = vq->packed.vring.desc; 12911ce9e605STiwei Bie i = head; 12921ce9e605STiwei Bie descs_used = total_sg; 12931ce9e605STiwei Bie 12941ce9e605STiwei Bie if (unlikely(vq->vq.num_free < descs_used)) { 12951ce9e605STiwei Bie pr_debug("Can't add buf len %i - avail = %i\n", 12961ce9e605STiwei Bie descs_used, vq->vq.num_free); 12971ce9e605STiwei Bie END_USE(vq); 12981ce9e605STiwei Bie return -ENOSPC; 12991ce9e605STiwei Bie } 13001ce9e605STiwei Bie 13011ce9e605STiwei Bie id = vq->free_head; 13021ce9e605STiwei Bie BUG_ON(id == vq->packed.vring.num); 13031ce9e605STiwei Bie 13041ce9e605STiwei Bie curr = id; 13051ce9e605STiwei Bie c = 0; 13061ce9e605STiwei Bie for (n = 0; n < out_sgs + in_sgs; n++) { 13071ce9e605STiwei Bie for (sg = sgs[n]; sg; sg = sg_next(sg)) { 13081ce9e605STiwei Bie dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 
13091ce9e605STiwei Bie DMA_TO_DEVICE : DMA_FROM_DEVICE); 13101ce9e605STiwei Bie if (vring_mapping_error(vq, addr)) 13111ce9e605STiwei Bie goto unmap_release; 13121ce9e605STiwei Bie 13131ce9e605STiwei Bie flags = cpu_to_le16(vq->packed.avail_used_flags | 13141ce9e605STiwei Bie (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 13151ce9e605STiwei Bie (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 13161ce9e605STiwei Bie if (i == head) 13171ce9e605STiwei Bie head_flags = flags; 13181ce9e605STiwei Bie else 13191ce9e605STiwei Bie desc[i].flags = flags; 13201ce9e605STiwei Bie 13211ce9e605STiwei Bie desc[i].addr = cpu_to_le64(addr); 13221ce9e605STiwei Bie desc[i].len = cpu_to_le32(sg->length); 13231ce9e605STiwei Bie desc[i].id = cpu_to_le16(id); 13241ce9e605STiwei Bie 13251ce9e605STiwei Bie if (unlikely(vq->use_dma_api)) { 13261ce9e605STiwei Bie vq->packed.desc_extra[curr].addr = addr; 13271ce9e605STiwei Bie vq->packed.desc_extra[curr].len = sg->length; 13281ce9e605STiwei Bie vq->packed.desc_extra[curr].flags = 13291ce9e605STiwei Bie le16_to_cpu(flags); 13301ce9e605STiwei Bie } 13311ce9e605STiwei Bie prev = curr; 1332aeef9b47SJason Wang curr = vq->packed.desc_extra[curr].next; 13331ce9e605STiwei Bie 13341ce9e605STiwei Bie if ((unlikely(++i >= vq->packed.vring.num))) { 13351ce9e605STiwei Bie i = 0; 13361ce9e605STiwei Bie vq->packed.avail_used_flags ^= 13371ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_AVAIL | 13381ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_USED; 13391ce9e605STiwei Bie } 13401ce9e605STiwei Bie } 13411ce9e605STiwei Bie } 13421ce9e605STiwei Bie 13431ce9e605STiwei Bie if (i < head) 13441ce9e605STiwei Bie vq->packed.avail_wrap_counter ^= 1; 13451ce9e605STiwei Bie 13461ce9e605STiwei Bie /* We're using some buffers from the free list. 
*/ 13471ce9e605STiwei Bie vq->vq.num_free -= descs_used; 13481ce9e605STiwei Bie 13491ce9e605STiwei Bie /* Update free pointer */ 13501ce9e605STiwei Bie vq->packed.next_avail_idx = i; 13511ce9e605STiwei Bie vq->free_head = curr; 13521ce9e605STiwei Bie 13531ce9e605STiwei Bie /* Store token. */ 13541ce9e605STiwei Bie vq->packed.desc_state[id].num = descs_used; 13551ce9e605STiwei Bie vq->packed.desc_state[id].data = data; 13561ce9e605STiwei Bie vq->packed.desc_state[id].indir_desc = ctx; 13571ce9e605STiwei Bie vq->packed.desc_state[id].last = prev; 13581ce9e605STiwei Bie 13591ce9e605STiwei Bie /* 13601ce9e605STiwei Bie * A driver MUST NOT make the first descriptor in the list 13611ce9e605STiwei Bie * available before all subsequent descriptors comprising 13621ce9e605STiwei Bie * the list are made available. 13631ce9e605STiwei Bie */ 13641ce9e605STiwei Bie virtio_wmb(vq->weak_barriers); 13651ce9e605STiwei Bie vq->packed.vring.desc[head].flags = head_flags; 13661ce9e605STiwei Bie vq->num_added += descs_used; 13671ce9e605STiwei Bie 13681ce9e605STiwei Bie pr_debug("Added buffer head %i to %p\n", head, vq); 13691ce9e605STiwei Bie END_USE(vq); 13701ce9e605STiwei Bie 13711ce9e605STiwei Bie return 0; 13721ce9e605STiwei Bie 13731ce9e605STiwei Bie unmap_release: 13741ce9e605STiwei Bie err_idx = i; 13751ce9e605STiwei Bie i = head; 137644593865SJason Wang curr = vq->free_head; 13771ce9e605STiwei Bie 13781ce9e605STiwei Bie vq->packed.avail_used_flags = avail_used_flags; 13791ce9e605STiwei Bie 13801ce9e605STiwei Bie for (n = 0; n < total_sg; n++) { 13811ce9e605STiwei Bie if (i == err_idx) 13821ce9e605STiwei Bie break; 1383d80dc15bSXuan Zhuo vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 138444593865SJason Wang curr = vq->packed.desc_extra[curr].next; 13851ce9e605STiwei Bie i++; 13861ce9e605STiwei Bie if (i >= vq->packed.vring.num) 13871ce9e605STiwei Bie i = 0; 13881ce9e605STiwei Bie } 13891ce9e605STiwei Bie 13901ce9e605STiwei Bie END_USE(vq); 13911ce9e605STiwei Bie 
return -EIO;
}

/*
 * Decide whether the device has to be notified about the buffers added
 * since the previous call.
 *
 * Resets vq->num_added, takes one snapshot of the device event
 * suppression area and returns true when a kick is required.
 */
static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old, off_wrap, flags, wrap_counter, event_idx;
	bool needs_kick;
	/* Overlay so off_wrap and flags can be fetched with one 32-bit load. */
	union {
		struct {
			__le16 off_wrap;
			__le16 flags;
		};
		u32 u32;
	} snapshot;

	START_USE(vq);

	/*
	 * We need to expose the new flags value before checking notification
	 * suppressions.
	 */
	virtio_mb(vq->weak_barriers);

	/* old..new covers the avail slots consumed since num_added was reset. */
	old = vq->packed.next_avail_idx - vq->num_added;
	new = vq->packed.next_avail_idx;
	vq->num_added = 0;

	snapshot.u32 = *(u32 *)vq->packed.vring.device;
	flags = le16_to_cpu(snapshot.flags);

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	/* Outside DESC mode the answer depends on the flags value alone. */
	if (flags != VRING_PACKED_EVENT_FLAG_DESC) {
		needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE);
		goto out;
	}

	off_wrap = le16_to_cpu(snapshot.off_wrap);

	/* Top bit of off_wrap is the wrap counter, the rest is the offset. */
	wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR;
	event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR);
	/*
	 * NOTE(review): presumably rebases an event offset that belongs to
	 * the other ring wrap so that vring_need_event() compares values
	 * from the same lap -- confirm against the virtio specification.
	 */
	if (wrap_counter != vq->packed.avail_wrap_counter)
		event_idx -= vq->packed.vring.num;

	needs_kick = vring_need_event(event_idx, new, old);
out:
	END_USE(vq);
	return needs_kick;
}

/*
 * Release the buffer whose head descriptor id is @id.
 *
 * Clears the stored data token, links the buffer's descriptor chain back
 * onto the free list, credits vq->vq.num_free and, when the DMA API is in
 * use, unmaps every descriptor of the chain.
 *
 * On queues with vq->indirect set, a non-NULL state->indir_desc is treated
 * as an indirect table: its entries are unmapped (DMA API only) and the
 * table is freed. On other queues state->indir_desc is handed back through
 * @ctx when @ctx is non-NULL.
 */
static void detach_buf_packed(struct vring_virtqueue *vq,
			      unsigned int id, void **ctx)
{
	struct vring_desc_state_packed *state = NULL;
	struct vring_packed_desc *desc;
	unsigned int i, curr;

	state = &vq->packed.desc_state[id];

	/* Clear data ptr. */
	state->data = NULL;

	/* Chain id..state->last becomes the new front of the free list. */
	vq->packed.desc_extra[state->last].next = vq->free_head;
	vq->free_head = id;
	vq->vq.num_free += state->num;

	if (unlikely(vq->use_dma_api)) {
		curr = id;
		for (i = 0; i < state->num; i++) {
			vring_unmap_extra_packed(vq,
						 &vq->packed.desc_extra[curr]);
			curr = vq->packed.desc_extra[curr].next;
		}
	}

	if (vq->indirect) {
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		desc = state->indir_desc;
		if (!desc)
			return;

		if (vq->use_dma_api) {
			/* desc_extra[id].len is used as the table size in bytes. */
			len = vq->packed.desc_extra[id].len;
			for (i = 0; i < len / sizeof(struct vring_packed_desc);
					i++)
				vring_unmap_desc_packed(vq, &desc[i]);
		}
		kfree(desc);
		state->indir_desc = NULL;
	} else if (ctx) {
		*ctx = state->indir_desc;
	}
}

/*
 * A descriptor counts as used when its AVAIL and USED flag bits are equal
 * to each other and equal to @used_wrap_counter.
 */
static inline bool is_used_desc_packed(const struct vring_virtqueue *vq,
				       u16 idx, bool used_wrap_counter)
{
	bool avail, used;
	u16 flags;

	flags = le16_to_cpu(vq->packed.vring.desc[idx].flags);
	avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
	used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

	return avail == used && used == used_wrap_counter;
}

/*
 * Report whether the descriptor at the driver's current last-used position
 * is marked used. vq->last_used_idx is read once and then split into ring
 * index and wrap counter.
 */
static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
	u16 last_used;
	u16 last_used_idx;
	bool used_wrap_counter;

	last_used_idx = READ_ONCE(vq->last_used_idx);
	last_used = packed_last_used(last_used_idx);
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	return is_used_desc_packed(vq, last_used, used_wrap_counter);
}
/*
 * Fetch the next used buffer from a packed ring.
 *
 * @_vq: the virtqueue to read from.
 * @len: receives the length field of the used descriptor. It is written
 *	 before the descriptor id is validated.
 * @ctx: forwarded to detach_buf_packed().
 *
 * Returns the data token stored for the buffer, or NULL when the queue is
 * broken, no descriptor is marked used, or the reported id is out of range
 * or has no token attached.
 */
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
					  unsigned int *len,
					  void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used, id, last_used_idx;
	bool used_wrap_counter;
	void *ret;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_packed(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used elements after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used_idx = READ_ONCE(vq->last_used_idx);
	used_wrap_counter = packed_used_wrap_counter(last_used_idx);
	last_used = packed_last_used(last_used_idx);
	id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
	*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);

	/*
	 * BAD_RING() calls BUG() in DEBUG builds and marks the queue broken
	 * otherwise; END_USE() expands to nothing in the latter case, so the
	 * early returns below need no END_USE().
	 */
	if (unlikely(id >= vq->packed.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->packed.desc_state[id].data)) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf_packed clears data, so grab it now. */
	ret = vq->packed.desc_state[id].data;
	detach_buf_packed(vq, id, ctx);

	/* Step over every descriptor of this buffer; wrapping flips the counter. */
	last_used += vq->packed.desc_state[id].num;
	if (unlikely(last_used >= vq->packed.vring.num)) {
		last_used -= vq->packed.vring.num;
		used_wrap_counter ^= 1;
	}

	/* Index and wrap counter are stored together in one 16-bit word. */
	last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
	WRITE_ONCE(vq->last_used_idx, last_used);

	/*
	 * If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call.
	 */
	if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
		virtio_store_mb(vq->weak_barriers,
				&vq->packed.vring.driver->off_wrap,
				cpu_to_le16(vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

/*
 * Write the DISABLE value into the driver event flags. The shadow copy is
 * compared first, so the ring field is only written when the value
 * actually changes.
 */
static void virtqueue_disable_cb_packed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) {
		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
		vq->packed.vring.driver->flags =
			cpu_to_le16(vq->packed.event_flags_shadow);
	}
}
Bie 159431532340SSolomon Tan static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 15951ce9e605STiwei Bie { 15961ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 15971ce9e605STiwei Bie 15981ce9e605STiwei Bie START_USE(vq); 15991ce9e605STiwei Bie 16001ce9e605STiwei Bie /* 16011ce9e605STiwei Bie * We optimistically turn back on interrupts, then check if there was 16021ce9e605STiwei Bie * more to do. 16031ce9e605STiwei Bie */ 16041ce9e605STiwei Bie 1605f51f9826STiwei Bie if (vq->event) { 1606f51f9826STiwei Bie vq->packed.vring.driver->off_wrap = 1607a7722890Shuangjie.albert cpu_to_le16(vq->last_used_idx); 1608f51f9826STiwei Bie /* 1609f51f9826STiwei Bie * We need to update event offset and event wrap 1610f51f9826STiwei Bie * counter first before updating event flags. 1611f51f9826STiwei Bie */ 1612f51f9826STiwei Bie virtio_wmb(vq->weak_barriers); 1613f51f9826STiwei Bie } 1614f51f9826STiwei Bie 16151ce9e605STiwei Bie if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1616f51f9826STiwei Bie vq->packed.event_flags_shadow = vq->event ? 
1617f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_DESC : 1618f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_ENABLE; 16191ce9e605STiwei Bie vq->packed.vring.driver->flags = 16201ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 16211ce9e605STiwei Bie } 16221ce9e605STiwei Bie 16231ce9e605STiwei Bie END_USE(vq); 1624a7722890Shuangjie.albert return vq->last_used_idx; 16251ce9e605STiwei Bie } 16261ce9e605STiwei Bie 16271ce9e605STiwei Bie static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 16281ce9e605STiwei Bie { 16291ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 16301ce9e605STiwei Bie bool wrap_counter; 16311ce9e605STiwei Bie u16 used_idx; 16321ce9e605STiwei Bie 16331ce9e605STiwei Bie wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 16341ce9e605STiwei Bie used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 16351ce9e605STiwei Bie 16361ce9e605STiwei Bie return is_used_desc_packed(vq, used_idx, wrap_counter); 16371ce9e605STiwei Bie } 16381ce9e605STiwei Bie 16391ce9e605STiwei Bie static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 16401ce9e605STiwei Bie { 16411ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1642a7722890Shuangjie.albert u16 used_idx, wrap_counter, last_used_idx; 1643f51f9826STiwei Bie u16 bufs; 16441ce9e605STiwei Bie 16451ce9e605STiwei Bie START_USE(vq); 16461ce9e605STiwei Bie 16471ce9e605STiwei Bie /* 16481ce9e605STiwei Bie * We optimistically turn back on interrupts, then check if there was 16491ce9e605STiwei Bie * more to do. 
16501ce9e605STiwei Bie */ 16511ce9e605STiwei Bie 1652f51f9826STiwei Bie if (vq->event) { 1653f51f9826STiwei Bie /* TODO: tune this threshold */ 1654f51f9826STiwei Bie bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1655a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1656a7722890Shuangjie.albert wrap_counter = packed_used_wrap_counter(last_used_idx); 16571ce9e605STiwei Bie 1658a7722890Shuangjie.albert used_idx = packed_last_used(last_used_idx) + bufs; 1659f51f9826STiwei Bie if (used_idx >= vq->packed.vring.num) { 1660f51f9826STiwei Bie used_idx -= vq->packed.vring.num; 1661f51f9826STiwei Bie wrap_counter ^= 1; 1662f51f9826STiwei Bie } 1663f51f9826STiwei Bie 1664f51f9826STiwei Bie vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1665f51f9826STiwei Bie (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1666f51f9826STiwei Bie 1667f51f9826STiwei Bie /* 1668f51f9826STiwei Bie * We need to update event offset and event wrap 1669f51f9826STiwei Bie * counter first before updating event flags. 1670f51f9826STiwei Bie */ 1671f51f9826STiwei Bie virtio_wmb(vq->weak_barriers); 1672f51f9826STiwei Bie } 1673f51f9826STiwei Bie 16741ce9e605STiwei Bie if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1675f51f9826STiwei Bie vq->packed.event_flags_shadow = vq->event ? 1676f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_DESC : 1677f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_ENABLE; 16781ce9e605STiwei Bie vq->packed.vring.driver->flags = 16791ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 16801ce9e605STiwei Bie } 16811ce9e605STiwei Bie 16821ce9e605STiwei Bie /* 16831ce9e605STiwei Bie * We need to update event suppression structure first 16841ce9e605STiwei Bie * before re-checking for more used buffers. 
16851ce9e605STiwei Bie */ 16861ce9e605STiwei Bie virtio_mb(vq->weak_barriers); 16871ce9e605STiwei Bie 1688a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1689a7722890Shuangjie.albert wrap_counter = packed_used_wrap_counter(last_used_idx); 1690a7722890Shuangjie.albert used_idx = packed_last_used(last_used_idx); 1691a7722890Shuangjie.albert if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 16921ce9e605STiwei Bie END_USE(vq); 16931ce9e605STiwei Bie return false; 16941ce9e605STiwei Bie } 16951ce9e605STiwei Bie 16961ce9e605STiwei Bie END_USE(vq); 16971ce9e605STiwei Bie return true; 16981ce9e605STiwei Bie } 16991ce9e605STiwei Bie 17001ce9e605STiwei Bie static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 17011ce9e605STiwei Bie { 17021ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 17031ce9e605STiwei Bie unsigned int i; 17041ce9e605STiwei Bie void *buf; 17051ce9e605STiwei Bie 17061ce9e605STiwei Bie START_USE(vq); 17071ce9e605STiwei Bie 17081ce9e605STiwei Bie for (i = 0; i < vq->packed.vring.num; i++) { 17091ce9e605STiwei Bie if (!vq->packed.desc_state[i].data) 17101ce9e605STiwei Bie continue; 17111ce9e605STiwei Bie /* detach_buf clears data, so grab it now. */ 17121ce9e605STiwei Bie buf = vq->packed.desc_state[i].data; 17131ce9e605STiwei Bie detach_buf_packed(vq, i, NULL); 17141ce9e605STiwei Bie END_USE(vq); 17151ce9e605STiwei Bie return buf; 17161ce9e605STiwei Bie } 17171ce9e605STiwei Bie /* That should have freed everything. 
*/ 17181ce9e605STiwei Bie BUG_ON(vq->vq.num_free != vq->packed.vring.num); 17191ce9e605STiwei Bie 17201ce9e605STiwei Bie END_USE(vq); 17211ce9e605STiwei Bie return NULL; 17221ce9e605STiwei Bie } 17231ce9e605STiwei Bie 172496ef18a2SXuan Zhuo static struct vring_desc_extra *vring_alloc_desc_extra(unsigned int num) 17255a222421SJason Wang { 17265a222421SJason Wang struct vring_desc_extra *desc_extra; 17275a222421SJason Wang unsigned int i; 17285a222421SJason Wang 17295a222421SJason Wang desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 17305a222421SJason Wang GFP_KERNEL); 17315a222421SJason Wang if (!desc_extra) 17325a222421SJason Wang return NULL; 17335a222421SJason Wang 17345a222421SJason Wang memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 17355a222421SJason Wang 17365a222421SJason Wang for (i = 0; i < num - 1; i++) 17375a222421SJason Wang desc_extra[i].next = i + 1; 17385a222421SJason Wang 17395a222421SJason Wang return desc_extra; 17405a222421SJason Wang } 17415a222421SJason Wang 17421ce9e605STiwei Bie static struct virtqueue *vring_create_virtqueue_packed( 17431ce9e605STiwei Bie unsigned int index, 17441ce9e605STiwei Bie unsigned int num, 17451ce9e605STiwei Bie unsigned int vring_align, 17461ce9e605STiwei Bie struct virtio_device *vdev, 17471ce9e605STiwei Bie bool weak_barriers, 17481ce9e605STiwei Bie bool may_reduce_num, 17491ce9e605STiwei Bie bool context, 17501ce9e605STiwei Bie bool (*notify)(struct virtqueue *), 17511ce9e605STiwei Bie void (*callback)(struct virtqueue *), 17521ce9e605STiwei Bie const char *name) 17531ce9e605STiwei Bie { 17541ce9e605STiwei Bie struct vring_virtqueue *vq; 17551ce9e605STiwei Bie struct vring_packed_desc *ring; 17561ce9e605STiwei Bie struct vring_packed_desc_event *driver, *device; 17571ce9e605STiwei Bie dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 17581ce9e605STiwei Bie size_t ring_size_in_bytes, event_size_in_bytes; 17591ce9e605STiwei Bie 17601ce9e605STiwei Bie 
ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 17611ce9e605STiwei Bie 17621ce9e605STiwei Bie ring = vring_alloc_queue(vdev, ring_size_in_bytes, 17631ce9e605STiwei Bie &ring_dma_addr, 17641ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 17651ce9e605STiwei Bie if (!ring) 17661ce9e605STiwei Bie goto err_ring; 17671ce9e605STiwei Bie 17681ce9e605STiwei Bie event_size_in_bytes = sizeof(struct vring_packed_desc_event); 17691ce9e605STiwei Bie 17701ce9e605STiwei Bie driver = vring_alloc_queue(vdev, event_size_in_bytes, 17711ce9e605STiwei Bie &driver_event_dma_addr, 17721ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 17731ce9e605STiwei Bie if (!driver) 17741ce9e605STiwei Bie goto err_driver; 17751ce9e605STiwei Bie 17761ce9e605STiwei Bie device = vring_alloc_queue(vdev, event_size_in_bytes, 17771ce9e605STiwei Bie &device_event_dma_addr, 17781ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 17791ce9e605STiwei Bie if (!device) 17801ce9e605STiwei Bie goto err_device; 17811ce9e605STiwei Bie 17821ce9e605STiwei Bie vq = kmalloc(sizeof(*vq), GFP_KERNEL); 17831ce9e605STiwei Bie if (!vq) 17841ce9e605STiwei Bie goto err_vq; 17851ce9e605STiwei Bie 17861ce9e605STiwei Bie vq->vq.callback = callback; 17871ce9e605STiwei Bie vq->vq.vdev = vdev; 17881ce9e605STiwei Bie vq->vq.name = name; 17891ce9e605STiwei Bie vq->vq.index = index; 17901ce9e605STiwei Bie vq->we_own_ring = true; 17911ce9e605STiwei Bie vq->notify = notify; 17921ce9e605STiwei Bie vq->weak_barriers = weak_barriers; 1793c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 17948b4ec69dSJason Wang vq->broken = true; 1795c346dae4SJason Wang #else 1796c346dae4SJason Wang vq->broken = false; 1797c346dae4SJason Wang #endif 17981ce9e605STiwei Bie vq->packed_ring = true; 17991ce9e605STiwei Bie vq->use_dma_api = vring_use_dma_api(vdev); 18001ce9e605STiwei Bie 18011ce9e605STiwei Bie vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 18021ce9e605STiwei Bie !context; 
18031ce9e605STiwei Bie vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 18041ce9e605STiwei Bie 180545383fb0STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 180645383fb0STiwei Bie vq->weak_barriers = false; 180745383fb0STiwei Bie 18081ce9e605STiwei Bie vq->packed.ring_dma_addr = ring_dma_addr; 18091ce9e605STiwei Bie vq->packed.driver_event_dma_addr = driver_event_dma_addr; 18101ce9e605STiwei Bie vq->packed.device_event_dma_addr = device_event_dma_addr; 18111ce9e605STiwei Bie 18121ce9e605STiwei Bie vq->packed.ring_size_in_bytes = ring_size_in_bytes; 18131ce9e605STiwei Bie vq->packed.event_size_in_bytes = event_size_in_bytes; 18141ce9e605STiwei Bie 18151ce9e605STiwei Bie vq->packed.vring.num = num; 18161ce9e605STiwei Bie vq->packed.vring.desc = ring; 18171ce9e605STiwei Bie vq->packed.vring.driver = driver; 18181ce9e605STiwei Bie vq->packed.vring.device = device; 18191ce9e605STiwei Bie 18201ce9e605STiwei Bie vq->packed.next_avail_idx = 0; 18211ce9e605STiwei Bie vq->packed.avail_wrap_counter = 1; 18221ce9e605STiwei Bie vq->packed.event_flags_shadow = 0; 18231ce9e605STiwei Bie vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 18241ce9e605STiwei Bie 18251ce9e605STiwei Bie vq->packed.desc_state = kmalloc_array(num, 18261ce9e605STiwei Bie sizeof(struct vring_desc_state_packed), 18271ce9e605STiwei Bie GFP_KERNEL); 18281ce9e605STiwei Bie if (!vq->packed.desc_state) 18291ce9e605STiwei Bie goto err_desc_state; 18301ce9e605STiwei Bie 18311ce9e605STiwei Bie memset(vq->packed.desc_state, 0, 18321ce9e605STiwei Bie num * sizeof(struct vring_desc_state_packed)); 18331ce9e605STiwei Bie 18341ce9e605STiwei Bie /* Put everything in free lists. */ 18351ce9e605STiwei Bie vq->free_head = 0; 18361ce9e605STiwei Bie 183796ef18a2SXuan Zhuo vq->packed.desc_extra = vring_alloc_desc_extra(num); 18381ce9e605STiwei Bie if (!vq->packed.desc_extra) 18391ce9e605STiwei Bie goto err_desc_extra; 18401ce9e605STiwei Bie 18411ce9e605STiwei Bie /* No callback? 
Tell other side not to bother us. */ 18421ce9e605STiwei Bie if (!callback) { 18431ce9e605STiwei Bie vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 18441ce9e605STiwei Bie vq->packed.vring.driver->flags = 18451ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 18461ce9e605STiwei Bie } 18471ce9e605STiwei Bie 18483a897128SXuan Zhuo virtqueue_init(vq, num); 18493a897128SXuan Zhuo 18500e566c8fSParav Pandit spin_lock(&vdev->vqs_list_lock); 1851e152d8afSDan Carpenter list_add_tail(&vq->vq.list, &vdev->vqs); 18520e566c8fSParav Pandit spin_unlock(&vdev->vqs_list_lock); 18531ce9e605STiwei Bie return &vq->vq; 18541ce9e605STiwei Bie 18551ce9e605STiwei Bie err_desc_extra: 18561ce9e605STiwei Bie kfree(vq->packed.desc_state); 18571ce9e605STiwei Bie err_desc_state: 18581ce9e605STiwei Bie kfree(vq); 18591ce9e605STiwei Bie err_vq: 1860ae93d8eaSDan Carpenter vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 18611ce9e605STiwei Bie err_device: 1862ae93d8eaSDan Carpenter vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 18631ce9e605STiwei Bie err_driver: 18641ce9e605STiwei Bie vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 18651ce9e605STiwei Bie err_ring: 18661ce9e605STiwei Bie return NULL; 18671ce9e605STiwei Bie } 18681ce9e605STiwei Bie 18691ce9e605STiwei Bie 18701ce9e605STiwei Bie /* 1871e6f633e5STiwei Bie * Generic functions and exported symbols. 
1872e6f633e5STiwei Bie */ 1873e6f633e5STiwei Bie 1874e6f633e5STiwei Bie static inline int virtqueue_add(struct virtqueue *_vq, 1875e6f633e5STiwei Bie struct scatterlist *sgs[], 1876e6f633e5STiwei Bie unsigned int total_sg, 1877e6f633e5STiwei Bie unsigned int out_sgs, 1878e6f633e5STiwei Bie unsigned int in_sgs, 1879e6f633e5STiwei Bie void *data, 1880e6f633e5STiwei Bie void *ctx, 1881e6f633e5STiwei Bie gfp_t gfp) 1882e6f633e5STiwei Bie { 18831ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 18841ce9e605STiwei Bie 18851ce9e605STiwei Bie return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 18861ce9e605STiwei Bie out_sgs, in_sgs, data, ctx, gfp) : 18871ce9e605STiwei Bie virtqueue_add_split(_vq, sgs, total_sg, 1888e6f633e5STiwei Bie out_sgs, in_sgs, data, ctx, gfp); 1889e6f633e5STiwei Bie } 1890e6f633e5STiwei Bie 1891e6f633e5STiwei Bie /** 1892e6f633e5STiwei Bie * virtqueue_add_sgs - expose buffers to other end 1893a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 1894e6f633e5STiwei Bie * @sgs: array of terminated scatterlists. 1895a5581206SJiang Biao * @out_sgs: the number of scatterlists readable by other side 1896a5581206SJiang Biao * @in_sgs: the number of scatterlists which are writable (after readable ones) 1897e6f633e5STiwei Bie * @data: the token identifying the buffer. 1898e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1899e6f633e5STiwei Bie * 1900e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1901e6f633e5STiwei Bie * at the same time (except where noted). 1902e6f633e5STiwei Bie * 1903e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1904e6f633e5STiwei Bie */ 1905e6f633e5STiwei Bie int virtqueue_add_sgs(struct virtqueue *_vq, 1906e6f633e5STiwei Bie struct scatterlist *sgs[], 1907e6f633e5STiwei Bie unsigned int out_sgs, 1908e6f633e5STiwei Bie unsigned int in_sgs, 1909e6f633e5STiwei Bie void *data, 1910e6f633e5STiwei Bie gfp_t gfp) 1911e6f633e5STiwei Bie { 1912e6f633e5STiwei Bie unsigned int i, total_sg = 0; 1913e6f633e5STiwei Bie 1914e6f633e5STiwei Bie /* Count them first. */ 1915e6f633e5STiwei Bie for (i = 0; i < out_sgs + in_sgs; i++) { 1916e6f633e5STiwei Bie struct scatterlist *sg; 1917e6f633e5STiwei Bie 1918e6f633e5STiwei Bie for (sg = sgs[i]; sg; sg = sg_next(sg)) 1919e6f633e5STiwei Bie total_sg++; 1920e6f633e5STiwei Bie } 1921e6f633e5STiwei Bie return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1922e6f633e5STiwei Bie data, NULL, gfp); 1923e6f633e5STiwei Bie } 1924e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1925e6f633e5STiwei Bie 1926e6f633e5STiwei Bie /** 1927e6f633e5STiwei Bie * virtqueue_add_outbuf - expose output buffers to other end 1928e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1929e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 1930e6f633e5STiwei Bie * @num: the number of entries in @sg readable by other side 1931e6f633e5STiwei Bie * @data: the token identifying the buffer. 1932e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1933e6f633e5STiwei Bie * 1934e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1935e6f633e5STiwei Bie * at the same time (except where noted). 1936e6f633e5STiwei Bie * 1937e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
1938e6f633e5STiwei Bie */ 1939e6f633e5STiwei Bie int virtqueue_add_outbuf(struct virtqueue *vq, 1940e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1941e6f633e5STiwei Bie void *data, 1942e6f633e5STiwei Bie gfp_t gfp) 1943e6f633e5STiwei Bie { 1944e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1945e6f633e5STiwei Bie } 1946e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1947e6f633e5STiwei Bie 1948e6f633e5STiwei Bie /** 1949e6f633e5STiwei Bie * virtqueue_add_inbuf - expose input buffers to other end 1950e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1951e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 1952e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 1953e6f633e5STiwei Bie * @data: the token identifying the buffer. 1954e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1955e6f633e5STiwei Bie * 1956e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1957e6f633e5STiwei Bie * at the same time (except where noted). 1958e6f633e5STiwei Bie * 1959e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1960e6f633e5STiwei Bie */ 1961e6f633e5STiwei Bie int virtqueue_add_inbuf(struct virtqueue *vq, 1962e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1963e6f633e5STiwei Bie void *data, 1964e6f633e5STiwei Bie gfp_t gfp) 1965e6f633e5STiwei Bie { 1966e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1967e6f633e5STiwei Bie } 1968e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1969e6f633e5STiwei Bie 1970e6f633e5STiwei Bie /** 1971e6f633e5STiwei Bie * virtqueue_add_inbuf_ctx - expose input buffers to other end 1972e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1973e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 
1974e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 1975e6f633e5STiwei Bie * @data: the token identifying the buffer. 1976e6f633e5STiwei Bie * @ctx: extra context for the token 1977e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1978e6f633e5STiwei Bie * 1979e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1980e6f633e5STiwei Bie * at the same time (except where noted). 1981e6f633e5STiwei Bie * 1982e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1983e6f633e5STiwei Bie */ 1984e6f633e5STiwei Bie int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1985e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1986e6f633e5STiwei Bie void *data, 1987e6f633e5STiwei Bie void *ctx, 1988e6f633e5STiwei Bie gfp_t gfp) 1989e6f633e5STiwei Bie { 1990e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1991e6f633e5STiwei Bie } 1992e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1993e6f633e5STiwei Bie 1994e6f633e5STiwei Bie /** 1995e6f633e5STiwei Bie * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1996a5581206SJiang Biao * @_vq: the struct virtqueue 1997e6f633e5STiwei Bie * 1998e6f633e5STiwei Bie * Instead of virtqueue_kick(), you can do: 1999e6f633e5STiwei Bie * if (virtqueue_kick_prepare(vq)) 2000e6f633e5STiwei Bie * virtqueue_notify(vq); 2001e6f633e5STiwei Bie * 2002e6f633e5STiwei Bie * This is sometimes useful because the virtqueue_kick_prepare() needs 2003e6f633e5STiwei Bie * to be serialized, but the actual virtqueue_notify() call does not. 2004e6f633e5STiwei Bie */ 2005e6f633e5STiwei Bie bool virtqueue_kick_prepare(struct virtqueue *_vq) 2006e6f633e5STiwei Bie { 20071ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 20081ce9e605STiwei Bie 20091ce9e605STiwei Bie return vq->packed_ring ? 
virtqueue_kick_prepare_packed(_vq) : 20101ce9e605STiwei Bie virtqueue_kick_prepare_split(_vq); 2011e6f633e5STiwei Bie } 2012e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 2013e6f633e5STiwei Bie 2014e6f633e5STiwei Bie /** 2015e6f633e5STiwei Bie * virtqueue_notify - second half of split virtqueue_kick call. 2016a5581206SJiang Biao * @_vq: the struct virtqueue 2017e6f633e5STiwei Bie * 2018e6f633e5STiwei Bie * This does not need to be serialized. 2019e6f633e5STiwei Bie * 2020e6f633e5STiwei Bie * Returns false if host notify failed or queue is broken, otherwise true. 2021e6f633e5STiwei Bie */ 2022e6f633e5STiwei Bie bool virtqueue_notify(struct virtqueue *_vq) 2023e6f633e5STiwei Bie { 2024e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 2025e6f633e5STiwei Bie 2026e6f633e5STiwei Bie if (unlikely(vq->broken)) 2027e6f633e5STiwei Bie return false; 2028e6f633e5STiwei Bie 2029e6f633e5STiwei Bie /* Prod other side to tell it about changes. */ 2030e6f633e5STiwei Bie if (!vq->notify(_vq)) { 2031e6f633e5STiwei Bie vq->broken = true; 2032e6f633e5STiwei Bie return false; 2033e6f633e5STiwei Bie } 2034e6f633e5STiwei Bie return true; 2035e6f633e5STiwei Bie } 2036e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_notify); 2037e6f633e5STiwei Bie 2038e6f633e5STiwei Bie /** 2039e6f633e5STiwei Bie * virtqueue_kick - update after add_buf 2040e6f633e5STiwei Bie * @vq: the struct virtqueue 2041e6f633e5STiwei Bie * 2042e6f633e5STiwei Bie * After one or more virtqueue_add_* calls, invoke this to kick 2043e6f633e5STiwei Bie * the other side. 2044e6f633e5STiwei Bie * 2045e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2046e6f633e5STiwei Bie * operations at the same time (except where noted). 2047e6f633e5STiwei Bie * 2048e6f633e5STiwei Bie * Returns false if kick failed, otherwise true. 
2049e6f633e5STiwei Bie */ 2050e6f633e5STiwei Bie bool virtqueue_kick(struct virtqueue *vq) 2051e6f633e5STiwei Bie { 2052e6f633e5STiwei Bie if (virtqueue_kick_prepare(vq)) 2053e6f633e5STiwei Bie return virtqueue_notify(vq); 2054e6f633e5STiwei Bie return true; 2055e6f633e5STiwei Bie } 2056e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick); 2057e6f633e5STiwei Bie 2058e6f633e5STiwei Bie /** 205931c11db6SYang Li * virtqueue_get_buf_ctx - get the next used buffer 2060a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2061e6f633e5STiwei Bie * @len: the length written into the buffer 2062a5581206SJiang Biao * @ctx: extra context for the token 2063e6f633e5STiwei Bie * 2064e6f633e5STiwei Bie * If the device wrote data into the buffer, @len will be set to the 2065e6f633e5STiwei Bie * amount written. This means you don't need to clear the buffer 2066e6f633e5STiwei Bie * beforehand to ensure there's no data leakage in the case of short 2067e6f633e5STiwei Bie * writes. 2068e6f633e5STiwei Bie * 2069e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2070e6f633e5STiwei Bie * operations at the same time (except where noted). 2071e6f633e5STiwei Bie * 2072e6f633e5STiwei Bie * Returns NULL if there are no used buffers, or the "data" token 2073e6f633e5STiwei Bie * handed to virtqueue_add_*(). 2074e6f633e5STiwei Bie */ 2075e6f633e5STiwei Bie void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 2076e6f633e5STiwei Bie void **ctx) 2077e6f633e5STiwei Bie { 20781ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 20791ce9e605STiwei Bie 20801ce9e605STiwei Bie return vq->packed_ring ? 
virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 20811ce9e605STiwei Bie virtqueue_get_buf_ctx_split(_vq, len, ctx); 2082e6f633e5STiwei Bie } 2083e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2084e6f633e5STiwei Bie 2085e6f633e5STiwei Bie void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2086e6f633e5STiwei Bie { 2087e6f633e5STiwei Bie return virtqueue_get_buf_ctx(_vq, len, NULL); 2088e6f633e5STiwei Bie } 2089e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2090e6f633e5STiwei Bie /** 2091e6f633e5STiwei Bie * virtqueue_disable_cb - disable callbacks 2092a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2093e6f633e5STiwei Bie * 2094e6f633e5STiwei Bie * Note that this is not necessarily synchronous, hence unreliable and only 2095e6f633e5STiwei Bie * useful as an optimization. 2096e6f633e5STiwei Bie * 2097e6f633e5STiwei Bie * Unlike other operations, this need not be serialized. 2098e6f633e5STiwei Bie */ 2099e6f633e5STiwei Bie void virtqueue_disable_cb(struct virtqueue *_vq) 2100e6f633e5STiwei Bie { 21011ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 21021ce9e605STiwei Bie 21038d622d21SMichael S. Tsirkin /* If device triggered an event already it won't trigger one again: 21048d622d21SMichael S. Tsirkin * no need to disable. 21058d622d21SMichael S. Tsirkin */ 21068d622d21SMichael S. Tsirkin if (vq->event_triggered) 21078d622d21SMichael S. Tsirkin return; 21088d622d21SMichael S. Tsirkin 21091ce9e605STiwei Bie if (vq->packed_ring) 21101ce9e605STiwei Bie virtqueue_disable_cb_packed(_vq); 21111ce9e605STiwei Bie else 2112e6f633e5STiwei Bie virtqueue_disable_cb_split(_vq); 2113e6f633e5STiwei Bie } 2114e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2115e6f633e5STiwei Bie 2116e6f633e5STiwei Bie /** 2117e6f633e5STiwei Bie * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2118a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 
2119e6f633e5STiwei Bie * 2120e6f633e5STiwei Bie * This re-enables callbacks; it returns current queue state 2121e6f633e5STiwei Bie * in an opaque unsigned value. This value should be later tested by 2122e6f633e5STiwei Bie * virtqueue_poll, to detect a possible race between the driver checking for 2123e6f633e5STiwei Bie * more work, and enabling callbacks. 2124e6f633e5STiwei Bie * 2125e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2126e6f633e5STiwei Bie * operations at the same time (except where noted). 2127e6f633e5STiwei Bie */ 212831532340SSolomon Tan unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2129e6f633e5STiwei Bie { 21301ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 21311ce9e605STiwei Bie 21328d622d21SMichael S. Tsirkin if (vq->event_triggered) 21338d622d21SMichael S. Tsirkin vq->event_triggered = false; 21348d622d21SMichael S. Tsirkin 21351ce9e605STiwei Bie return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 21361ce9e605STiwei Bie virtqueue_enable_cb_prepare_split(_vq); 2137e6f633e5STiwei Bie } 2138e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2139e6f633e5STiwei Bie 2140e6f633e5STiwei Bie /** 2141e6f633e5STiwei Bie * virtqueue_poll - query pending used buffers 2142a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2143e6f633e5STiwei Bie * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2144e6f633e5STiwei Bie * 2145e6f633e5STiwei Bie * Returns "true" if there are pending used buffers in the queue. 2146e6f633e5STiwei Bie * 2147e6f633e5STiwei Bie * This does not need to be serialized. 
2148e6f633e5STiwei Bie */ 214931532340SSolomon Tan bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2150e6f633e5STiwei Bie { 2151e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 2152e6f633e5STiwei Bie 2153481a0d74SMao Wenan if (unlikely(vq->broken)) 2154481a0d74SMao Wenan return false; 2155481a0d74SMao Wenan 2156e6f633e5STiwei Bie virtio_mb(vq->weak_barriers); 21571ce9e605STiwei Bie return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 21581ce9e605STiwei Bie virtqueue_poll_split(_vq, last_used_idx); 2159e6f633e5STiwei Bie } 2160e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_poll); 2161e6f633e5STiwei Bie 2162e6f633e5STiwei Bie /** 2163e6f633e5STiwei Bie * virtqueue_enable_cb - restart callbacks after disable_cb. 2164a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2165e6f633e5STiwei Bie * 2166e6f633e5STiwei Bie * This re-enables callbacks; it returns "false" if there are pending 2167e6f633e5STiwei Bie * buffers in the queue, to detect a possible race between the driver 2168e6f633e5STiwei Bie * checking for more work, and enabling callbacks. 2169e6f633e5STiwei Bie * 2170e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2171e6f633e5STiwei Bie * operations at the same time (except where noted). 2172e6f633e5STiwei Bie */ 2173e6f633e5STiwei Bie bool virtqueue_enable_cb(struct virtqueue *_vq) 2174e6f633e5STiwei Bie { 217531532340SSolomon Tan unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2176e6f633e5STiwei Bie 2177e6f633e5STiwei Bie return !virtqueue_poll(_vq, last_used_idx); 2178e6f633e5STiwei Bie } 2179e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2180e6f633e5STiwei Bie 2181e6f633e5STiwei Bie /** 2182e6f633e5STiwei Bie * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2183a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 
2184e6f633e5STiwei Bie * 2185e6f633e5STiwei Bie * This re-enables callbacks but hints to the other side to delay 2186e6f633e5STiwei Bie * interrupts until most of the available buffers have been processed; 2187e6f633e5STiwei Bie * it returns "false" if there are many pending buffers in the queue, 2188e6f633e5STiwei Bie * to detect a possible race between the driver checking for more work, 2189e6f633e5STiwei Bie * and enabling callbacks. 2190e6f633e5STiwei Bie * 2191e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2192e6f633e5STiwei Bie * operations at the same time (except where noted). 2193e6f633e5STiwei Bie */ 2194e6f633e5STiwei Bie bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2195e6f633e5STiwei Bie { 21961ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 21971ce9e605STiwei Bie 21988d622d21SMichael S. Tsirkin if (vq->event_triggered) 21998d622d21SMichael S. Tsirkin vq->event_triggered = false; 22008d622d21SMichael S. Tsirkin 22011ce9e605STiwei Bie return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 22021ce9e605STiwei Bie virtqueue_enable_cb_delayed_split(_vq); 2203e6f633e5STiwei Bie } 2204e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2205e6f633e5STiwei Bie 2206138fd251STiwei Bie /** 2207138fd251STiwei Bie * virtqueue_detach_unused_buf - detach first unused buffer 2208a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2209138fd251STiwei Bie * 2210138fd251STiwei Bie * Returns NULL or the "data" token handed to virtqueue_add_*(). 2211a62eecb3SXuan Zhuo * This is not valid on an active queue; it is useful for device 2212a62eecb3SXuan Zhuo * shutdown or the reset queue. 2213138fd251STiwei Bie */ 2214138fd251STiwei Bie void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2215138fd251STiwei Bie { 22161ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 22171ce9e605STiwei Bie 22181ce9e605STiwei Bie return vq->packed_ring ? 
virtqueue_detach_unused_buf_packed(_vq) : 22191ce9e605STiwei Bie virtqueue_detach_unused_buf_split(_vq); 2220138fd251STiwei Bie } 22217c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2222c021eac4SShirley Ma 2223138fd251STiwei Bie static inline bool more_used(const struct vring_virtqueue *vq) 2224138fd251STiwei Bie { 22251ce9e605STiwei Bie return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq); 2226138fd251STiwei Bie } 2227138fd251STiwei Bie 22280a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 22290a8a69ddSRusty Russell { 22300a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 22310a8a69ddSRusty Russell 22320a8a69ddSRusty Russell if (!more_used(vq)) { 22330a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 22340a8a69ddSRusty Russell return IRQ_NONE; 22350a8a69ddSRusty Russell } 22360a8a69ddSRusty Russell 22378b4ec69dSJason Wang if (unlikely(vq->broken)) { 2238c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 22398b4ec69dSJason Wang dev_warn_once(&vq->vq.vdev->dev, 22408b4ec69dSJason Wang "virtio vring IRQ raised before DRIVER_OK"); 22418b4ec69dSJason Wang return IRQ_NONE; 2242c346dae4SJason Wang #else 2243c346dae4SJason Wang return IRQ_HANDLED; 2244c346dae4SJason Wang #endif 22458b4ec69dSJason Wang } 22460a8a69ddSRusty Russell 22478d622d21SMichael S. Tsirkin /* Just a hint for performance: so it's ok that this can be racy! */ 22488d622d21SMichael S. Tsirkin if (vq->event) 22498d622d21SMichael S. Tsirkin vq->event_triggered = true; 22508d622d21SMichael S. 
Tsirkin 22510a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 225218445c4dSRusty Russell if (vq->vq.callback) 225318445c4dSRusty Russell vq->vq.callback(&vq->vq); 22540a8a69ddSRusty Russell 22550a8a69ddSRusty Russell return IRQ_HANDLED; 22560a8a69ddSRusty Russell } 2257c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 22580a8a69ddSRusty Russell 22591ce9e605STiwei Bie /* Only available for split ring */ 226007d9629dSXuan Zhuo static struct virtqueue *__vring_new_virtqueue(unsigned int index, 2261cd4c812aSXuan Zhuo struct vring_virtqueue_split *vring_split, 22620a8a69ddSRusty Russell struct virtio_device *vdev, 22637b21e34fSRusty Russell bool weak_barriers, 2264f94682ddSMichael S. Tsirkin bool context, 226546f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *), 22669499f5e7SRusty Russell void (*callback)(struct virtqueue *), 22679499f5e7SRusty Russell const char *name) 22680a8a69ddSRusty Russell { 22692a2d1382SAndy Lutomirski struct vring_virtqueue *vq; 2270*a2b36c8dSXuan Zhuo int err; 22710a8a69ddSRusty Russell 22721ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 22731ce9e605STiwei Bie return NULL; 22741ce9e605STiwei Bie 2275cbeedb72STiwei Bie vq = kmalloc(sizeof(*vq), GFP_KERNEL); 22760a8a69ddSRusty Russell if (!vq) 22770a8a69ddSRusty Russell return NULL; 22780a8a69ddSRusty Russell 22791ce9e605STiwei Bie vq->packed_ring = false; 22800a8a69ddSRusty Russell vq->vq.callback = callback; 22810a8a69ddSRusty Russell vq->vq.vdev = vdev; 22829499f5e7SRusty Russell vq->vq.name = name; 228306ca287dSRusty Russell vq->vq.index = index; 22842a2d1382SAndy Lutomirski vq->we_own_ring = false; 22850a8a69ddSRusty Russell vq->notify = notify; 22867b21e34fSRusty Russell vq->weak_barriers = weak_barriers; 2287c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 22888b4ec69dSJason Wang vq->broken = true; 2289c346dae4SJason Wang #else 2290c346dae4SJason Wang vq->broken = false; 2291c346dae4SJason Wang #endif 
2292fb3fba6bSTiwei Bie vq->use_dma_api = vring_use_dma_api(vdev); 22930a8a69ddSRusty Russell 22945a08b04fSMichael S. Tsirkin vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 22955a08b04fSMichael S. Tsirkin !context; 2296a5c262c5SMichael S. Tsirkin vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 22979fa29b9dSMark McLoughlin 229845383fb0STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 229945383fb0STiwei Bie vq->weak_barriers = false; 230045383fb0STiwei Bie 2301d79dca75STiwei Bie vq->split.queue_dma_addr = 0; 2302d79dca75STiwei Bie vq->split.queue_size_in_bytes = 0; 2303d79dca75STiwei Bie 2304cd4c812aSXuan Zhuo vq->split.vring = vring_split->vring; 2305e593bf97STiwei Bie vq->split.avail_flags_shadow = 0; 2306e593bf97STiwei Bie vq->split.avail_idx_shadow = 0; 2307e593bf97STiwei Bie 23080a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 2309f277ec42SVenkatesh Srinivas if (!callback) { 2310e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 23110ea1e4a6SLadi Prosek if (!vq->event) 2312e593bf97STiwei Bie vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2313e593bf97STiwei Bie vq->split.avail_flags_shadow); 2314f277ec42SVenkatesh Srinivas } 23150a8a69ddSRusty Russell 2316*a2b36c8dSXuan Zhuo err = vring_alloc_state_extra_split(vring_split); 2317*a2b36c8dSXuan Zhuo if (err) { 2318*a2b36c8dSXuan Zhuo kfree(vq); 2319*a2b36c8dSXuan Zhuo return NULL; 2320*a2b36c8dSXuan Zhuo } 232172b5e895SJason Wang 23220a8a69ddSRusty Russell /* Put everything in free lists. 
*/ 23230a8a69ddSRusty Russell vq->free_head = 0; 2324*a2b36c8dSXuan Zhuo 2325*a2b36c8dSXuan Zhuo vq->split.desc_state = vring_split->desc_state; 2326*a2b36c8dSXuan Zhuo vq->split.desc_extra = vring_split->desc_extra; 23270a8a69ddSRusty Russell 2328cd4c812aSXuan Zhuo virtqueue_init(vq, vring_split->vring.num); 23293a897128SXuan Zhuo 23300e566c8fSParav Pandit spin_lock(&vdev->vqs_list_lock); 2331e152d8afSDan Carpenter list_add_tail(&vq->vq.list, &vdev->vqs); 23320e566c8fSParav Pandit spin_unlock(&vdev->vqs_list_lock); 23330a8a69ddSRusty Russell return &vq->vq; 23340a8a69ddSRusty Russell } 23352a2d1382SAndy Lutomirski 23362a2d1382SAndy Lutomirski struct virtqueue *vring_create_virtqueue( 23372a2d1382SAndy Lutomirski unsigned int index, 23382a2d1382SAndy Lutomirski unsigned int num, 23392a2d1382SAndy Lutomirski unsigned int vring_align, 23402a2d1382SAndy Lutomirski struct virtio_device *vdev, 23412a2d1382SAndy Lutomirski bool weak_barriers, 23422a2d1382SAndy Lutomirski bool may_reduce_num, 2343f94682ddSMichael S. 
Tsirkin bool context, 23442a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *), 23452a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *), 23462a2d1382SAndy Lutomirski const char *name) 23472a2d1382SAndy Lutomirski { 23481ce9e605STiwei Bie 23491ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 23501ce9e605STiwei Bie return vring_create_virtqueue_packed(index, num, vring_align, 23511ce9e605STiwei Bie vdev, weak_barriers, may_reduce_num, 23521ce9e605STiwei Bie context, notify, callback, name); 23531ce9e605STiwei Bie 2354d79dca75STiwei Bie return vring_create_virtqueue_split(index, num, vring_align, 2355d79dca75STiwei Bie vdev, weak_barriers, may_reduce_num, 2356d79dca75STiwei Bie context, notify, callback, name); 23572a2d1382SAndy Lutomirski } 23582a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(vring_create_virtqueue); 23592a2d1382SAndy Lutomirski 23601ce9e605STiwei Bie /* Only available for split ring */ 23612a2d1382SAndy Lutomirski struct virtqueue *vring_new_virtqueue(unsigned int index, 23622a2d1382SAndy Lutomirski unsigned int num, 23632a2d1382SAndy Lutomirski unsigned int vring_align, 23642a2d1382SAndy Lutomirski struct virtio_device *vdev, 23652a2d1382SAndy Lutomirski bool weak_barriers, 2366f94682ddSMichael S. 
Tsirkin bool context, 23672a2d1382SAndy Lutomirski void *pages, 23682a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *vq), 23692a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *vq), 23702a2d1382SAndy Lutomirski const char *name) 23712a2d1382SAndy Lutomirski { 2372cd4c812aSXuan Zhuo struct vring_virtqueue_split vring_split = {}; 23731ce9e605STiwei Bie 23741ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 23751ce9e605STiwei Bie return NULL; 23761ce9e605STiwei Bie 2377cd4c812aSXuan Zhuo vring_init(&vring_split.vring, num, pages, vring_align); 2378cd4c812aSXuan Zhuo return __vring_new_virtqueue(index, &vring_split, vdev, weak_barriers, 2379cd4c812aSXuan Zhuo context, notify, callback, name); 23802a2d1382SAndy Lutomirski } 2381c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 23820a8a69ddSRusty Russell 23833ea19e32SXuan Zhuo static void vring_free(struct virtqueue *_vq) 23840a8a69ddSRusty Russell { 23852a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq); 23862a2d1382SAndy Lutomirski 23872a2d1382SAndy Lutomirski if (vq->we_own_ring) { 23881ce9e605STiwei Bie if (vq->packed_ring) { 23891ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 23901ce9e605STiwei Bie vq->packed.ring_size_in_bytes, 23911ce9e605STiwei Bie vq->packed.vring.desc, 23921ce9e605STiwei Bie vq->packed.ring_dma_addr); 23931ce9e605STiwei Bie 23941ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 23951ce9e605STiwei Bie vq->packed.event_size_in_bytes, 23961ce9e605STiwei Bie vq->packed.vring.driver, 23971ce9e605STiwei Bie vq->packed.driver_event_dma_addr); 23981ce9e605STiwei Bie 23991ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 24001ce9e605STiwei Bie vq->packed.event_size_in_bytes, 24011ce9e605STiwei Bie vq->packed.vring.device, 24021ce9e605STiwei Bie vq->packed.device_event_dma_addr); 24031ce9e605STiwei Bie 24041ce9e605STiwei Bie kfree(vq->packed.desc_state); 24051ce9e605STiwei Bie kfree(vq->packed.desc_extra); 24061ce9e605STiwei Bie } else { 
2407d79dca75STiwei Bie vring_free_queue(vq->vq.vdev, 2408d79dca75STiwei Bie vq->split.queue_size_in_bytes, 2409d79dca75STiwei Bie vq->split.vring.desc, 2410d79dca75STiwei Bie vq->split.queue_dma_addr); 2411f13f09a1SSuman Anna } 2412f13f09a1SSuman Anna } 241372b5e895SJason Wang if (!vq->packed_ring) { 2414cbeedb72STiwei Bie kfree(vq->split.desc_state); 241572b5e895SJason Wang kfree(vq->split.desc_extra); 241672b5e895SJason Wang } 24173ea19e32SXuan Zhuo } 24183ea19e32SXuan Zhuo 24193ea19e32SXuan Zhuo void vring_del_virtqueue(struct virtqueue *_vq) 24203ea19e32SXuan Zhuo { 24213ea19e32SXuan Zhuo struct vring_virtqueue *vq = to_vvq(_vq); 24223ea19e32SXuan Zhuo 24233ea19e32SXuan Zhuo spin_lock(&vq->vq.vdev->vqs_list_lock); 24243ea19e32SXuan Zhuo list_del(&_vq->list); 24253ea19e32SXuan Zhuo spin_unlock(&vq->vq.vdev->vqs_list_lock); 24263ea19e32SXuan Zhuo 24273ea19e32SXuan Zhuo vring_free(_vq); 24283ea19e32SXuan Zhuo 24292a2d1382SAndy Lutomirski kfree(vq); 24300a8a69ddSRusty Russell } 2431c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 24320a8a69ddSRusty Russell 2433e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 2434e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 2435e34f8725SRusty Russell { 2436e34f8725SRusty Russell unsigned int i; 2437e34f8725SRusty Russell 2438e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2439e34f8725SRusty Russell switch (i) { 24409fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC: 24419fa29b9dSMark McLoughlin break; 2442a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX: 2443a5c262c5SMichael S. Tsirkin break; 2444747ae34aSMichael S. Tsirkin case VIRTIO_F_VERSION_1: 2445747ae34aSMichael S. Tsirkin break; 2446321bd212SMichael S. Tsirkin case VIRTIO_F_ACCESS_PLATFORM: 24471a937693SMichael S. 
Tsirkin break; 2448f959a128STiwei Bie case VIRTIO_F_RING_PACKED: 2449f959a128STiwei Bie break; 245045383fb0STiwei Bie case VIRTIO_F_ORDER_PLATFORM: 245145383fb0STiwei Bie break; 2452e34f8725SRusty Russell default: 2453e34f8725SRusty Russell /* We don't understand this bit. */ 2454e16e12beSMichael S. Tsirkin __virtio_clear_bit(vdev, i); 2455e34f8725SRusty Russell } 2456e34f8725SRusty Russell } 2457e34f8725SRusty Russell } 2458e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features); 2459e34f8725SRusty Russell 24605dfc1762SRusty Russell /** 24615dfc1762SRusty Russell * virtqueue_get_vring_size - return the size of the virtqueue's vring 2462a5581206SJiang Biao * @_vq: the struct virtqueue containing the vring of interest. 24635dfc1762SRusty Russell * 24645dfc1762SRusty Russell * Returns the size of the vring. This is mainly used for boasting to 24655dfc1762SRusty Russell * userspace. Unlike other operations, this need not be serialized. 24665dfc1762SRusty Russell */ 24678f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 24688f9f4668SRick Jones { 24698f9f4668SRick Jones 24708f9f4668SRick Jones struct vring_virtqueue *vq = to_vvq(_vq); 24718f9f4668SRick Jones 24721ce9e605STiwei Bie return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num; 24738f9f4668SRick Jones } 24748f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 24758f9f4668SRick Jones 2476b3b32c94SHeinz Graalfs bool virtqueue_is_broken(struct virtqueue *_vq) 2477b3b32c94SHeinz Graalfs { 2478b3b32c94SHeinz Graalfs struct vring_virtqueue *vq = to_vvq(_vq); 2479b3b32c94SHeinz Graalfs 248060f07798SParav Pandit return READ_ONCE(vq->broken); 2481b3b32c94SHeinz Graalfs } 2482b3b32c94SHeinz Graalfs EXPORT_SYMBOL_GPL(virtqueue_is_broken); 2483b3b32c94SHeinz Graalfs 2484e2dcdfe9SRusty Russell /* 2485e2dcdfe9SRusty Russell * This should prevent the device from being used, allowing drivers to 2486e2dcdfe9SRusty Russell * recover. 
You may need to grab appropriate locks to flush. 2487e2dcdfe9SRusty Russell */ 2488e2dcdfe9SRusty Russell void virtio_break_device(struct virtio_device *dev) 2489e2dcdfe9SRusty Russell { 2490e2dcdfe9SRusty Russell struct virtqueue *_vq; 2491e2dcdfe9SRusty Russell 24920e566c8fSParav Pandit spin_lock(&dev->vqs_list_lock); 2493e2dcdfe9SRusty Russell list_for_each_entry(_vq, &dev->vqs, list) { 2494e2dcdfe9SRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 249560f07798SParav Pandit 249660f07798SParav Pandit /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ 249760f07798SParav Pandit WRITE_ONCE(vq->broken, true); 2498e2dcdfe9SRusty Russell } 24990e566c8fSParav Pandit spin_unlock(&dev->vqs_list_lock); 2500e2dcdfe9SRusty Russell } 2501e2dcdfe9SRusty Russell EXPORT_SYMBOL_GPL(virtio_break_device); 2502e2dcdfe9SRusty Russell 2503be83f04dSJason Wang /* 2504be83f04dSJason Wang * This should allow the device to be used by the driver. You may 2505be83f04dSJason Wang * need to grab appropriate locks to flush the write to 2506be83f04dSJason Wang * vq->broken. This should only be used in some specific case e.g 2507be83f04dSJason Wang * (probing and restoring). This function should only be called by the 2508be83f04dSJason Wang * core, not directly by the driver. 2509be83f04dSJason Wang */ 2510be83f04dSJason Wang void __virtio_unbreak_device(struct virtio_device *dev) 2511be83f04dSJason Wang { 2512be83f04dSJason Wang struct virtqueue *_vq; 2513be83f04dSJason Wang 2514be83f04dSJason Wang spin_lock(&dev->vqs_list_lock); 2515be83f04dSJason Wang list_for_each_entry(_vq, &dev->vqs, list) { 2516be83f04dSJason Wang struct vring_virtqueue *vq = to_vvq(_vq); 2517be83f04dSJason Wang 2518be83f04dSJason Wang /* Pairs with READ_ONCE() in virtqueue_is_broken(). 
*/ 2519be83f04dSJason Wang WRITE_ONCE(vq->broken, false); 2520be83f04dSJason Wang } 2521be83f04dSJason Wang spin_unlock(&dev->vqs_list_lock); 2522be83f04dSJason Wang } 2523be83f04dSJason Wang EXPORT_SYMBOL_GPL(__virtio_unbreak_device); 2524be83f04dSJason Wang 25252a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq) 252689062652SCornelia Huck { 252789062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq); 252889062652SCornelia Huck 25292a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 253089062652SCornelia Huck 25311ce9e605STiwei Bie if (vq->packed_ring) 25321ce9e605STiwei Bie return vq->packed.ring_dma_addr; 25331ce9e605STiwei Bie 2534d79dca75STiwei Bie return vq->split.queue_dma_addr; 25352a2d1382SAndy Lutomirski } 25362a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr); 25372a2d1382SAndy Lutomirski 25382a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq) 253989062652SCornelia Huck { 254089062652SCornelia Huck struct vring_virtqueue *vq = to_vvq(_vq); 254189062652SCornelia Huck 25422a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 25432a2d1382SAndy Lutomirski 25441ce9e605STiwei Bie if (vq->packed_ring) 25451ce9e605STiwei Bie return vq->packed.driver_event_dma_addr; 25461ce9e605STiwei Bie 2547d79dca75STiwei Bie return vq->split.queue_dma_addr + 2548e593bf97STiwei Bie ((char *)vq->split.vring.avail - (char *)vq->split.vring.desc); 254989062652SCornelia Huck } 25502a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr); 25512a2d1382SAndy Lutomirski 25522a2d1382SAndy Lutomirski dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq) 25532a2d1382SAndy Lutomirski { 25542a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq); 25552a2d1382SAndy Lutomirski 25562a2d1382SAndy Lutomirski BUG_ON(!vq->we_own_ring); 25572a2d1382SAndy Lutomirski 25581ce9e605STiwei Bie if (vq->packed_ring) 25591ce9e605STiwei Bie return vq->packed.device_event_dma_addr; 25601ce9e605STiwei 
Bie 2561d79dca75STiwei Bie return vq->split.queue_dma_addr + 2562e593bf97STiwei Bie ((char *)vq->split.vring.used - (char *)vq->split.vring.desc); 25632a2d1382SAndy Lutomirski } 25642a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_used_addr); 25652a2d1382SAndy Lutomirski 25661ce9e605STiwei Bie /* Only available for split ring */ 25672a2d1382SAndy Lutomirski const struct vring *virtqueue_get_vring(struct virtqueue *vq) 25682a2d1382SAndy Lutomirski { 2569e593bf97STiwei Bie return &to_vvq(vq)->split.vring; 25702a2d1382SAndy Lutomirski } 25712a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(virtqueue_get_vring); 257289062652SCornelia Huck 2573c6fd4701SRusty Russell MODULE_LICENSE("GPL"); 2574