// SPDX-License-Identifier: GPL-2.0-or-later
/* Virtio ring implementation.
 *
 *  Copyright 2007 Rusty Russell IBM Corporation
 */
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <xen/xen.h>

#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#define LAST_ADD_TIME_UPDATE(_vq)				\
	do {							\
		ktime_t now = ktime_get();			\
								\
		/* No kick or get, with .1 second between?  Warn. */ \
		if ((_vq)->last_add_time_valid)			\
			WARN_ON(ktime_to_ms(ktime_sub(now,	\
				(_vq)->last_add_time)) > 100);	\
		(_vq)->last_add_time = now;			\
		(_vq)->last_add_time_valid = true;		\
	} while (0)
#define LAST_ADD_TIME_CHECK(_vq)				\
	do {							\
		if ((_vq)->last_add_time_valid) {		\
			WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), \
					(_vq)->last_add_time)) > 100); \
		}						\
	} while (0)
#define LAST_ADD_TIME_INVALID(_vq)				\
	((_vq)->last_add_time_valid = false)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&_vq->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#define LAST_ADD_TIME_UPDATE(vq)
#define LAST_ADD_TIME_CHECK(vq)
#define LAST_ADD_TIME_INVALID(vq)
#endif

struct vring_desc_state_split {
	void *data;			/* Data for callback. */
	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
};

struct vring_desc_state_packed {
	void *data;			/* Data for callback. */
	struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */
	u16 num;			/* Descriptor list length. */
	u16 last;			/* The last desc state in a list. */
};

struct vring_desc_extra {
	dma_addr_t addr;		/* Descriptor DMA addr. */
	u32 len;			/* Descriptor length. */
	u16 flags;			/* Descriptor flags. */
	u16 next;			/* The next desc state in a list. */
};

struct vring_virtqueue {
	struct virtqueue vq;

	/* Is this a packed ring? */
	bool packed_ring;

	/* Is DMA API used? */
	bool use_dma_api;

	/* Can we use weak barriers? */
	bool weak_barriers;

	/* Other side has made a mess, don't try any more. */
	bool broken;

	/* Host supports indirect buffers */
	bool indirect;

	/* Host publishes avail event idx */
	bool event;

	/* Head of free buffer list. */
	unsigned int free_head;
	/* Number we've added since last sync. */
	unsigned int num_added;

	/* Last used index we've seen.
	 * For the split ring, it just contains the last used index.
	 * For the packed ring:
	 * bits up to VRING_PACKED_EVENT_F_WRAP_CTR contain the last used index;
	 * bits from VRING_PACKED_EVENT_F_WRAP_CTR contain the used wrap counter.
	 */
	u16 last_used_idx;

	/* Hint for event idx: already triggered no need to disable. */
	bool event_triggered;

	union {
		/* Available for split ring */
		struct {
			/* Actual memory layout for this queue. */
			struct vring vring;

			/* Last written value to avail->flags */
			u16 avail_flags_shadow;

			/*
			 * Last written value to avail->idx in
			 * guest byte order.
			 */
			u16 avail_idx_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_split *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t queue_dma_addr;
			size_t queue_size_in_bytes;
		} split;

		/* Available for packed ring */
		struct {
			/* Actual memory layout for this queue. */
			struct {
				unsigned int num;
				struct vring_packed_desc *desc;
				struct vring_packed_desc_event *driver;
				struct vring_packed_desc_event *device;
			} vring;

			/* Driver ring wrap counter. */
			bool avail_wrap_counter;

			/* Avail used flags. */
			u16 avail_used_flags;

			/* Index of the next avail descriptor. */
			u16 next_avail_idx;

			/*
			 * Last written value to driver->flags in
			 * guest byte order.
			 */
			u16 event_flags_shadow;

			/* Per-descriptor state. */
			struct vring_desc_state_packed *desc_state;
			struct vring_desc_extra *desc_extra;

			/* DMA address and size information */
			dma_addr_t ring_dma_addr;
			dma_addr_t driver_event_dma_addr;
			dma_addr_t device_event_dma_addr;
			size_t ring_size_in_bytes;
			size_t event_size_in_bytes;
		} packed;
	};

	/* How to notify other side. FIXME: commonalize hcalls! */
	bool (*notify)(struct virtqueue *vq);

	/* DMA, allocation, and size information */
	bool we_own_ring;

#ifdef DEBUG
	/* They're supposed to lock for us. */
	unsigned int in_use;

	/* Figure out if their kicks are too delayed. */
	bool last_add_time_valid;
	ktime_t last_add_time;
#endif
};


/*
 * Helpers.
 */

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
					  unsigned int total_sg)
{
	/*
	 * If the host supports indirect descriptor tables, and we have multiple
	 * buffers, then go indirect. FIXME: tune this threshold
	 */
	return (vq->indirect && total_sg > 1 && vq->vq.num_free);
}
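
/*
 * Hedged illustration (not part of the driver): the counting that feeds
 * the indirect decision above. The helper below is hypothetical;
 * virtqueue_add() performs the equivalent walk over sgs[] before calling
 * into the split/packed add paths, so indirect is only considered when
 * a request spans more than one sg entry.
 */
static inline unsigned int example_count_total_sg(struct scatterlist *sgs[],
						  unsigned int out_sgs,
						  unsigned int in_sgs)
{
	struct scatterlist *sg;
	unsigned int n, total_sg = 0;

	for (n = 0; n < out_sgs + in_sgs; n++)
		for (sg = sgs[n]; sg; sg = sg_next(sg))
			total_sg++;
	return total_sg;	/* indirect is only considered when > 1 */
}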

/*
 * Modern virtio devices have feature bits to specify whether they need a
 * quirk and bypass the IOMMU. If not there, just use the DMA API.
 *
 * If there, the interaction between virtio and DMA API is messy.
 *
 * On most systems with virtio, physical addresses match bus addresses,
 * and it doesn't particularly matter whether we use the DMA API.
 *
 * On some systems, including Xen and any system with a physical device
 * that speaks virtio behind a physical IOMMU, we must use the DMA API
 * for virtio DMA to work at all.
 *
 * On other systems, including SPARC and PPC64, virtio-pci devices are
 * enumerated as though they are behind an IOMMU, but the virtio host
 * ignores the IOMMU, so we must either pretend that the IOMMU isn't
 * there or somehow map everything as the identity.
 *
 * For the time being, we preserve historic behavior and bypass the DMA
 * API.
 *
 * TODO: install a per-device DMA ops structure that does the right thing
 * taking into account all the above quirks, and use the DMA API
 * unconditionally on data path.
 */

static bool vring_use_dma_api(struct virtio_device *vdev)
{
	if (!virtio_has_dma_quirk(vdev))
		return true;

	/* Otherwise, we are left to guess. */
	/*
	 * In theory, it's possible to have a buggy QEMU-supplied
	 * emulated Q35 IOMMU and Xen enabled at the same time.  On
	 * such a configuration, virtio has never worked and will
	 * not work without an even larger kludge.  Instead, enable
	 * the DMA API if we're a Xen guest, which at least allows
	 * all of the sensible Xen configurations to work correctly.
	 */
	if (xen_domain())
		return true;

	return false;
}

size_t virtio_max_dma_size(struct virtio_device *vdev)
{
	size_t max_segment_size = SIZE_MAX;

	if (vring_use_dma_api(vdev))
		max_segment_size = dma_max_mapping_size(vdev->dev.parent);

	return max_segment_size;
}
EXPORT_SYMBOL_GPL(virtio_max_dma_size);
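
/*
 * Hedged usage sketch: a driver can use the exported helper above to cap
 * how large a single scatterlist segment it builds, so that DMA-layer
 * limits (e.g. a bounce buffer such as swiotlb) are respected. The
 * function below is hypothetical; virtio-blk applies a similar cap to
 * its request queue.
 */
#if 0	/* illustration only */
static size_t example_segment_cap(struct virtio_device *vdev)
{
	/* Never build segments larger than the DMA layer can map. */
	return min_t(size_t, U32_MAX, virtio_max_dma_size(vdev));
}
#endif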

static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
			       dma_addr_t *dma_handle, gfp_t flag)
{
	if (vring_use_dma_api(vdev)) {
		return dma_alloc_coherent(vdev->dev.parent, size,
					  dma_handle, flag);
	} else {
		void *queue = alloc_pages_exact(PAGE_ALIGN(size), flag);

		if (queue) {
			phys_addr_t phys_addr = virt_to_phys(queue);
			*dma_handle = (dma_addr_t)phys_addr;

			/*
			 * Sanity check: make sure we didn't truncate
			 * the address.  The only arches I can find that
			 * have 64-bit phys_addr_t but 32-bit dma_addr_t
			 * are certain non-highmem MIPS and x86
			 * configurations, but these configurations
			 * should never allocate physical pages above 32
			 * bits, so this is fine.  Just in case, throw a
			 * warning and abort if we end up with an
			 * unrepresentable address.
			 */
			if (WARN_ON_ONCE(*dma_handle != phys_addr)) {
				free_pages_exact(queue, PAGE_ALIGN(size));
				return NULL;
			}
		}
		return queue;
	}
}

static void vring_free_queue(struct virtio_device *vdev, size_t size,
			     void *queue, dma_addr_t dma_handle)
{
	if (vring_use_dma_api(vdev))
		dma_free_coherent(vdev->dev.parent, size, queue, dma_handle);
	else
		free_pages_exact(queue, PAGE_ALIGN(size));
}
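
/*
 * Hedged sketch of the alloc/free pairing above (the 128-entry ring and
 * the surrounding function are made up for illustration): whichever
 * branch allocated the memory, the same dma_handle must round-trip into
 * vring_free_queue().
 */
#if 0	/* illustration only */
	dma_addr_t dma;
	size_t size = vring_size(128, SMP_CACHE_BYTES);
	void *queue = vring_alloc_queue(vdev, size, &dma,
					GFP_KERNEL | __GFP_ZERO);

	if (queue)
		vring_free_queue(vdev, size, queue, dma);
#endif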

/*
 * The DMA ops on various arches are rather gnarly right now, and
 * making all of the arch DMA ops work on the vring device itself
 * is a mess.  For now, we use the parent device for DMA ops.
 */
static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
{
	return vq->vq.vdev->dev.parent;
}

/* Map one sg entry. */
static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
				   struct scatterlist *sg,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)sg_phys(sg);

	/*
	 * We can't use dma_map_sg, because we don't use scatterlists in
	 * the way it expects (we don't guarantee that the scatterlist
	 * will exist for the lifetime of the mapping).
	 */
	return dma_map_page(vring_dma_dev(vq),
			    sg_page(sg), sg->offset, sg->length,
			    direction);
}

static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
				   void *cpu_addr, size_t size,
				   enum dma_data_direction direction)
{
	if (!vq->use_dma_api)
		return (dma_addr_t)virt_to_phys(cpu_addr);

	return dma_map_single(vring_dma_dev(vq),
			      cpu_addr, size, direction);
}

static int vring_mapping_error(const struct vring_virtqueue *vq,
			       dma_addr_t addr)
{
	if (!vq->use_dma_api)
		return 0;

	return dma_mapping_error(vring_dma_dev(vq), addr);
}
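
/*
 * Hedged sketch: the map-then-check pattern every add path below follows
 * for each sg entry. On failure the callers jump to an unmap_release
 * label that unwinds the entries mapped so far. Snippet is illustrative,
 * not a standalone function.
 */
#if 0	/* illustration only */
	dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);

	if (vring_mapping_error(vq, addr))
		goto unmap_release;	/* unwind earlier mappings */
#endif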

/*
 * Split ring specific functions - *_split().
 */

static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
					   struct vring_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);

	dma_unmap_page(vring_dma_dev(vq),
		       virtio64_to_cpu(vq->vq.vdev, desc->addr),
		       virtio32_to_cpu(vq->vq.vdev, desc->len),
		       (flags & VRING_DESC_F_WRITE) ?
		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
}

static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
					  unsigned int i)
{
	struct vring_desc_extra *extra = vq->split.desc_extra;
	u16 flags;

	if (!vq->use_dma_api)
		goto out;

	flags = extra[i].flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra[i].addr,
				 extra[i].len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra[i].addr,
			       extra[i].len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}

out:
	return extra[i].next;
}

static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq,
					       unsigned int total_sg,
					       gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}

static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
						    struct vring_desc *desc,
						    unsigned int i,
						    dma_addr_t addr,
						    unsigned int len,
						    u16 flags,
						    bool indirect)
{
	struct vring_virtqueue *vring = to_vvq(vq);
	struct vring_desc_extra *extra = vring->split.desc_extra;
	u16 next;

	desc[i].flags = cpu_to_virtio16(vq->vdev, flags);
	desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
	desc[i].len = cpu_to_virtio32(vq->vdev, len);

	if (!indirect) {
		next = extra[i].next;
		desc[i].next = cpu_to_virtio16(vq->vdev, next);

		extra[i].addr = addr;
		extra[i].len = len;
		extra[i].flags = flags;
	} else
		next = virtio16_to_cpu(vq->vdev, desc[i].next);

	return next;
}
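
/*
 * Hedged illustration: writing a two-descriptor direct chain with the
 * helper above. Addresses and lengths are made up; the real add path
 * below derives them from a scatterlist, and the returned index walks
 * the free list kept in desc_extra[].next.
 */
#if 0	/* illustration only */
	i = virtqueue_add_desc_split(_vq, desc, i, addr0, len0,
				     VRING_DESC_F_NEXT, false);
	i = virtqueue_add_desc_split(_vq, desc, i, addr1, len1,
				     VRING_DESC_F_NEXT | VRING_DESC_F_WRITE,
				     false);
#endif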

static inline int virtqueue_add_split(struct virtqueue *_vq,
				      struct scatterlist *sgs[],
				      unsigned int total_sg,
				      unsigned int out_sgs,
				      unsigned int in_sgs,
				      void *data,
				      void *ctx,
				      gfp_t gfp)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	struct scatterlist *sg;
	struct vring_desc *desc;
	unsigned int i, n, avail, descs_used, prev, err_idx;
	int head;
	bool indirect;

	START_USE(vq);

	BUG_ON(data == NULL);
	BUG_ON(ctx && vq->indirect);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return -EIO;
	}

	LAST_ADD_TIME_UPDATE(vq);

	BUG_ON(total_sg == 0);

	head = vq->free_head;

	if (virtqueue_use_indirect(vq, total_sg))
		desc = alloc_indirect_split(_vq, total_sg, gfp);
	else {
		desc = NULL;
		WARN_ON_ONCE(total_sg > vq->split.vring.num && !vq->indirect);
	}

	if (desc) {
		/* Use a single buffer which doesn't continue */
		indirect = true;
		/* Set up rest to use this indirect table. */
		i = 0;
		descs_used = 1;
	} else {
		indirect = false;
		desc = vq->split.vring.desc;
		i = head;
		descs_used = total_sg;
	}

	if (unlikely(vq->vq.num_free < descs_used)) {
		pr_debug("Can't add buf len %i - avail = %i\n",
			 descs_used, vq->vq.num_free);
		/* FIXME: for historical reasons, we force a notify here if
		 * there are outgoing parts to the buffer.  Presumably the
		 * host should service the ring ASAP. */
		if (out_sgs)
			vq->notify(&vq->vq);
		if (indirect)
			kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	for (n = 0; n < out_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length,
						     VRING_DESC_F_NEXT,
						     indirect);
		}
	}
	for (; n < (out_sgs + in_sgs); n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			prev = i;
			/* Note that we trust the indirect descriptor
			 * table since it uses streaming DMA mapping.
			 */
			i = virtqueue_add_desc_split(_vq, desc, i, addr,
						     sg->length,
						     VRING_DESC_F_NEXT |
						     VRING_DESC_F_WRITE,
						     indirect);
		}
	}
	/* Last one doesn't continue. */
	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
	if (!indirect && vq->use_dma_api)
		vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
			~VRING_DESC_F_NEXT;

	if (indirect) {
		/* Now that the indirect table is filled in, map it. */
		dma_addr_t addr = vring_map_single(
			vq, desc, total_sg * sizeof(struct vring_desc),
			DMA_TO_DEVICE);
		if (vring_mapping_error(vq, addr))
			goto unmap_release;

		virtqueue_add_desc_split(_vq, vq->split.vring.desc,
					 head, addr,
					 total_sg * sizeof(struct vring_desc),
					 VRING_DESC_F_INDIRECT,
					 false);
	}

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= descs_used;

	/* Update free pointer */
	if (indirect)
		vq->free_head = vq->split.desc_extra[head].next;
	else
		vq->free_head = i;

	/* Store token and indirect buffer state. */
	vq->split.desc_state[head].data = data;
	if (indirect)
		vq->split.desc_state[head].indir_desc = desc;
	else
		vq->split.desc_state[head].indir_desc = ctx;

	/* Put entry in available array (but don't update avail->idx until they
	 * do sync). */
	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
	vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);

	/* Descriptors and available array need to be set before we expose the
	 * new available array entries. */
	virtio_wmb(vq->weak_barriers);
	vq->split.avail_idx_shadow++;
	vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
						     vq->split.avail_idx_shadow);
	vq->num_added++;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	/* This is very unlikely, but theoretically possible.  Kick
	 * just in case. */
	if (unlikely(vq->num_added == (1 << 16) - 1))
		virtqueue_kick(_vq);

	return 0;

unmap_release:
	err_idx = i;

	if (indirect)
		i = 0;
	else
		i = head;

	for (n = 0; n < total_sg; n++) {
		if (i == err_idx)
			break;
		if (indirect) {
			vring_unmap_one_split_indirect(vq, &desc[i]);
			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
		} else
			i = vring_unmap_one_split(vq, i);
	}

	if (indirect)
		kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
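
/*
 * Hedged usage sketch: what the add path above looks like from a driver,
 * via the public wrappers (buf, len and the error handling are made up).
 */
#if 0	/* illustration only */
	struct scatterlist sg;

	sg_init_one(&sg, buf, len);
	err = virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (!err && virtqueue_kick_prepare(vq))
		virtqueue_notify(vq);
#endif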

static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 new, old;
	bool needs_kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	old = vq->split.avail_idx_shadow - vq->num_added;
	new = vq->split.avail_idx_shadow;
	vq->num_added = 0;

	LAST_ADD_TIME_CHECK(vq);
	LAST_ADD_TIME_INVALID(vq);

	if (vq->event) {
		needs_kick = vring_need_event(virtio16_to_cpu(_vq->vdev,
					vring_avail_event(&vq->split.vring)),
					      new, old);
	} else {
		needs_kick = !(vq->split.vring.used->flags &
					cpu_to_virtio16(_vq->vdev,
						VRING_USED_F_NO_NOTIFY));
	}
	END_USE(vq);
	return needs_kick;
}

static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
			     void **ctx)
{
	unsigned int i, j;
	__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);

	/* Clear data ptr. */
	vq->split.desc_state[head].data = NULL;

	/* Put back on free list: unmap first-level descriptors and find end */
	i = head;

	while (vq->split.vring.desc[i].flags & nextflag) {
		vring_unmap_one_split(vq, i);
		i = vq->split.desc_extra[i].next;
		vq->vq.num_free++;
	}

	vring_unmap_one_split(vq, i);
	vq->split.desc_extra[i].next = vq->free_head;
	vq->free_head = head;

	/* Plus final descriptor */
	vq->vq.num_free++;

	if (vq->indirect) {
		struct vring_desc *indir_desc =
				vq->split.desc_state[head].indir_desc;
		u32 len;

		/* Free the indirect table, if any, now that it's unmapped. */
		if (!indir_desc)
			return;

		len = vq->split.desc_extra[head].len;

		BUG_ON(!(vq->split.desc_extra[head].flags &
				VRING_DESC_F_INDIRECT));
		BUG_ON(len == 0 || len % sizeof(struct vring_desc));

		for (j = 0; j < len / sizeof(struct vring_desc); j++)
			vring_unmap_one_split_indirect(vq, &indir_desc[j]);

		kfree(indir_desc);
		vq->split.desc_state[head].indir_desc = NULL;
	} else if (ctx) {
		*ctx = vq->split.desc_state[head].indir_desc;
	}
}

static inline bool more_used_split(const struct vring_virtqueue *vq)
{
	return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev,
			vq->split.vring.used->idx);
}

static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
					 unsigned int *len,
					 void **ctx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *ret;
	unsigned int i;
	u16 last_used;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used_split(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	last_used = (vq->last_used_idx & (vq->split.vring.num - 1));
	i = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].id);
	*len = virtio32_to_cpu(_vq->vdev,
			vq->split.vring.used->ring[last_used].len);

	if (unlikely(i >= vq->split.vring.num)) {
		BAD_RING(vq, "id %u out of range\n", i);
		return NULL;
	}
	if (unlikely(!vq->split.desc_state[i].data)) {
		BAD_RING(vq, "id %u is not a head!\n", i);
		return NULL;
	}

	/* detach_buf_split clears data, so grab it now. */
	ret = vq->split.desc_state[i].data;
	detach_buf_split(vq, i, ctx);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT))
		virtio_store_mb(vq->weak_barriers,
				&vring_used_event(&vq->split.vring),
				cpu_to_virtio16(_vq->vdev, vq->last_used_idx));

	LAST_ADD_TIME_INVALID(vq);

	END_USE(vq);
	return ret;
}

static void virtqueue_disable_cb_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!(vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (vq->event)
			/* TODO: this is a hack. Figure out a cleaner value to write. */
			vring_used_event(&vq->split.vring) = 0x0;
		else
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
}
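
/*
 * Hedged usage sketch: how a driver's completion handler typically drains
 * the used ring through the public wrapper around the function above
 * (process() and the buffer ownership are hypothetical).
 */
#if 0	/* illustration only */
	unsigned int len;
	void *buf;

	while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
		process(buf, len);
#endif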

static unsigned int virtqueue_enable_cb_prepare_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 last_used_idx;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry.  Always do both to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	vring_used_event(&vq->split.vring) = cpu_to_virtio16(_vq->vdev,
			last_used_idx = vq->last_used_idx);
	END_USE(vq);
	return last_used_idx;
}

static bool virtqueue_poll_split(struct virtqueue *_vq, unsigned int last_used_idx)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev,
			vq->split.vring.used->idx);
}

static bool virtqueue_enable_cb_delayed_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 bufs;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry.  Always update the event index to keep code simple. */
	if (vq->split.avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->split.avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->split.vring.avail->flags =
				cpu_to_virtio16(_vq->vdev,
						vq->split.avail_flags_shadow);
	}
	/* TODO: tune this threshold */
	bufs = (u16)(vq->split.avail_idx_shadow - vq->last_used_idx) * 3 / 4;

	virtio_store_mb(vq->weak_barriers,
			&vring_used_event(&vq->split.vring),
			cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs));

	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->split.vring.used->idx)
					- vq->last_used_idx) > bufs)) {
		END_USE(vq);
		return false;
	}

	END_USE(vq);
	return true;
}
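
/*
 * Hedged sketch: the race-free re-enable idiom the prepare/poll pair above
 * supports, as used through the public wrappers (virtio-net's NAPI path
 * does something similar). opaque captures last_used_idx at enable time.
 */
#if 0	/* illustration only */
	unsigned int opaque = virtqueue_enable_cb_prepare(vq);

	if (unlikely(virtqueue_poll(vq, opaque))) {
		/* A buffer slipped in after we re-enabled: go process it. */
		virtqueue_disable_cb(vq);
	}
#endif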

static void *virtqueue_detach_unused_buf_split(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int i;
	void *buf;

	START_USE(vq);

	for (i = 0; i < vq->split.vring.num; i++) {
		if (!vq->split.desc_state[i].data)
			continue;
		/* detach_buf_split clears data, so grab it now. */
		buf = vq->split.desc_state[i].data;
		detach_buf_split(vq, i, NULL);
		vq->split.avail_idx_shadow--;
		vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
				vq->split.avail_idx_shadow);
		END_USE(vq);
		return buf;
	}
	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->split.vring.num);

	END_USE(vq);
	return NULL;
}
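
/*
 * Hedged usage sketch: drivers call the public wrapper for the function
 * above during teardown, after the device has been reset, to reclaim
 * buffers the device never consumed (kfree() assumes driver-owned
 * allocations; real drivers free whatever they originally queued).
 */
#if 0	/* illustration only */
	void *buf;

	while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
		kfree(buf);
#endif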

static struct virtqueue *vring_create_virtqueue_split(
	unsigned int index,
	unsigned int num,
	unsigned int vring_align,
	struct virtio_device *vdev,
	bool weak_barriers,
	bool may_reduce_num,
	bool context,
	bool (*notify)(struct virtqueue *),
	void (*callback)(struct virtqueue *),
	const char *name)
{
	struct virtqueue *vq;
	void *queue = NULL;
	dma_addr_t dma_addr;
	size_t queue_size_in_bytes;
	struct vring vring;

	/* We assume num is a power of 2. */
	if (num & (num - 1)) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* TODO: allocate each queue chunk individually */
	for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr,
					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
		if (queue)
			break;
		if (!may_reduce_num)
			return NULL;
	}

	if (!num)
		return NULL;

	if (!queue) {
		/* Try to get a single page. You are my only hope! */
		queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
					  &dma_addr, GFP_KERNEL|__GFP_ZERO);
	}
	if (!queue)
		return NULL;

	queue_size_in_bytes = vring_size(num, vring_align);
	vring_init(&vring, num, queue, vring_align);

	vq = __vring_new_virtqueue(index, vring, vdev, weak_barriers, context,
				   notify, callback, name);
	if (!vq) {
		vring_free_queue(vdev, queue_size_in_bytes, queue,
				 dma_addr);
		return NULL;
	}

	to_vvq(vq)->split.queue_dma_addr = dma_addr;
	to_vvq(vq)->split.queue_size_in_bytes = queue_size_in_bytes;
	to_vvq(vq)->we_own_ring = true;

	return vq;
}
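
/*
 * Hedged usage sketch: how a transport might instantiate a 256-entry
 * split ring with the constructor above (parameters are made up; real
 * transports go through vring_create_virtqueue(), which picks split or
 * packed based on the negotiated features).
 */
#if 0	/* illustration only */
	vq = vring_create_virtqueue_split(0, 256, SMP_CACHE_BYTES, vdev,
					  true  /* weak_barriers */,
					  true  /* may_reduce_num */,
					  false /* context */,
					  notify_fn, callback_fn, "requests");
#endif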


/*
 * Packed ring specific functions - *_packed().
 */
static inline bool packed_used_wrap_counter(u16 last_used_idx)
{
	return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}

static inline u16 packed_last_used(u16 last_used_idx)
{
	return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
}
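
/*
 * Hedged illustration of the encoding the two helpers above decode:
 * last_used_idx packs the used wrap counter into bit
 * VRING_PACKED_EVENT_F_WRAP_CTR (15), above the 15-bit used index.
 * Values below are made up.
 */
#if 0	/* illustration only */
	u16 last_used_idx = (1 << VRING_PACKED_EVENT_F_WRAP_CTR) | 42;

	BUG_ON(!packed_used_wrap_counter(last_used_idx));	/* counter = 1 */
	BUG_ON(packed_last_used(last_used_idx) != 42);		/* index = 42 */
#endif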

static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
				     struct vring_desc_extra *extra)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = extra->flags;

	if (flags & VRING_DESC_F_INDIRECT) {
		dma_unmap_single(vring_dma_dev(vq),
				 extra->addr, extra->len,
				 (flags & VRING_DESC_F_WRITE) ?
				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
	} else {
		dma_unmap_page(vring_dma_dev(vq),
			       extra->addr, extra->len,
			       (flags & VRING_DESC_F_WRITE) ?
			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
}

static void vring_unmap_desc_packed(const struct vring_virtqueue *vq,
				    struct vring_packed_desc *desc)
{
	u16 flags;

	if (!vq->use_dma_api)
		return;

	flags = le16_to_cpu(desc->flags);

	dma_unmap_page(vring_dma_dev(vq),
		       le64_to_cpu(desc->addr),
		       le32_to_cpu(desc->len),
		       (flags & VRING_DESC_F_WRITE) ?
		       DMA_FROM_DEVICE : DMA_TO_DEVICE);
}

static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg,
						       gfp_t gfp)
{
	struct vring_packed_desc *desc;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	desc = kmalloc_array(total_sg, sizeof(struct vring_packed_desc), gfp);

	return desc;
}

static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
					 struct scatterlist *sgs[],
					 unsigned int total_sg,
					 unsigned int out_sgs,
					 unsigned int in_sgs,
					 void *data,
					 gfp_t gfp)
{
	struct vring_packed_desc *desc;
	struct scatterlist *sg;
	unsigned int i, n, err_idx;
	u16 head, id;
	dma_addr_t addr;

	head = vq->packed.next_avail_idx;
	desc = alloc_indirect_packed(total_sg, gfp);
	if (!desc)
		return -ENOMEM;

	if (unlikely(vq->vq.num_free < 1)) {
		pr_debug("Can't add buf len 1 - avail = 0\n");
		kfree(desc);
		END_USE(vq);
		return -ENOSPC;
	}

	i = 0;
	id = vq->free_head;
	BUG_ON(id == vq->packed.vring.num);

	for (n = 0; n < out_sgs + in_sgs; n++) {
		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
			addr = vring_map_one_sg(vq, sg, n < out_sgs ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE);
			if (vring_mapping_error(vq, addr))
				goto unmap_release;

			desc[i].flags = cpu_to_le16(n < out_sgs ?
						0 : VRING_DESC_F_WRITE);
			desc[i].addr = cpu_to_le64(addr);
			desc[i].len = cpu_to_le32(sg->length);
			i++;
		}
	}

	/* Now that the indirect table is filled in, map it. */
	addr = vring_map_single(vq, desc,
			total_sg * sizeof(struct vring_packed_desc),
			DMA_TO_DEVICE);
	if (vring_mapping_error(vq, addr))
		goto unmap_release;

	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
				sizeof(struct vring_packed_desc));
	vq->packed.vring.desc[head].id = cpu_to_le16(id);

	if (vq->use_dma_api) {
		vq->packed.desc_extra[id].addr = addr;
		vq->packed.desc_extra[id].len = total_sg *
				sizeof(struct vring_packed_desc);
		vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT |
						  vq->packed.avail_used_flags;
	}

	/*
	 * A driver MUST NOT make the first descriptor in the list
	 * available before all subsequent descriptors comprising
	 * the list are made available.
	 */
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = cpu_to_le16(VRING_DESC_F_INDIRECT |
						vq->packed.avail_used_flags);

	/* We're using some buffers from the free list. */
	vq->vq.num_free -= 1;

	/* Update free pointer */
	n = head + 1;
	if (n >= vq->packed.vring.num) {
		n = 0;
		vq->packed.avail_wrap_counter ^= 1;
		vq->packed.avail_used_flags ^=
				1 << VRING_PACKED_DESC_F_AVAIL |
				1 << VRING_PACKED_DESC_F_USED;
	}
	vq->packed.next_avail_idx = n;
	vq->free_head = vq->packed.desc_extra[id].next;

	/* Store token and indirect buffer state. */
	vq->packed.desc_state[id].num = 1;
	vq->packed.desc_state[id].data = data;
	vq->packed.desc_state[id].indir_desc = desc;
	vq->packed.desc_state[id].last = id;

	vq->num_added += 1;

	pr_debug("Added buffer head %i to %p\n", head, vq);
	END_USE(vq);

	return 0;

unmap_release:
	err_idx = i;

	for (i = 0; i < err_idx; i++)
		vring_unmap_desc_packed(vq, &desc[i]);

	kfree(desc);

	END_USE(vq);
	return -ENOMEM;
}
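
/*
 * Hedged sketch of the publish ordering both packed add paths follow:
 * the descriptor body is written first, then a write barrier, and only
 * then the head flags that hand the chain to the device. Field values
 * here are placeholders.
 */
#if 0	/* illustration only */
	vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
	vq->packed.vring.desc[head].len = cpu_to_le32(len);
	vq->packed.vring.desc[head].id = cpu_to_le16(id);
	virtio_wmb(vq->weak_barriers);
	vq->packed.vring.desc[head].flags = head_flags; /* device may now see it */
#endif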
Tsirkin } 1197fc6d70f4SXuan Zhuo 1198fc6d70f4SXuan Zhuo /* fall back on direct */ 1199fc6d70f4SXuan Zhuo } 12001ce9e605STiwei Bie 12011ce9e605STiwei Bie head = vq->packed.next_avail_idx; 12021ce9e605STiwei Bie avail_used_flags = vq->packed.avail_used_flags; 12031ce9e605STiwei Bie 12041ce9e605STiwei Bie WARN_ON_ONCE(total_sg > vq->packed.vring.num && !vq->indirect); 12051ce9e605STiwei Bie 12061ce9e605STiwei Bie desc = vq->packed.vring.desc; 12071ce9e605STiwei Bie i = head; 12081ce9e605STiwei Bie descs_used = total_sg; 12091ce9e605STiwei Bie 12101ce9e605STiwei Bie if (unlikely(vq->vq.num_free < descs_used)) { 12111ce9e605STiwei Bie pr_debug("Can't add buf len %i - avail = %i\n", 12121ce9e605STiwei Bie descs_used, vq->vq.num_free); 12131ce9e605STiwei Bie END_USE(vq); 12141ce9e605STiwei Bie return -ENOSPC; 12151ce9e605STiwei Bie } 12161ce9e605STiwei Bie 12171ce9e605STiwei Bie id = vq->free_head; 12181ce9e605STiwei Bie BUG_ON(id == vq->packed.vring.num); 12191ce9e605STiwei Bie 12201ce9e605STiwei Bie curr = id; 12211ce9e605STiwei Bie c = 0; 12221ce9e605STiwei Bie for (n = 0; n < out_sgs + in_sgs; n++) { 12231ce9e605STiwei Bie for (sg = sgs[n]; sg; sg = sg_next(sg)) { 12241ce9e605STiwei Bie dma_addr_t addr = vring_map_one_sg(vq, sg, n < out_sgs ? 12251ce9e605STiwei Bie DMA_TO_DEVICE : DMA_FROM_DEVICE); 12261ce9e605STiwei Bie if (vring_mapping_error(vq, addr)) 12271ce9e605STiwei Bie goto unmap_release; 12281ce9e605STiwei Bie 12291ce9e605STiwei Bie flags = cpu_to_le16(vq->packed.avail_used_flags | 12301ce9e605STiwei Bie (++c == total_sg ? 0 : VRING_DESC_F_NEXT) | 12311ce9e605STiwei Bie (n < out_sgs ? 0 : VRING_DESC_F_WRITE)); 12321ce9e605STiwei Bie if (i == head) 12331ce9e605STiwei Bie head_flags = flags; 12341ce9e605STiwei Bie else 12351ce9e605STiwei Bie desc[i].flags = flags; 12361ce9e605STiwei Bie 12371ce9e605STiwei Bie desc[i].addr = cpu_to_le64(addr); 12381ce9e605STiwei Bie desc[i].len = cpu_to_le32(sg->length); 12391ce9e605STiwei Bie desc[i].id = cpu_to_le16(id); 12401ce9e605STiwei Bie 12411ce9e605STiwei Bie if (unlikely(vq->use_dma_api)) { 12421ce9e605STiwei Bie vq->packed.desc_extra[curr].addr = addr; 12431ce9e605STiwei Bie vq->packed.desc_extra[curr].len = sg->length; 12441ce9e605STiwei Bie vq->packed.desc_extra[curr].flags = 12451ce9e605STiwei Bie le16_to_cpu(flags); 12461ce9e605STiwei Bie } 12471ce9e605STiwei Bie prev = curr; 1248aeef9b47SJason Wang curr = vq->packed.desc_extra[curr].next; 12491ce9e605STiwei Bie 12501ce9e605STiwei Bie if ((unlikely(++i >= vq->packed.vring.num))) { 12511ce9e605STiwei Bie i = 0; 12521ce9e605STiwei Bie vq->packed.avail_used_flags ^= 12531ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_AVAIL | 12541ce9e605STiwei Bie 1 << VRING_PACKED_DESC_F_USED; 12551ce9e605STiwei Bie } 12561ce9e605STiwei Bie } 12571ce9e605STiwei Bie } 12581ce9e605STiwei Bie 12591ce9e605STiwei Bie if (i < head) 12601ce9e605STiwei Bie vq->packed.avail_wrap_counter ^= 1; 12611ce9e605STiwei Bie 12621ce9e605STiwei Bie /* We're using some buffers from the free list. */ 12631ce9e605STiwei Bie vq->vq.num_free -= descs_used; 12641ce9e605STiwei Bie 12651ce9e605STiwei Bie /* Update free pointer */ 12661ce9e605STiwei Bie vq->packed.next_avail_idx = i; 12671ce9e605STiwei Bie vq->free_head = curr; 12681ce9e605STiwei Bie 12691ce9e605STiwei Bie /* Store token. 
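 *
 * desc_state[id].last remembers the tail of the chain ('prev' from the
 * loop above) so that detach_buf_packed() can splice the whole chain
 * back onto the free list in one step.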
*/ 12701ce9e605STiwei Bie vq->packed.desc_state[id].num = descs_used; 12711ce9e605STiwei Bie vq->packed.desc_state[id].data = data; 12721ce9e605STiwei Bie vq->packed.desc_state[id].indir_desc = ctx; 12731ce9e605STiwei Bie vq->packed.desc_state[id].last = prev; 12741ce9e605STiwei Bie 12751ce9e605STiwei Bie /* 12761ce9e605STiwei Bie * A driver MUST NOT make the first descriptor in the list 12771ce9e605STiwei Bie * available before all subsequent descriptors comprising 12781ce9e605STiwei Bie * the list are made available. 12791ce9e605STiwei Bie */ 12801ce9e605STiwei Bie virtio_wmb(vq->weak_barriers); 12811ce9e605STiwei Bie vq->packed.vring.desc[head].flags = head_flags; 12821ce9e605STiwei Bie vq->num_added += descs_used; 12831ce9e605STiwei Bie 12841ce9e605STiwei Bie pr_debug("Added buffer head %i to %p\n", head, vq); 12851ce9e605STiwei Bie END_USE(vq); 12861ce9e605STiwei Bie 12871ce9e605STiwei Bie return 0; 12881ce9e605STiwei Bie 12891ce9e605STiwei Bie unmap_release: 12901ce9e605STiwei Bie err_idx = i; 12911ce9e605STiwei Bie i = head; 129244593865SJason Wang curr = vq->free_head; 12931ce9e605STiwei Bie 12941ce9e605STiwei Bie vq->packed.avail_used_flags = avail_used_flags; 12951ce9e605STiwei Bie 12961ce9e605STiwei Bie for (n = 0; n < total_sg; n++) { 12971ce9e605STiwei Bie if (i == err_idx) 12981ce9e605STiwei Bie break; 1299d80dc15bSXuan Zhuo vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); 130044593865SJason Wang curr = vq->packed.desc_extra[curr].next; 13011ce9e605STiwei Bie i++; 13021ce9e605STiwei Bie if (i >= vq->packed.vring.num) 13031ce9e605STiwei Bie i = 0; 13041ce9e605STiwei Bie } 13051ce9e605STiwei Bie 13061ce9e605STiwei Bie END_USE(vq); 13071ce9e605STiwei Bie return -EIO; 13081ce9e605STiwei Bie } 13091ce9e605STiwei Bie 13101ce9e605STiwei Bie static bool virtqueue_kick_prepare_packed(struct virtqueue *_vq) 13111ce9e605STiwei Bie { 13121ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1313f51f9826STiwei Bie u16 new, old, off_wrap, flags, wrap_counter, event_idx; 13141ce9e605STiwei Bie bool needs_kick; 13151ce9e605STiwei Bie union { 13161ce9e605STiwei Bie struct { 13171ce9e605STiwei Bie __le16 off_wrap; 13181ce9e605STiwei Bie __le16 flags; 13191ce9e605STiwei Bie }; 13201ce9e605STiwei Bie u32 u32; 13211ce9e605STiwei Bie } snapshot; 13221ce9e605STiwei Bie 13231ce9e605STiwei Bie START_USE(vq); 13241ce9e605STiwei Bie 13251ce9e605STiwei Bie /* 13261ce9e605STiwei Bie * We need to expose the new flags value before checking notification 13271ce9e605STiwei Bie * suppressions. 
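 *
 * A note on the wrap handling below (informal): event_idx taken from
 * off_wrap counts in the device's wrap epoch.  If its wrap counter
 * disagrees with our avail_wrap_counter, the event index refers to the
 * previous trip around the ring, so vring.num is subtracted to bring
 * it into the same epoch as 'new' and 'old' before vring_need_event().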
13281ce9e605STiwei Bie */ 13291ce9e605STiwei Bie virtio_mb(vq->weak_barriers); 13301ce9e605STiwei Bie 1331f51f9826STiwei Bie old = vq->packed.next_avail_idx - vq->num_added; 1332f51f9826STiwei Bie new = vq->packed.next_avail_idx; 13331ce9e605STiwei Bie vq->num_added = 0; 13341ce9e605STiwei Bie 13351ce9e605STiwei Bie snapshot.u32 = *(u32 *)vq->packed.vring.device; 13361ce9e605STiwei Bie flags = le16_to_cpu(snapshot.flags); 13371ce9e605STiwei Bie 13381ce9e605STiwei Bie LAST_ADD_TIME_CHECK(vq); 13391ce9e605STiwei Bie LAST_ADD_TIME_INVALID(vq); 13401ce9e605STiwei Bie 1341f51f9826STiwei Bie if (flags != VRING_PACKED_EVENT_FLAG_DESC) { 13421ce9e605STiwei Bie needs_kick = (flags != VRING_PACKED_EVENT_FLAG_DISABLE); 1343f51f9826STiwei Bie goto out; 1344f51f9826STiwei Bie } 1345f51f9826STiwei Bie 1346f51f9826STiwei Bie off_wrap = le16_to_cpu(snapshot.off_wrap); 1347f51f9826STiwei Bie 1348f51f9826STiwei Bie wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 1349f51f9826STiwei Bie event_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 1350f51f9826STiwei Bie if (wrap_counter != vq->packed.avail_wrap_counter) 1351f51f9826STiwei Bie event_idx -= vq->packed.vring.num; 1352f51f9826STiwei Bie 1353f51f9826STiwei Bie needs_kick = vring_need_event(event_idx, new, old); 1354f51f9826STiwei Bie out: 13551ce9e605STiwei Bie END_USE(vq); 13561ce9e605STiwei Bie return needs_kick; 13571ce9e605STiwei Bie } 13581ce9e605STiwei Bie 13591ce9e605STiwei Bie static void detach_buf_packed(struct vring_virtqueue *vq, 13601ce9e605STiwei Bie unsigned int id, void **ctx) 13611ce9e605STiwei Bie { 13621ce9e605STiwei Bie struct vring_desc_state_packed *state = NULL; 13631ce9e605STiwei Bie struct vring_packed_desc *desc; 13641ce9e605STiwei Bie unsigned int i, curr; 13651ce9e605STiwei Bie 13661ce9e605STiwei Bie state = &vq->packed.desc_state[id]; 13671ce9e605STiwei Bie 13681ce9e605STiwei Bie /* Clear data ptr. */ 13691ce9e605STiwei Bie state->data = NULL; 13701ce9e605STiwei Bie 1371aeef9b47SJason Wang vq->packed.desc_extra[state->last].next = vq->free_head; 13721ce9e605STiwei Bie vq->free_head = id; 13731ce9e605STiwei Bie vq->vq.num_free += state->num; 13741ce9e605STiwei Bie 13751ce9e605STiwei Bie if (unlikely(vq->use_dma_api)) { 13761ce9e605STiwei Bie curr = id; 13771ce9e605STiwei Bie for (i = 0; i < state->num; i++) { 1378d80dc15bSXuan Zhuo vring_unmap_extra_packed(vq, 13791ce9e605STiwei Bie &vq->packed.desc_extra[curr]); 1380aeef9b47SJason Wang curr = vq->packed.desc_extra[curr].next; 13811ce9e605STiwei Bie } 13821ce9e605STiwei Bie } 13831ce9e605STiwei Bie 13841ce9e605STiwei Bie if (vq->indirect) { 13851ce9e605STiwei Bie u32 len; 13861ce9e605STiwei Bie 13871ce9e605STiwei Bie /* Free the indirect table, if any, now that it's unmapped. 
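 *
 * desc_extra[id].len was recorded at add time as the table size in
 * bytes, so dividing by sizeof(struct vring_packed_desc) below yields
 * the number of elements whose DMA mappings still need tearing down.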
*/ 13881ce9e605STiwei Bie desc = state->indir_desc; 13891ce9e605STiwei Bie if (!desc) 13901ce9e605STiwei Bie return; 13911ce9e605STiwei Bie 13921ce9e605STiwei Bie if (vq->use_dma_api) { 13931ce9e605STiwei Bie len = vq->packed.desc_extra[id].len; 13941ce9e605STiwei Bie for (i = 0; i < len / sizeof(struct vring_packed_desc); 13951ce9e605STiwei Bie i++) 13961ce9e605STiwei Bie vring_unmap_desc_packed(vq, &desc[i]); 13971ce9e605STiwei Bie } 13981ce9e605STiwei Bie kfree(desc); 13991ce9e605STiwei Bie state->indir_desc = NULL; 14001ce9e605STiwei Bie } else if (ctx) { 14011ce9e605STiwei Bie *ctx = state->indir_desc; 14021ce9e605STiwei Bie } 14031ce9e605STiwei Bie } 14041ce9e605STiwei Bie 14051ce9e605STiwei Bie static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, 14061ce9e605STiwei Bie u16 idx, bool used_wrap_counter) 14071ce9e605STiwei Bie { 14081ce9e605STiwei Bie bool avail, used; 14091ce9e605STiwei Bie u16 flags; 14101ce9e605STiwei Bie 14111ce9e605STiwei Bie flags = le16_to_cpu(vq->packed.vring.desc[idx].flags); 14121ce9e605STiwei Bie avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL)); 14131ce9e605STiwei Bie used = !!(flags & (1 << VRING_PACKED_DESC_F_USED)); 14141ce9e605STiwei Bie 14151ce9e605STiwei Bie return avail == used && used == used_wrap_counter; 14161ce9e605STiwei Bie } 14171ce9e605STiwei Bie 14181ce9e605STiwei Bie static inline bool more_used_packed(const struct vring_virtqueue *vq) 14191ce9e605STiwei Bie { 1420a7722890Shuangjie.albert u16 last_used; 1421a7722890Shuangjie.albert u16 last_used_idx; 1422a7722890Shuangjie.albert bool used_wrap_counter; 1423a7722890Shuangjie.albert 1424a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1425a7722890Shuangjie.albert last_used = packed_last_used(last_used_idx); 1426a7722890Shuangjie.albert used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1427a7722890Shuangjie.albert return is_used_desc_packed(vq, last_used, used_wrap_counter); 14281ce9e605STiwei Bie } 14291ce9e605STiwei Bie 14301ce9e605STiwei Bie static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, 14311ce9e605STiwei Bie unsigned int *len, 14321ce9e605STiwei Bie void **ctx) 14331ce9e605STiwei Bie { 14341ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1435a7722890Shuangjie.albert u16 last_used, id, last_used_idx; 1436a7722890Shuangjie.albert bool used_wrap_counter; 14371ce9e605STiwei Bie void *ret; 14381ce9e605STiwei Bie 14391ce9e605STiwei Bie START_USE(vq); 14401ce9e605STiwei Bie 14411ce9e605STiwei Bie if (unlikely(vq->broken)) { 14421ce9e605STiwei Bie END_USE(vq); 14431ce9e605STiwei Bie return NULL; 14441ce9e605STiwei Bie } 14451ce9e605STiwei Bie 14461ce9e605STiwei Bie if (!more_used_packed(vq)) { 14471ce9e605STiwei Bie pr_debug("No more buffers in queue\n"); 14481ce9e605STiwei Bie END_USE(vq); 14491ce9e605STiwei Bie return NULL; 14501ce9e605STiwei Bie } 14511ce9e605STiwei Bie 14521ce9e605STiwei Bie /* Only get used elements after they have been exposed by host. 
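 *
 * The read barrier below pairs with the device making the descriptor
 * used: the spec requires id/len to be written before the AVAIL/USED
 * flag bits are flipped, so once more_used_packed() has observed the
 * flags it is safe to read id and len.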
*/ 14531ce9e605STiwei Bie virtio_rmb(vq->weak_barriers); 14541ce9e605STiwei Bie 1455a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1456a7722890Shuangjie.albert used_wrap_counter = packed_used_wrap_counter(last_used_idx); 1457a7722890Shuangjie.albert last_used = packed_last_used(last_used_idx); 14581ce9e605STiwei Bie id = le16_to_cpu(vq->packed.vring.desc[last_used].id); 14591ce9e605STiwei Bie *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); 14601ce9e605STiwei Bie 14611ce9e605STiwei Bie if (unlikely(id >= vq->packed.vring.num)) { 14621ce9e605STiwei Bie BAD_RING(vq, "id %u out of range\n", id); 14631ce9e605STiwei Bie return NULL; 14641ce9e605STiwei Bie } 14651ce9e605STiwei Bie if (unlikely(!vq->packed.desc_state[id].data)) { 14661ce9e605STiwei Bie BAD_RING(vq, "id %u is not a head!\n", id); 14671ce9e605STiwei Bie return NULL; 14681ce9e605STiwei Bie } 14691ce9e605STiwei Bie 14701ce9e605STiwei Bie /* detach_buf_packed clears data, so grab it now. */ 14711ce9e605STiwei Bie ret = vq->packed.desc_state[id].data; 14721ce9e605STiwei Bie detach_buf_packed(vq, id, ctx); 14731ce9e605STiwei Bie 1474a7722890Shuangjie.albert last_used += vq->packed.desc_state[id].num; 1475a7722890Shuangjie.albert if (unlikely(last_used >= vq->packed.vring.num)) { 1476a7722890Shuangjie.albert last_used -= vq->packed.vring.num; 1477a7722890Shuangjie.albert used_wrap_counter ^= 1; 14781ce9e605STiwei Bie } 14791ce9e605STiwei Bie 1480a7722890Shuangjie.albert last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1481a7722890Shuangjie.albert WRITE_ONCE(vq->last_used_idx, last_used); 1482a7722890Shuangjie.albert 1483f51f9826STiwei Bie /* 1484f51f9826STiwei Bie * If we expect an interrupt for the next entry, tell host 1485f51f9826STiwei Bie * by writing event index and flush out the write before 1486f51f9826STiwei Bie * the read in the next get_buf call. 1487f51f9826STiwei Bie */ 1488f51f9826STiwei Bie if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) 1489f51f9826STiwei Bie virtio_store_mb(vq->weak_barriers, 1490f51f9826STiwei Bie &vq->packed.vring.driver->off_wrap, 1491a7722890Shuangjie.albert cpu_to_le16(vq->last_used_idx)); 1492f51f9826STiwei Bie 14931ce9e605STiwei Bie LAST_ADD_TIME_INVALID(vq); 14941ce9e605STiwei Bie 14951ce9e605STiwei Bie END_USE(vq); 14961ce9e605STiwei Bie return ret; 14971ce9e605STiwei Bie } 14981ce9e605STiwei Bie 14991ce9e605STiwei Bie static void virtqueue_disable_cb_packed(struct virtqueue *_vq) 15001ce9e605STiwei Bie { 15011ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 15021ce9e605STiwei Bie 15031ce9e605STiwei Bie if (vq->packed.event_flags_shadow != VRING_PACKED_EVENT_FLAG_DISABLE) { 15041ce9e605STiwei Bie vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 15051ce9e605STiwei Bie vq->packed.vring.driver->flags = 15061ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 15071ce9e605STiwei Bie } 15081ce9e605STiwei Bie } 15091ce9e605STiwei Bie 151031532340SSolomon Tan static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) 15111ce9e605STiwei Bie { 15121ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 15131ce9e605STiwei Bie 15141ce9e605STiwei Bie START_USE(vq); 15151ce9e605STiwei Bie 15161ce9e605STiwei Bie /* 15171ce9e605STiwei Bie * We optimistically turn back on interrupts, then check if there was 15181ce9e605STiwei Bie * more to do. 
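 *
 * The value returned below is a snapshot of last_used_idx (used index
 * plus the wrap-counter bit).  Callers pass it to virtqueue_poll() to
 * catch a buffer that becomes used between re-enabling callbacks here
 * and their final check.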
15191ce9e605STiwei Bie */ 15201ce9e605STiwei Bie 1521f51f9826STiwei Bie if (vq->event) { 1522f51f9826STiwei Bie vq->packed.vring.driver->off_wrap = 1523a7722890Shuangjie.albert cpu_to_le16(vq->last_used_idx); 1524f51f9826STiwei Bie /* 1525f51f9826STiwei Bie * We need to update event offset and event wrap 1526f51f9826STiwei Bie * counter first before updating event flags. 1527f51f9826STiwei Bie */ 1528f51f9826STiwei Bie virtio_wmb(vq->weak_barriers); 1529f51f9826STiwei Bie } 1530f51f9826STiwei Bie 15311ce9e605STiwei Bie if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1532f51f9826STiwei Bie vq->packed.event_flags_shadow = vq->event ? 1533f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_DESC : 1534f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_ENABLE; 15351ce9e605STiwei Bie vq->packed.vring.driver->flags = 15361ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 15371ce9e605STiwei Bie } 15381ce9e605STiwei Bie 15391ce9e605STiwei Bie END_USE(vq); 1540a7722890Shuangjie.albert return vq->last_used_idx; 15411ce9e605STiwei Bie } 15421ce9e605STiwei Bie 15431ce9e605STiwei Bie static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) 15441ce9e605STiwei Bie { 15451ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 15461ce9e605STiwei Bie bool wrap_counter; 15471ce9e605STiwei Bie u16 used_idx; 15481ce9e605STiwei Bie 15491ce9e605STiwei Bie wrap_counter = off_wrap >> VRING_PACKED_EVENT_F_WRAP_CTR; 15501ce9e605STiwei Bie used_idx = off_wrap & ~(1 << VRING_PACKED_EVENT_F_WRAP_CTR); 15511ce9e605STiwei Bie 15521ce9e605STiwei Bie return is_used_desc_packed(vq, used_idx, wrap_counter); 15531ce9e605STiwei Bie } 15541ce9e605STiwei Bie 15551ce9e605STiwei Bie static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) 15561ce9e605STiwei Bie { 15571ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1558a7722890Shuangjie.albert u16 used_idx, wrap_counter, last_used_idx; 1559f51f9826STiwei Bie u16 bufs; 15601ce9e605STiwei Bie 15611ce9e605STiwei Bie START_USE(vq); 15621ce9e605STiwei Bie 15631ce9e605STiwei Bie /* 15641ce9e605STiwei Bie * We optimistically turn back on interrupts, then check if there was 15651ce9e605STiwei Bie * more to do. 15661ce9e605STiwei Bie */ 15671ce9e605STiwei Bie 1568f51f9826STiwei Bie if (vq->event) { 1569f51f9826STiwei Bie /* TODO: tune this threshold */ 1570f51f9826STiwei Bie bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; 1571a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1572a7722890Shuangjie.albert wrap_counter = packed_used_wrap_counter(last_used_idx); 15731ce9e605STiwei Bie 1574a7722890Shuangjie.albert used_idx = packed_last_used(last_used_idx) + bufs; 1575f51f9826STiwei Bie if (used_idx >= vq->packed.vring.num) { 1576f51f9826STiwei Bie used_idx -= vq->packed.vring.num; 1577f51f9826STiwei Bie wrap_counter ^= 1; 1578f51f9826STiwei Bie } 1579f51f9826STiwei Bie 1580f51f9826STiwei Bie vq->packed.vring.driver->off_wrap = cpu_to_le16(used_idx | 1581f51f9826STiwei Bie (wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); 1582f51f9826STiwei Bie 1583f51f9826STiwei Bie /* 1584f51f9826STiwei Bie * We need to update event offset and event wrap 1585f51f9826STiwei Bie * counter first before updating event flags. 1586f51f9826STiwei Bie */ 1587f51f9826STiwei Bie virtio_wmb(vq->weak_barriers); 1588f51f9826STiwei Bie } 1589f51f9826STiwei Bie 15901ce9e605STiwei Bie if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) { 1591f51f9826STiwei Bie vq->packed.event_flags_shadow = vq->event ? 
1592f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_DESC : 1593f51f9826STiwei Bie VRING_PACKED_EVENT_FLAG_ENABLE; 15941ce9e605STiwei Bie vq->packed.vring.driver->flags = 15951ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 15961ce9e605STiwei Bie } 15971ce9e605STiwei Bie 15981ce9e605STiwei Bie /* 15991ce9e605STiwei Bie * We need to update event suppression structure first 16001ce9e605STiwei Bie * before re-checking for more used buffers. 16011ce9e605STiwei Bie */ 16021ce9e605STiwei Bie virtio_mb(vq->weak_barriers); 16031ce9e605STiwei Bie 1604a7722890Shuangjie.albert last_used_idx = READ_ONCE(vq->last_used_idx); 1605a7722890Shuangjie.albert wrap_counter = packed_used_wrap_counter(last_used_idx); 1606a7722890Shuangjie.albert used_idx = packed_last_used(last_used_idx); 1607a7722890Shuangjie.albert if (is_used_desc_packed(vq, used_idx, wrap_counter)) { 16081ce9e605STiwei Bie END_USE(vq); 16091ce9e605STiwei Bie return false; 16101ce9e605STiwei Bie } 16111ce9e605STiwei Bie 16121ce9e605STiwei Bie END_USE(vq); 16131ce9e605STiwei Bie return true; 16141ce9e605STiwei Bie } 16151ce9e605STiwei Bie 16161ce9e605STiwei Bie static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) 16171ce9e605STiwei Bie { 16181ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 16191ce9e605STiwei Bie unsigned int i; 16201ce9e605STiwei Bie void *buf; 16211ce9e605STiwei Bie 16221ce9e605STiwei Bie START_USE(vq); 16231ce9e605STiwei Bie 16241ce9e605STiwei Bie for (i = 0; i < vq->packed.vring.num; i++) { 16251ce9e605STiwei Bie if (!vq->packed.desc_state[i].data) 16261ce9e605STiwei Bie continue; 16271ce9e605STiwei Bie /* detach_buf clears data, so grab it now. */ 16281ce9e605STiwei Bie buf = vq->packed.desc_state[i].data; 16291ce9e605STiwei Bie detach_buf_packed(vq, i, NULL); 16301ce9e605STiwei Bie END_USE(vq); 16311ce9e605STiwei Bie return buf; 16321ce9e605STiwei Bie } 16331ce9e605STiwei Bie /* That should have freed everything. 
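 * (In other words every descriptor is back on the free list, which is
 * what the BUG_ON below asserts via num_free.)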
*/ 16341ce9e605STiwei Bie BUG_ON(vq->vq.num_free != vq->packed.vring.num); 16351ce9e605STiwei Bie 16361ce9e605STiwei Bie END_USE(vq); 16371ce9e605STiwei Bie return NULL; 16381ce9e605STiwei Bie } 16391ce9e605STiwei Bie 16405a222421SJason Wang static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, 16415a222421SJason Wang unsigned int num) 16425a222421SJason Wang { 16435a222421SJason Wang struct vring_desc_extra *desc_extra; 16445a222421SJason Wang unsigned int i; 16455a222421SJason Wang 16465a222421SJason Wang desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), 16475a222421SJason Wang GFP_KERNEL); 16485a222421SJason Wang if (!desc_extra) 16495a222421SJason Wang return NULL; 16505a222421SJason Wang 16515a222421SJason Wang memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); 16525a222421SJason Wang 16535a222421SJason Wang for (i = 0; i < num - 1; i++) 16545a222421SJason Wang desc_extra[i].next = i + 1; 16555a222421SJason Wang 16565a222421SJason Wang return desc_extra; 16575a222421SJason Wang } 16585a222421SJason Wang 16591ce9e605STiwei Bie static struct virtqueue *vring_create_virtqueue_packed( 16601ce9e605STiwei Bie unsigned int index, 16611ce9e605STiwei Bie unsigned int num, 16621ce9e605STiwei Bie unsigned int vring_align, 16631ce9e605STiwei Bie struct virtio_device *vdev, 16641ce9e605STiwei Bie bool weak_barriers, 16651ce9e605STiwei Bie bool may_reduce_num, 16661ce9e605STiwei Bie bool context, 16671ce9e605STiwei Bie bool (*notify)(struct virtqueue *), 16681ce9e605STiwei Bie void (*callback)(struct virtqueue *), 16691ce9e605STiwei Bie const char *name) 16701ce9e605STiwei Bie { 16711ce9e605STiwei Bie struct vring_virtqueue *vq; 16721ce9e605STiwei Bie struct vring_packed_desc *ring; 16731ce9e605STiwei Bie struct vring_packed_desc_event *driver, *device; 16741ce9e605STiwei Bie dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 16751ce9e605STiwei Bie size_t ring_size_in_bytes, event_size_in_bytes; 16761ce9e605STiwei Bie 16771ce9e605STiwei Bie ring_size_in_bytes = num * sizeof(struct vring_packed_desc); 16781ce9e605STiwei Bie 16791ce9e605STiwei Bie ring = vring_alloc_queue(vdev, ring_size_in_bytes, 16801ce9e605STiwei Bie &ring_dma_addr, 16811ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 16821ce9e605STiwei Bie if (!ring) 16831ce9e605STiwei Bie goto err_ring; 16841ce9e605STiwei Bie 16851ce9e605STiwei Bie event_size_in_bytes = sizeof(struct vring_packed_desc_event); 16861ce9e605STiwei Bie 16871ce9e605STiwei Bie driver = vring_alloc_queue(vdev, event_size_in_bytes, 16881ce9e605STiwei Bie &driver_event_dma_addr, 16891ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 16901ce9e605STiwei Bie if (!driver) 16911ce9e605STiwei Bie goto err_driver; 16921ce9e605STiwei Bie 16931ce9e605STiwei Bie device = vring_alloc_queue(vdev, event_size_in_bytes, 16941ce9e605STiwei Bie &device_event_dma_addr, 16951ce9e605STiwei Bie GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); 16961ce9e605STiwei Bie if (!device) 16971ce9e605STiwei Bie goto err_device; 16981ce9e605STiwei Bie 16991ce9e605STiwei Bie vq = kmalloc(sizeof(*vq), GFP_KERNEL); 17001ce9e605STiwei Bie if (!vq) 17011ce9e605STiwei Bie goto err_vq; 17021ce9e605STiwei Bie 17031ce9e605STiwei Bie vq->vq.callback = callback; 17041ce9e605STiwei Bie vq->vq.vdev = vdev; 17051ce9e605STiwei Bie vq->vq.name = name; 17061ce9e605STiwei Bie vq->vq.num_free = num; 17071ce9e605STiwei Bie vq->vq.index = index; 17081ce9e605STiwei Bie vq->we_own_ring = true; 17091ce9e605STiwei Bie vq->notify = notify; 
17101ce9e605STiwei Bie vq->weak_barriers = weak_barriers; 1711*c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 17128b4ec69dSJason Wang vq->broken = true; 1713*c346dae4SJason Wang #else 1714*c346dae4SJason Wang vq->broken = false; 1715*c346dae4SJason Wang #endif 1716a7722890Shuangjie.albert vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); 17178d622d21SMichael S. Tsirkin vq->event_triggered = false; 17181ce9e605STiwei Bie vq->num_added = 0; 17191ce9e605STiwei Bie vq->packed_ring = true; 17201ce9e605STiwei Bie vq->use_dma_api = vring_use_dma_api(vdev); 17211ce9e605STiwei Bie #ifdef DEBUG 17221ce9e605STiwei Bie vq->in_use = false; 17231ce9e605STiwei Bie vq->last_add_time_valid = false; 17241ce9e605STiwei Bie #endif 17251ce9e605STiwei Bie 17261ce9e605STiwei Bie vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 17271ce9e605STiwei Bie !context; 17281ce9e605STiwei Bie vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 17291ce9e605STiwei Bie 173045383fb0STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 173145383fb0STiwei Bie vq->weak_barriers = false; 173245383fb0STiwei Bie 17331ce9e605STiwei Bie vq->packed.ring_dma_addr = ring_dma_addr; 17341ce9e605STiwei Bie vq->packed.driver_event_dma_addr = driver_event_dma_addr; 17351ce9e605STiwei Bie vq->packed.device_event_dma_addr = device_event_dma_addr; 17361ce9e605STiwei Bie 17371ce9e605STiwei Bie vq->packed.ring_size_in_bytes = ring_size_in_bytes; 17381ce9e605STiwei Bie vq->packed.event_size_in_bytes = event_size_in_bytes; 17391ce9e605STiwei Bie 17401ce9e605STiwei Bie vq->packed.vring.num = num; 17411ce9e605STiwei Bie vq->packed.vring.desc = ring; 17421ce9e605STiwei Bie vq->packed.vring.driver = driver; 17431ce9e605STiwei Bie vq->packed.vring.device = device; 17441ce9e605STiwei Bie 17451ce9e605STiwei Bie vq->packed.next_avail_idx = 0; 17461ce9e605STiwei Bie vq->packed.avail_wrap_counter = 1; 17471ce9e605STiwei Bie vq->packed.event_flags_shadow = 0; 17481ce9e605STiwei Bie vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; 17491ce9e605STiwei Bie 17501ce9e605STiwei Bie vq->packed.desc_state = kmalloc_array(num, 17511ce9e605STiwei Bie sizeof(struct vring_desc_state_packed), 17521ce9e605STiwei Bie GFP_KERNEL); 17531ce9e605STiwei Bie if (!vq->packed.desc_state) 17541ce9e605STiwei Bie goto err_desc_state; 17551ce9e605STiwei Bie 17561ce9e605STiwei Bie memset(vq->packed.desc_state, 0, 17571ce9e605STiwei Bie num * sizeof(struct vring_desc_state_packed)); 17581ce9e605STiwei Bie 17591ce9e605STiwei Bie /* Put everything in free lists. */ 17601ce9e605STiwei Bie vq->free_head = 0; 17611ce9e605STiwei Bie 17625a222421SJason Wang vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); 17631ce9e605STiwei Bie if (!vq->packed.desc_extra) 17641ce9e605STiwei Bie goto err_desc_extra; 17651ce9e605STiwei Bie 17661ce9e605STiwei Bie /* No callback? Tell other side not to bother us. 
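 *
 * Writing VRING_PACKED_EVENT_FLAG_DISABLE (mirrored in
 * event_flags_shadow) keeps the device from raising interrupts on a
 * queue that will only ever be polled.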
*/ 17671ce9e605STiwei Bie if (!callback) { 17681ce9e605STiwei Bie vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; 17691ce9e605STiwei Bie vq->packed.vring.driver->flags = 17701ce9e605STiwei Bie cpu_to_le16(vq->packed.event_flags_shadow); 17711ce9e605STiwei Bie } 17721ce9e605STiwei Bie 17730e566c8fSParav Pandit spin_lock(&vdev->vqs_list_lock); 1774e152d8afSDan Carpenter list_add_tail(&vq->vq.list, &vdev->vqs); 17750e566c8fSParav Pandit spin_unlock(&vdev->vqs_list_lock); 17761ce9e605STiwei Bie return &vq->vq; 17771ce9e605STiwei Bie 17781ce9e605STiwei Bie err_desc_extra: 17791ce9e605STiwei Bie kfree(vq->packed.desc_state); 17801ce9e605STiwei Bie err_desc_state: 17811ce9e605STiwei Bie kfree(vq); 17821ce9e605STiwei Bie err_vq: 1783ae93d8eaSDan Carpenter vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); 17841ce9e605STiwei Bie err_device: 1785ae93d8eaSDan Carpenter vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); 17861ce9e605STiwei Bie err_driver: 17871ce9e605STiwei Bie vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); 17881ce9e605STiwei Bie err_ring: 17891ce9e605STiwei Bie return NULL; 17901ce9e605STiwei Bie } 17911ce9e605STiwei Bie 17921ce9e605STiwei Bie 17931ce9e605STiwei Bie /* 1794e6f633e5STiwei Bie * Generic functions and exported symbols. 1795e6f633e5STiwei Bie */ 1796e6f633e5STiwei Bie 1797e6f633e5STiwei Bie static inline int virtqueue_add(struct virtqueue *_vq, 1798e6f633e5STiwei Bie struct scatterlist *sgs[], 1799e6f633e5STiwei Bie unsigned int total_sg, 1800e6f633e5STiwei Bie unsigned int out_sgs, 1801e6f633e5STiwei Bie unsigned int in_sgs, 1802e6f633e5STiwei Bie void *data, 1803e6f633e5STiwei Bie void *ctx, 1804e6f633e5STiwei Bie gfp_t gfp) 1805e6f633e5STiwei Bie { 18061ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 18071ce9e605STiwei Bie 18081ce9e605STiwei Bie return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg, 18091ce9e605STiwei Bie out_sgs, in_sgs, data, ctx, gfp) : 18101ce9e605STiwei Bie virtqueue_add_split(_vq, sgs, total_sg, 1811e6f633e5STiwei Bie out_sgs, in_sgs, data, ctx, gfp); 1812e6f633e5STiwei Bie } 1813e6f633e5STiwei Bie 1814e6f633e5STiwei Bie /** 1815e6f633e5STiwei Bie * virtqueue_add_sgs - expose buffers to other end 1816a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 1817e6f633e5STiwei Bie * @sgs: array of terminated scatterlists. 1818a5581206SJiang Biao * @out_sgs: the number of scatterlists readable by other side 1819a5581206SJiang Biao * @in_sgs: the number of scatterlists which are writable (after readable ones) 1820e6f633e5STiwei Bie * @data: the token identifying the buffer. 1821e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1822e6f633e5STiwei Bie * 1823e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1824e6f633e5STiwei Bie * at the same time (except where noted). 1825e6f633e5STiwei Bie * 1826e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1827e6f633e5STiwei Bie */ 1828e6f633e5STiwei Bie int virtqueue_add_sgs(struct virtqueue *_vq, 1829e6f633e5STiwei Bie struct scatterlist *sgs[], 1830e6f633e5STiwei Bie unsigned int out_sgs, 1831e6f633e5STiwei Bie unsigned int in_sgs, 1832e6f633e5STiwei Bie void *data, 1833e6f633e5STiwei Bie gfp_t gfp) 1834e6f633e5STiwei Bie { 1835e6f633e5STiwei Bie unsigned int i, total_sg = 0; 1836e6f633e5STiwei Bie 1837e6f633e5STiwei Bie /* Count them first. 
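 *
 * As a sketch of the expected shape (a hypothetical virtio-blk-style
 * request: one readable header, one writable status byte):
 *
 *	struct scatterlist hdr, status, *sgs[] = { &hdr, &status };
 *
 *	sg_init_one(&hdr, &req->hdr, sizeof(req->hdr));
 *	sg_init_one(&status, &req->status, 1);
 *	err = virtqueue_add_sgs(vq, sgs, 1, 1, req, GFP_ATOMIC);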
*/ 1838e6f633e5STiwei Bie for (i = 0; i < out_sgs + in_sgs; i++) { 1839e6f633e5STiwei Bie struct scatterlist *sg; 1840e6f633e5STiwei Bie 1841e6f633e5STiwei Bie for (sg = sgs[i]; sg; sg = sg_next(sg)) 1842e6f633e5STiwei Bie total_sg++; 1843e6f633e5STiwei Bie } 1844e6f633e5STiwei Bie return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, 1845e6f633e5STiwei Bie data, NULL, gfp); 1846e6f633e5STiwei Bie } 1847e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_sgs); 1848e6f633e5STiwei Bie 1849e6f633e5STiwei Bie /** 1850e6f633e5STiwei Bie * virtqueue_add_outbuf - expose output buffers to other end 1851e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1852e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 1853e6f633e5STiwei Bie * @num: the number of entries in @sg readable by other side 1854e6f633e5STiwei Bie * @data: the token identifying the buffer. 1855e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1856e6f633e5STiwei Bie * 1857e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1858e6f633e5STiwei Bie * at the same time (except where noted). 1859e6f633e5STiwei Bie * 1860e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1861e6f633e5STiwei Bie */ 1862e6f633e5STiwei Bie int virtqueue_add_outbuf(struct virtqueue *vq, 1863e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1864e6f633e5STiwei Bie void *data, 1865e6f633e5STiwei Bie gfp_t gfp) 1866e6f633e5STiwei Bie { 1867e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 1, 0, data, NULL, gfp); 1868e6f633e5STiwei Bie } 1869e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); 1870e6f633e5STiwei Bie 1871e6f633e5STiwei Bie /** 1872e6f633e5STiwei Bie * virtqueue_add_inbuf - expose input buffers to other end 1873e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1874e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 1875e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 1876e6f633e5STiwei Bie * @data: the token identifying the buffer. 1877e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 1878e6f633e5STiwei Bie * 1879e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1880e6f633e5STiwei Bie * at the same time (except where noted). 1881e6f633e5STiwei Bie * 1882e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1883e6f633e5STiwei Bie */ 1884e6f633e5STiwei Bie int virtqueue_add_inbuf(struct virtqueue *vq, 1885e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1886e6f633e5STiwei Bie void *data, 1887e6f633e5STiwei Bie gfp_t gfp) 1888e6f633e5STiwei Bie { 1889e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, NULL, gfp); 1890e6f633e5STiwei Bie } 1891e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf); 1892e6f633e5STiwei Bie 1893e6f633e5STiwei Bie /** 1894e6f633e5STiwei Bie * virtqueue_add_inbuf_ctx - expose input buffers to other end 1895e6f633e5STiwei Bie * @vq: the struct virtqueue we're talking about. 1896e6f633e5STiwei Bie * @sg: scatterlist (must be well-formed and terminated!) 1897e6f633e5STiwei Bie * @num: the number of entries in @sg writable by other side 1898e6f633e5STiwei Bie * @data: the token identifying the buffer. 1899e6f633e5STiwei Bie * @ctx: extra context for the token 1900e6f633e5STiwei Bie * @gfp: how to do memory allocations (if necessary). 
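 *
 * A minimal usage sketch (assuming a driver-owned receive buffer @buf
 * of BUF_LEN bytes and an opaque per-buffer @cookie):
 *
 *	struct scatterlist sg;
 *
 *	sg_init_one(&sg, buf, BUF_LEN);
 *	err = virtqueue_add_inbuf_ctx(vq, &sg, 1, buf, cookie, GFP_ATOMIC);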
1901e6f633e5STiwei Bie * 1902e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue operations 1903e6f633e5STiwei Bie * at the same time (except where noted). 1904e6f633e5STiwei Bie * 1905e6f633e5STiwei Bie * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 1906e6f633e5STiwei Bie */ 1907e6f633e5STiwei Bie int virtqueue_add_inbuf_ctx(struct virtqueue *vq, 1908e6f633e5STiwei Bie struct scatterlist *sg, unsigned int num, 1909e6f633e5STiwei Bie void *data, 1910e6f633e5STiwei Bie void *ctx, 1911e6f633e5STiwei Bie gfp_t gfp) 1912e6f633e5STiwei Bie { 1913e6f633e5STiwei Bie return virtqueue_add(vq, &sg, num, 0, 1, data, ctx, gfp); 1914e6f633e5STiwei Bie } 1915e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_add_inbuf_ctx); 1916e6f633e5STiwei Bie 1917e6f633e5STiwei Bie /** 1918e6f633e5STiwei Bie * virtqueue_kick_prepare - first half of split virtqueue_kick call. 1919a5581206SJiang Biao * @_vq: the struct virtqueue 1920e6f633e5STiwei Bie * 1921e6f633e5STiwei Bie * Instead of virtqueue_kick(), you can do: 1922e6f633e5STiwei Bie * if (virtqueue_kick_prepare(vq)) 1923e6f633e5STiwei Bie * virtqueue_notify(vq); 1924e6f633e5STiwei Bie * 1925e6f633e5STiwei Bie * This is sometimes useful because the virtqueue_kick_prepare() needs 1926e6f633e5STiwei Bie * to be serialized, but the actual virtqueue_notify() call does not. 1927e6f633e5STiwei Bie */ 1928e6f633e5STiwei Bie bool virtqueue_kick_prepare(struct virtqueue *_vq) 1929e6f633e5STiwei Bie { 19301ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 19311ce9e605STiwei Bie 19321ce9e605STiwei Bie return vq->packed_ring ? virtqueue_kick_prepare_packed(_vq) : 19331ce9e605STiwei Bie virtqueue_kick_prepare_split(_vq); 1934e6f633e5STiwei Bie } 1935e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 1936e6f633e5STiwei Bie 1937e6f633e5STiwei Bie /** 1938e6f633e5STiwei Bie * virtqueue_notify - second half of split virtqueue_kick call. 1939a5581206SJiang Biao * @_vq: the struct virtqueue 1940e6f633e5STiwei Bie * 1941e6f633e5STiwei Bie * This does not need to be serialized. 1942e6f633e5STiwei Bie * 1943e6f633e5STiwei Bie * Returns false if host notify failed or queue is broken, otherwise true. 1944e6f633e5STiwei Bie */ 1945e6f633e5STiwei Bie bool virtqueue_notify(struct virtqueue *_vq) 1946e6f633e5STiwei Bie { 1947e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 1948e6f633e5STiwei Bie 1949e6f633e5STiwei Bie if (unlikely(vq->broken)) 1950e6f633e5STiwei Bie return false; 1951e6f633e5STiwei Bie 1952e6f633e5STiwei Bie /* Prod other side to tell it about changes. */ 1953e6f633e5STiwei Bie if (!vq->notify(_vq)) { 1954e6f633e5STiwei Bie vq->broken = true; 1955e6f633e5STiwei Bie return false; 1956e6f633e5STiwei Bie } 1957e6f633e5STiwei Bie return true; 1958e6f633e5STiwei Bie } 1959e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_notify); 1960e6f633e5STiwei Bie 1961e6f633e5STiwei Bie /** 1962e6f633e5STiwei Bie * virtqueue_kick - update after add_buf 1963e6f633e5STiwei Bie * @vq: the struct virtqueue 1964e6f633e5STiwei Bie * 1965e6f633e5STiwei Bie * After one or more virtqueue_add_* calls, invoke this to kick 1966e6f633e5STiwei Bie * the other side. 1967e6f633e5STiwei Bie * 1968e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1969e6f633e5STiwei Bie * operations at the same time (except where noted). 1970e6f633e5STiwei Bie * 1971e6f633e5STiwei Bie * Returns false if kick failed, otherwise true. 
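 *
 * Typical producer pattern (a sketch; the add_outbuf arguments are the
 * caller's own):
 *
 *	if (!virtqueue_add_outbuf(vq, &sg, 1, buf, GFP_ATOMIC))
 *		virtqueue_kick(vq);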
1972e6f633e5STiwei Bie */ 1973e6f633e5STiwei Bie bool virtqueue_kick(struct virtqueue *vq) 1974e6f633e5STiwei Bie { 1975e6f633e5STiwei Bie if (virtqueue_kick_prepare(vq)) 1976e6f633e5STiwei Bie return virtqueue_notify(vq); 1977e6f633e5STiwei Bie return true; 1978e6f633e5STiwei Bie } 1979e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_kick); 1980e6f633e5STiwei Bie 1981e6f633e5STiwei Bie /** 198231c11db6SYang Li * virtqueue_get_buf_ctx - get the next used buffer 1983a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 1984e6f633e5STiwei Bie * @len: the length written into the buffer 1985a5581206SJiang Biao * @ctx: extra context for the token 1986e6f633e5STiwei Bie * 1987e6f633e5STiwei Bie * If the device wrote data into the buffer, @len will be set to the 1988e6f633e5STiwei Bie * amount written. This means you don't need to clear the buffer 1989e6f633e5STiwei Bie * beforehand to ensure there's no data leakage in the case of short 1990e6f633e5STiwei Bie * writes. 1991e6f633e5STiwei Bie * 1992e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 1993e6f633e5STiwei Bie * operations at the same time (except where noted). 1994e6f633e5STiwei Bie * 1995e6f633e5STiwei Bie * Returns NULL if there are no used buffers, or the "data" token 1996e6f633e5STiwei Bie * handed to virtqueue_add_*(). 1997e6f633e5STiwei Bie */ 1998e6f633e5STiwei Bie void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, 1999e6f633e5STiwei Bie void **ctx) 2000e6f633e5STiwei Bie { 20011ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 20021ce9e605STiwei Bie 20031ce9e605STiwei Bie return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : 20041ce9e605STiwei Bie virtqueue_get_buf_ctx_split(_vq, len, ctx); 2005e6f633e5STiwei Bie } 2006e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); 2007e6f633e5STiwei Bie 2008e6f633e5STiwei Bie void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 2009e6f633e5STiwei Bie { 2010e6f633e5STiwei Bie return virtqueue_get_buf_ctx(_vq, len, NULL); 2011e6f633e5STiwei Bie } 2012e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_get_buf); 2013e6f633e5STiwei Bie /** 2014e6f633e5STiwei Bie * virtqueue_disable_cb - disable callbacks 2015a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2016e6f633e5STiwei Bie * 2017e6f633e5STiwei Bie * Note that this is not necessarily synchronous, hence unreliable and only 2018e6f633e5STiwei Bie * useful as an optimization. 2019e6f633e5STiwei Bie * 2020e6f633e5STiwei Bie * Unlike other operations, this need not be serialized. 2021e6f633e5STiwei Bie */ 2022e6f633e5STiwei Bie void virtqueue_disable_cb(struct virtqueue *_vq) 2023e6f633e5STiwei Bie { 20241ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 20251ce9e605STiwei Bie 20268d622d21SMichael S. Tsirkin /* If device triggered an event already it won't trigger one again: 20278d622d21SMichael S. Tsirkin * no need to disable. 20288d622d21SMichael S. Tsirkin */ 20298d622d21SMichael S. Tsirkin if (vq->event_triggered) 20308d622d21SMichael S. Tsirkin return; 20318d622d21SMichael S. 
Tsirkin 20321ce9e605STiwei Bie if (vq->packed_ring) 20331ce9e605STiwei Bie virtqueue_disable_cb_packed(_vq); 20341ce9e605STiwei Bie else 2035e6f633e5STiwei Bie virtqueue_disable_cb_split(_vq); 2036e6f633e5STiwei Bie } 2037e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 2038e6f633e5STiwei Bie 2039e6f633e5STiwei Bie /** 2040e6f633e5STiwei Bie * virtqueue_enable_cb_prepare - restart callbacks after disable_cb 2041a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2042e6f633e5STiwei Bie * 2043e6f633e5STiwei Bie * This re-enables callbacks; it returns current queue state 2044e6f633e5STiwei Bie * in an opaque unsigned value. This value should be later tested by 2045e6f633e5STiwei Bie * virtqueue_poll, to detect a possible race between the driver checking for 2046e6f633e5STiwei Bie * more work, and enabling callbacks. 2047e6f633e5STiwei Bie * 2048e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2049e6f633e5STiwei Bie * operations at the same time (except where noted). 2050e6f633e5STiwei Bie */ 205131532340SSolomon Tan unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq) 2052e6f633e5STiwei Bie { 20531ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 20541ce9e605STiwei Bie 20558d622d21SMichael S. Tsirkin if (vq->event_triggered) 20568d622d21SMichael S. Tsirkin vq->event_triggered = false; 20578d622d21SMichael S. Tsirkin 20581ce9e605STiwei Bie return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(_vq) : 20591ce9e605STiwei Bie virtqueue_enable_cb_prepare_split(_vq); 2060e6f633e5STiwei Bie } 2061e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); 2062e6f633e5STiwei Bie 2063e6f633e5STiwei Bie /** 2064e6f633e5STiwei Bie * virtqueue_poll - query pending used buffers 2065a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2066e6f633e5STiwei Bie * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). 2067e6f633e5STiwei Bie * 2068e6f633e5STiwei Bie * Returns "true" if there are pending used buffers in the queue. 2069e6f633e5STiwei Bie * 2070e6f633e5STiwei Bie * This does not need to be serialized. 2071e6f633e5STiwei Bie */ 207231532340SSolomon Tan bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx) 2073e6f633e5STiwei Bie { 2074e6f633e5STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 2075e6f633e5STiwei Bie 2076481a0d74SMao Wenan if (unlikely(vq->broken)) 2077481a0d74SMao Wenan return false; 2078481a0d74SMao Wenan 2079e6f633e5STiwei Bie virtio_mb(vq->weak_barriers); 20801ce9e605STiwei Bie return vq->packed_ring ? virtqueue_poll_packed(_vq, last_used_idx) : 20811ce9e605STiwei Bie virtqueue_poll_split(_vq, last_used_idx); 2082e6f633e5STiwei Bie } 2083e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_poll); 2084e6f633e5STiwei Bie 2085e6f633e5STiwei Bie /** 2086e6f633e5STiwei Bie * virtqueue_enable_cb - restart callbacks after disable_cb. 2087a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2088e6f633e5STiwei Bie * 2089e6f633e5STiwei Bie * This re-enables callbacks; it returns "false" if there are pending 2090e6f633e5STiwei Bie * buffers in the queue, to detect a possible race between the driver 2091e6f633e5STiwei Bie * checking for more work, and enabling callbacks. 2092e6f633e5STiwei Bie * 2093e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2094e6f633e5STiwei Bie * operations at the same time (except where noted). 
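 *
 * The canonical race-free consumer loop looks roughly like this, with
 * process() standing in for driver-specific handling:
 *
 *	for (;;) {
 *		virtqueue_disable_cb(vq);
 *		while ((buf = virtqueue_get_buf(vq, &len)))
 *			process(buf);
 *		if (virtqueue_enable_cb(vq))
 *			break;
 *	}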
2095e6f633e5STiwei Bie */ 2096e6f633e5STiwei Bie bool virtqueue_enable_cb(struct virtqueue *_vq) 2097e6f633e5STiwei Bie { 209831532340SSolomon Tan unsigned int last_used_idx = virtqueue_enable_cb_prepare(_vq); 2099e6f633e5STiwei Bie 2100e6f633e5STiwei Bie return !virtqueue_poll(_vq, last_used_idx); 2101e6f633e5STiwei Bie } 2102e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 2103e6f633e5STiwei Bie 2104e6f633e5STiwei Bie /** 2105e6f633e5STiwei Bie * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 2106a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2107e6f633e5STiwei Bie * 2108e6f633e5STiwei Bie * This re-enables callbacks but hints to the other side to delay 2109e6f633e5STiwei Bie * interrupts until most of the available buffers have been processed; 2110e6f633e5STiwei Bie * it returns "false" if there are many pending buffers in the queue, 2111e6f633e5STiwei Bie * to detect a possible race between the driver checking for more work, 2112e6f633e5STiwei Bie * and enabling callbacks. 2113e6f633e5STiwei Bie * 2114e6f633e5STiwei Bie * Caller must ensure we don't call this with other virtqueue 2115e6f633e5STiwei Bie * operations at the same time (except where noted). 2116e6f633e5STiwei Bie */ 2117e6f633e5STiwei Bie bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 2118e6f633e5STiwei Bie { 21191ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 21201ce9e605STiwei Bie 21218d622d21SMichael S. Tsirkin if (vq->event_triggered) 21228d622d21SMichael S. Tsirkin vq->event_triggered = false; 21238d622d21SMichael S. Tsirkin 21241ce9e605STiwei Bie return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(_vq) : 21251ce9e605STiwei Bie virtqueue_enable_cb_delayed_split(_vq); 2126e6f633e5STiwei Bie } 2127e6f633e5STiwei Bie EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 2128e6f633e5STiwei Bie 2129138fd251STiwei Bie /** 2130138fd251STiwei Bie * virtqueue_detach_unused_buf - detach first unused buffer 2131a5581206SJiang Biao * @_vq: the struct virtqueue we're talking about. 2132138fd251STiwei Bie * 2133138fd251STiwei Bie * Returns NULL or the "data" token handed to virtqueue_add_*(). 2134138fd251STiwei Bie * This is not valid on an active queue; it is useful only for device 2135138fd251STiwei Bie * shutdown. 2136138fd251STiwei Bie */ 2137138fd251STiwei Bie void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 2138138fd251STiwei Bie { 21391ce9e605STiwei Bie struct vring_virtqueue *vq = to_vvq(_vq); 21401ce9e605STiwei Bie 21411ce9e605STiwei Bie return vq->packed_ring ? virtqueue_detach_unused_buf_packed(_vq) : 21421ce9e605STiwei Bie virtqueue_detach_unused_buf_split(_vq); 2143138fd251STiwei Bie } 21447c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 2145c021eac4SShirley Ma 2146138fd251STiwei Bie static inline bool more_used(const struct vring_virtqueue *vq) 2147138fd251STiwei Bie { 21481ce9e605STiwei Bie return vq->packed_ring ? 
more_used_packed(vq) : more_used_split(vq); 2149138fd251STiwei Bie } 2150138fd251STiwei Bie 21510a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 21520a8a69ddSRusty Russell { 21530a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 21540a8a69ddSRusty Russell 21550a8a69ddSRusty Russell if (!more_used(vq)) { 21560a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 21570a8a69ddSRusty Russell return IRQ_NONE; 21580a8a69ddSRusty Russell } 21590a8a69ddSRusty Russell 21608b4ec69dSJason Wang if (unlikely(vq->broken)) { 2161*c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 21628b4ec69dSJason Wang dev_warn_once(&vq->vq.vdev->dev, 21638b4ec69dSJason Wang "virtio vring IRQ raised before DRIVER_OK"); 21648b4ec69dSJason Wang return IRQ_NONE; 2165*c346dae4SJason Wang #else 2166*c346dae4SJason Wang return IRQ_HANDLED; 2167*c346dae4SJason Wang #endif 21688b4ec69dSJason Wang } 21690a8a69ddSRusty Russell 21708d622d21SMichael S. Tsirkin /* Just a hint for performance: so it's ok that this can be racy! */ 21718d622d21SMichael S. Tsirkin if (vq->event) 21728d622d21SMichael S. Tsirkin vq->event_triggered = true; 21738d622d21SMichael S. Tsirkin 21740a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 217518445c4dSRusty Russell if (vq->vq.callback) 217618445c4dSRusty Russell vq->vq.callback(&vq->vq); 21770a8a69ddSRusty Russell 21780a8a69ddSRusty Russell return IRQ_HANDLED; 21790a8a69ddSRusty Russell } 2180c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 21810a8a69ddSRusty Russell 21821ce9e605STiwei Bie /* Only available for split ring */ 21832a2d1382SAndy Lutomirski struct virtqueue *__vring_new_virtqueue(unsigned int index, 21842a2d1382SAndy Lutomirski struct vring vring, 21850a8a69ddSRusty Russell struct virtio_device *vdev, 21867b21e34fSRusty Russell bool weak_barriers, 2187f94682ddSMichael S. Tsirkin bool context, 218846f9c2b9SHeinz Graalfs bool (*notify)(struct virtqueue *), 21899499f5e7SRusty Russell void (*callback)(struct virtqueue *), 21909499f5e7SRusty Russell const char *name) 21910a8a69ddSRusty Russell { 21922a2d1382SAndy Lutomirski struct vring_virtqueue *vq; 21930a8a69ddSRusty Russell 21941ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 21951ce9e605STiwei Bie return NULL; 21961ce9e605STiwei Bie 2197cbeedb72STiwei Bie vq = kmalloc(sizeof(*vq), GFP_KERNEL); 21980a8a69ddSRusty Russell if (!vq) 21990a8a69ddSRusty Russell return NULL; 22000a8a69ddSRusty Russell 22011ce9e605STiwei Bie vq->packed_ring = false; 22020a8a69ddSRusty Russell vq->vq.callback = callback; 22030a8a69ddSRusty Russell vq->vq.vdev = vdev; 22049499f5e7SRusty Russell vq->vq.name = name; 22052a2d1382SAndy Lutomirski vq->vq.num_free = vring.num; 220606ca287dSRusty Russell vq->vq.index = index; 22072a2d1382SAndy Lutomirski vq->we_own_ring = false; 22080a8a69ddSRusty Russell vq->notify = notify; 22097b21e34fSRusty Russell vq->weak_barriers = weak_barriers; 2210*c346dae4SJason Wang #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 22118b4ec69dSJason Wang vq->broken = true; 2212*c346dae4SJason Wang #else 2213*c346dae4SJason Wang vq->broken = false; 2214*c346dae4SJason Wang #endif 22150a8a69ddSRusty Russell vq->last_used_idx = 0; 22168d622d21SMichael S. 
Tsirkin vq->event_triggered = false; 22170a8a69ddSRusty Russell vq->num_added = 0; 2218fb3fba6bSTiwei Bie vq->use_dma_api = vring_use_dma_api(vdev); 22190a8a69ddSRusty Russell #ifdef DEBUG 22200a8a69ddSRusty Russell vq->in_use = false; 2221e93300b1SRusty Russell vq->last_add_time_valid = false; 22220a8a69ddSRusty Russell #endif 22230a8a69ddSRusty Russell 22245a08b04fSMichael S. Tsirkin vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC) && 22255a08b04fSMichael S. Tsirkin !context; 2226a5c262c5SMichael S. Tsirkin vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 22279fa29b9dSMark McLoughlin 222845383fb0STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_ORDER_PLATFORM)) 222945383fb0STiwei Bie vq->weak_barriers = false; 223045383fb0STiwei Bie 2231d79dca75STiwei Bie vq->split.queue_dma_addr = 0; 2232d79dca75STiwei Bie vq->split.queue_size_in_bytes = 0; 2233d79dca75STiwei Bie 2234e593bf97STiwei Bie vq->split.vring = vring; 2235e593bf97STiwei Bie vq->split.avail_flags_shadow = 0; 2236e593bf97STiwei Bie vq->split.avail_idx_shadow = 0; 2237e593bf97STiwei Bie 22380a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 2239f277ec42SVenkatesh Srinivas if (!callback) { 2240e593bf97STiwei Bie vq->split.avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; 22410ea1e4a6SLadi Prosek if (!vq->event) 2242e593bf97STiwei Bie vq->split.vring.avail->flags = cpu_to_virtio16(vdev, 2243e593bf97STiwei Bie vq->split.avail_flags_shadow); 2244f277ec42SVenkatesh Srinivas } 22450a8a69ddSRusty Russell 2246cbeedb72STiwei Bie vq->split.desc_state = kmalloc_array(vring.num, 2247cbeedb72STiwei Bie sizeof(struct vring_desc_state_split), GFP_KERNEL); 22485bc72234SJason Wang if (!vq->split.desc_state) 22495bc72234SJason Wang goto err_state; 2250cbeedb72STiwei Bie 225172b5e895SJason Wang vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); 225272b5e895SJason Wang if (!vq->split.desc_extra) 225372b5e895SJason Wang goto err_extra; 225472b5e895SJason Wang 22550a8a69ddSRusty Russell /* Put everything in free lists. */ 22560a8a69ddSRusty Russell vq->free_head = 0; 2257cbeedb72STiwei Bie memset(vq->split.desc_state, 0, vring.num * 2258cbeedb72STiwei Bie sizeof(struct vring_desc_state_split)); 22590a8a69ddSRusty Russell 22600e566c8fSParav Pandit spin_lock(&vdev->vqs_list_lock); 2261e152d8afSDan Carpenter list_add_tail(&vq->vq.list, &vdev->vqs); 22620e566c8fSParav Pandit spin_unlock(&vdev->vqs_list_lock); 22630a8a69ddSRusty Russell return &vq->vq; 22645bc72234SJason Wang 226572b5e895SJason Wang err_extra: 226672b5e895SJason Wang kfree(vq->split.desc_state); 22675bc72234SJason Wang err_state: 22685bc72234SJason Wang kfree(vq); 22695bc72234SJason Wang return NULL; 22700a8a69ddSRusty Russell } 22712a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(__vring_new_virtqueue); 22722a2d1382SAndy Lutomirski 22732a2d1382SAndy Lutomirski struct virtqueue *vring_create_virtqueue( 22742a2d1382SAndy Lutomirski unsigned int index, 22752a2d1382SAndy Lutomirski unsigned int num, 22762a2d1382SAndy Lutomirski unsigned int vring_align, 22772a2d1382SAndy Lutomirski struct virtio_device *vdev, 22782a2d1382SAndy Lutomirski bool weak_barriers, 22792a2d1382SAndy Lutomirski bool may_reduce_num, 2280f94682ddSMichael S. 
Tsirkin bool context, 22812a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *), 22822a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *), 22832a2d1382SAndy Lutomirski const char *name) 22842a2d1382SAndy Lutomirski { 22851ce9e605STiwei Bie 22861ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 22871ce9e605STiwei Bie return vring_create_virtqueue_packed(index, num, vring_align, 22881ce9e605STiwei Bie vdev, weak_barriers, may_reduce_num, 22891ce9e605STiwei Bie context, notify, callback, name); 22901ce9e605STiwei Bie 2291d79dca75STiwei Bie return vring_create_virtqueue_split(index, num, vring_align, 2292d79dca75STiwei Bie vdev, weak_barriers, may_reduce_num, 2293d79dca75STiwei Bie context, notify, callback, name); 22942a2d1382SAndy Lutomirski } 22952a2d1382SAndy Lutomirski EXPORT_SYMBOL_GPL(vring_create_virtqueue); 22962a2d1382SAndy Lutomirski 22971ce9e605STiwei Bie /* Only available for split ring */ 22982a2d1382SAndy Lutomirski struct virtqueue *vring_new_virtqueue(unsigned int index, 22992a2d1382SAndy Lutomirski unsigned int num, 23002a2d1382SAndy Lutomirski unsigned int vring_align, 23012a2d1382SAndy Lutomirski struct virtio_device *vdev, 23022a2d1382SAndy Lutomirski bool weak_barriers, 2303f94682ddSMichael S. Tsirkin bool context, 23042a2d1382SAndy Lutomirski void *pages, 23052a2d1382SAndy Lutomirski bool (*notify)(struct virtqueue *vq), 23062a2d1382SAndy Lutomirski void (*callback)(struct virtqueue *vq), 23072a2d1382SAndy Lutomirski const char *name) 23082a2d1382SAndy Lutomirski { 23092a2d1382SAndy Lutomirski struct vring vring; 23101ce9e605STiwei Bie 23111ce9e605STiwei Bie if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) 23121ce9e605STiwei Bie return NULL; 23131ce9e605STiwei Bie 23142a2d1382SAndy Lutomirski vring_init(&vring, num, pages, vring_align); 2315f94682ddSMichael S. 
Tsirkin return __vring_new_virtqueue(index, vring, vdev, weak_barriers, context, 23162a2d1382SAndy Lutomirski notify, callback, name); 23172a2d1382SAndy Lutomirski } 2318c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 23190a8a69ddSRusty Russell 23202a2d1382SAndy Lutomirski void vring_del_virtqueue(struct virtqueue *_vq) 23210a8a69ddSRusty Russell { 23222a2d1382SAndy Lutomirski struct vring_virtqueue *vq = to_vvq(_vq); 23232a2d1382SAndy Lutomirski 23240e566c8fSParav Pandit spin_lock(&vq->vq.vdev->vqs_list_lock); 2325249f2554SParav Pandit list_del(&_vq->list); 23260e566c8fSParav Pandit spin_unlock(&vq->vq.vdev->vqs_list_lock); 2327249f2554SParav Pandit 23282a2d1382SAndy Lutomirski if (vq->we_own_ring) { 23291ce9e605STiwei Bie if (vq->packed_ring) { 23301ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 23311ce9e605STiwei Bie vq->packed.ring_size_in_bytes, 23321ce9e605STiwei Bie vq->packed.vring.desc, 23331ce9e605STiwei Bie vq->packed.ring_dma_addr); 23341ce9e605STiwei Bie 23351ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 23361ce9e605STiwei Bie vq->packed.event_size_in_bytes, 23371ce9e605STiwei Bie vq->packed.vring.driver, 23381ce9e605STiwei Bie vq->packed.driver_event_dma_addr); 23391ce9e605STiwei Bie 23401ce9e605STiwei Bie vring_free_queue(vq->vq.vdev, 23411ce9e605STiwei Bie vq->packed.event_size_in_bytes, 23421ce9e605STiwei Bie vq->packed.vring.device, 23431ce9e605STiwei Bie vq->packed.device_event_dma_addr); 23441ce9e605STiwei Bie 23451ce9e605STiwei Bie kfree(vq->packed.desc_state); 23461ce9e605STiwei Bie kfree(vq->packed.desc_extra); 23471ce9e605STiwei Bie } else { 2348d79dca75STiwei Bie vring_free_queue(vq->vq.vdev, 2349d79dca75STiwei Bie vq->split.queue_size_in_bytes, 2350d79dca75STiwei Bie vq->split.vring.desc, 2351d79dca75STiwei Bie vq->split.queue_dma_addr); 2352f13f09a1SSuman Anna } 2353f13f09a1SSuman Anna } 235472b5e895SJason Wang if (!vq->packed_ring) { 2355cbeedb72STiwei Bie kfree(vq->split.desc_state); 235672b5e895SJason Wang kfree(vq->split.desc_extra); 235772b5e895SJason Wang } 23582a2d1382SAndy Lutomirski kfree(vq); 23590a8a69ddSRusty Russell } 2360c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 23610a8a69ddSRusty Russell 2362e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 2363e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 2364e34f8725SRusty Russell { 2365e34f8725SRusty Russell unsigned int i; 2366e34f8725SRusty Russell 2367e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 2368e34f8725SRusty Russell switch (i) { 23699fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC: 23709fa29b9dSMark McLoughlin break; 2371a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX: 2372a5c262c5SMichael S. Tsirkin break; 2373747ae34aSMichael S. Tsirkin case VIRTIO_F_VERSION_1: 2374747ae34aSMichael S. Tsirkin break; 2375321bd212SMichael S. Tsirkin case VIRTIO_F_ACCESS_PLATFORM: 23761a937693SMichael S. Tsirkin break; 2377f959a128STiwei Bie case VIRTIO_F_RING_PACKED: 2378f959a128STiwei Bie break; 237945383fb0STiwei Bie case VIRTIO_F_ORDER_PLATFORM: 238045383fb0STiwei Bie break; 2381e34f8725SRusty Russell default: 2382e34f8725SRusty Russell /* We don't understand this bit. */ 2383e16e12beSMichael S. 
/* Manipulates transport-specific feature bits. */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int i;

	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
		switch (i) {
		case VIRTIO_RING_F_INDIRECT_DESC:
			break;
		case VIRTIO_RING_F_EVENT_IDX:
			break;
		case VIRTIO_F_VERSION_1:
			break;
		case VIRTIO_F_ACCESS_PLATFORM:
			break;
		case VIRTIO_F_RING_PACKED:
			break;
		case VIRTIO_F_ORDER_PLATFORM:
			break;
		default:
			/* We don't understand this bit. */
			__virtio_clear_bit(vdev, i);
		}
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);

/**
 * virtqueue_get_vring_size - return the size of the virtqueue's vring
 * @_vq: the struct virtqueue containing the vring of interest.
 *
 * Returns the size of the vring.  This is mainly used for boasting to
 * userspace.  Unlike other operations, this need not be serialized.
 */
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return vq->packed_ring ? vq->packed.vring.num : vq->split.vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);

bool virtqueue_is_broken(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	return READ_ONCE(vq->broken);
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);

/*
 * This should prevent the device from being used, allowing drivers to
 * recover.  You may need to grab appropriate locks to flush.
 */
void virtio_break_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	spin_lock(&dev->vqs_list_lock);
	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
		WRITE_ONCE(vq->broken, true);
	}
	spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(virtio_break_device);
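/*
 * A sketch of the intended use, compiled out with #if 0: a transport
 * that detects surprise removal can mark every ring broken so callers
 * see virtqueue_is_broken() == true and fail fast instead of waiting on
 * a dead device.  example_surprise_removal() is a hypothetical name.
 */
#if 0
static void example_surprise_removal(struct virtio_device *vdev)
{
	virtio_break_device(vdev);
	/* From here on, virtqueue operations on vdev's rings fail fast. */
}
#endif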
/*
 * This should allow the device to be used by the driver again.  You may
 * need to grab appropriate locks to flush the write to vq->broken.  This
 * should only be used in specific cases, e.g. probing and restoring, and
 * should only be called by the virtio core, not directly by the driver.
 */
void __virtio_unbreak_device(struct virtio_device *dev)
{
	struct virtqueue *_vq;

	spin_lock(&dev->vqs_list_lock);
	list_for_each_entry(_vq, &dev->vqs, list) {
		struct vring_virtqueue *vq = to_vvq(_vq);

		/* Pairs with READ_ONCE() in virtqueue_is_broken(). */
		WRITE_ONCE(vq->broken, false);
	}
	spin_unlock(&dev->vqs_list_lock);
}
EXPORT_SYMBOL_GPL(__virtio_unbreak_device);

dma_addr_t virtqueue_get_desc_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.ring_dma_addr;

	return vq->split.queue_dma_addr;
}
EXPORT_SYMBOL_GPL(virtqueue_get_desc_addr);

dma_addr_t virtqueue_get_avail_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.driver_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.avail - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail_addr);

dma_addr_t virtqueue_get_used_addr(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	BUG_ON(!vq->we_own_ring);

	if (vq->packed_ring)
		return vq->packed.device_event_dma_addr;

	return vq->split.queue_dma_addr +
		((char *)vq->split.vring.used - (char *)vq->split.vring.desc);
}
EXPORT_SYMBOL_GPL(virtqueue_get_used_addr);

/* Only available for split ring */
const struct vring *virtqueue_get_vring(struct virtqueue *vq)
{
	return &to_vvq(vq)->split.vring;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring);

MODULE_LICENSE("GPL");
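/*
 * An illustrative sketch, compiled out with #if 0: how a transport that
 * let the core allocate the ring (we_own_ring) might program the three
 * ring addresses into device registers, split into 32-bit halves much
 * like virtio-mmio does.  example_program_queue(), the register offsets,
 * and the assumed #include <linux/io.h> for writel() are hypothetical.
 */
#if 0
static void example_program_queue(struct virtqueue *vq, void __iomem *base)
{
	dma_addr_t desc = virtqueue_get_desc_addr(vq);
	dma_addr_t avail = virtqueue_get_avail_addr(vq);
	dma_addr_t used = virtqueue_get_used_addr(vq);

	writel(lower_32_bits(desc), base + 0x00);
	writel(upper_32_bits(desc), base + 0x04);
	writel(lower_32_bits(avail), base + 0x08);
	writel(upper_32_bits(avail), base + 0x0c);
	writel(lower_32_bits(used), base + 0x10);
	writel(upper_32_bits(used), base + 0x14);
}
#endif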