10a8a69ddSRusty Russell /* Virtio ring implementation. 20a8a69ddSRusty Russell * 30a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 40a8a69ddSRusty Russell * 50a8a69ddSRusty Russell * This program is free software; you can redistribute it and/or modify 60a8a69ddSRusty Russell * it under the terms of the GNU General Public License as published by 70a8a69ddSRusty Russell * the Free Software Foundation; either version 2 of the License, or 80a8a69ddSRusty Russell * (at your option) any later version. 90a8a69ddSRusty Russell * 100a8a69ddSRusty Russell * This program is distributed in the hope that it will be useful, 110a8a69ddSRusty Russell * but WITHOUT ANY WARRANTY; without even the implied warranty of 120a8a69ddSRusty Russell * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 130a8a69ddSRusty Russell * GNU General Public License for more details. 140a8a69ddSRusty Russell * 150a8a69ddSRusty Russell * You should have received a copy of the GNU General Public License 160a8a69ddSRusty Russell * along with this program; if not, write to the Free Software 170a8a69ddSRusty Russell * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 180a8a69ddSRusty Russell */ 190a8a69ddSRusty Russell #include <linux/virtio.h> 200a8a69ddSRusty Russell #include <linux/virtio_ring.h> 21e34f8725SRusty Russell #include <linux/virtio_config.h> 220a8a69ddSRusty Russell #include <linux/device.h> 235a0e3ad6STejun Heo #include <linux/slab.h> 24b5a2c4f1SPaul Gortmaker #include <linux/module.h> 250a8a69ddSRusty Russell 26d57ed95dSMichael S. Tsirkin /* virtio guest is communicating with a virtual "device" that actually runs on 27d57ed95dSMichael S. Tsirkin * a host processor. Memory barriers are used to control SMP effects. */ 28d57ed95dSMichael S. Tsirkin #ifdef CONFIG_SMP 29d57ed95dSMichael S. Tsirkin /* Where possible, use SMP barriers which are more lightweight than mandatory 30d57ed95dSMichael S. Tsirkin * barriers, because mandatory barriers control MMIO effects on accesses 317b21e34fSRusty Russell * through relaxed memory I/O windows (which virtio-pci does not use). */ 327b21e34fSRusty Russell #define virtio_mb(vq) \ 337b21e34fSRusty Russell do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0) 347b21e34fSRusty Russell #define virtio_rmb(vq) \ 357b21e34fSRusty Russell do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) 367b21e34fSRusty Russell #define virtio_wmb(vq) \ 377b21e34fSRusty Russell do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) 38d57ed95dSMichael S. Tsirkin #else 39d57ed95dSMichael S. Tsirkin /* We must force memory ordering even if guest is UP since host could be 40d57ed95dSMichael S. Tsirkin * running on another CPU, but SMP barriers are defined to barrier() in that 41d57ed95dSMichael S. Tsirkin * configuration. So fall back to mandatory barriers instead. */ 427b21e34fSRusty Russell #define virtio_mb(vq) mb() 437b21e34fSRusty Russell #define virtio_rmb(vq) rmb() 447b21e34fSRusty Russell #define virtio_wmb(vq) wmb() 45d57ed95dSMichael S. Tsirkin #endif 46d57ed95dSMichael S. Tsirkin 470a8a69ddSRusty Russell #ifdef DEBUG 480a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 499499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 509499f5e7SRusty Russell do { \ 519499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 529499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 539499f5e7SRusty Russell BUG(); \ 549499f5e7SRusty Russell } while (0) 55c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 563a35ce7dSRoel Kluin #define START_USE(_vq) \ 57c5f841f1SRusty Russell do { \ 58c5f841f1SRusty Russell if ((_vq)->in_use) \ 599499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 609499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 61c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 62c5f841f1SRusty Russell } while (0) 633a35ce7dSRoel Kluin #define END_USE(_vq) \ 6497a545abSRusty Russell do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 650a8a69ddSRusty Russell #else 669499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 679499f5e7SRusty Russell do { \ 689499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 699499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 709499f5e7SRusty Russell (_vq)->broken = true; \ 719499f5e7SRusty Russell } while (0) 720a8a69ddSRusty Russell #define START_USE(vq) 730a8a69ddSRusty Russell #define END_USE(vq) 740a8a69ddSRusty Russell #endif 750a8a69ddSRusty Russell 760a8a69ddSRusty Russell struct vring_virtqueue 770a8a69ddSRusty Russell { 780a8a69ddSRusty Russell struct virtqueue vq; 790a8a69ddSRusty Russell 800a8a69ddSRusty Russell /* Actual memory layout for this queue */ 810a8a69ddSRusty Russell struct vring vring; 820a8a69ddSRusty Russell 837b21e34fSRusty Russell /* Can we use weak barriers? */ 847b21e34fSRusty Russell bool weak_barriers; 857b21e34fSRusty Russell 860a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 870a8a69ddSRusty Russell bool broken; 880a8a69ddSRusty Russell 899fa29b9dSMark McLoughlin /* Host supports indirect buffers */ 909fa29b9dSMark McLoughlin bool indirect; 919fa29b9dSMark McLoughlin 92a5c262c5SMichael S. Tsirkin /* Host publishes avail event idx */ 93a5c262c5SMichael S. Tsirkin bool event; 94a5c262c5SMichael S. Tsirkin 950a8a69ddSRusty Russell /* Number of free buffers */ 960a8a69ddSRusty Russell unsigned int num_free; 970a8a69ddSRusty Russell /* Head of free buffer list. */ 980a8a69ddSRusty Russell unsigned int free_head; 990a8a69ddSRusty Russell /* Number we've added since last sync. */ 1000a8a69ddSRusty Russell unsigned int num_added; 1010a8a69ddSRusty Russell 1020a8a69ddSRusty Russell /* Last used index we've seen. */ 1031bc4953eSAnthony Liguori u16 last_used_idx; 1040a8a69ddSRusty Russell 1050a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 1060a8a69ddSRusty Russell void (*notify)(struct virtqueue *vq); 1070a8a69ddSRusty Russell 1080a8a69ddSRusty Russell #ifdef DEBUG 1090a8a69ddSRusty Russell /* They're supposed to lock for us. */ 1100a8a69ddSRusty Russell unsigned int in_use; 1110a8a69ddSRusty Russell #endif 1120a8a69ddSRusty Russell 1130a8a69ddSRusty Russell /* Tokens for callbacks. */ 1140a8a69ddSRusty Russell void *data[]; 1150a8a69ddSRusty Russell }; 1160a8a69ddSRusty Russell 1170a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 1180a8a69ddSRusty Russell 1199fa29b9dSMark McLoughlin /* Set up an indirect table of descriptors and add it to the queue. */ 1209fa29b9dSMark McLoughlin static int vring_add_indirect(struct vring_virtqueue *vq, 1219fa29b9dSMark McLoughlin struct scatterlist sg[], 1229fa29b9dSMark McLoughlin unsigned int out, 123bbd603efSMichael S. Tsirkin unsigned int in, 124bbd603efSMichael S. Tsirkin gfp_t gfp) 1259fa29b9dSMark McLoughlin { 1269fa29b9dSMark McLoughlin struct vring_desc *desc; 1279fa29b9dSMark McLoughlin unsigned head; 1289fa29b9dSMark McLoughlin int i; 1299fa29b9dSMark McLoughlin 130bbd603efSMichael S. Tsirkin desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); 1319fa29b9dSMark McLoughlin if (!desc) 132686d3637SMichael S. Tsirkin return -ENOMEM; 1339fa29b9dSMark McLoughlin 1349fa29b9dSMark McLoughlin /* Transfer entries from the sg list into the indirect page */ 1359fa29b9dSMark McLoughlin for (i = 0; i < out; i++) { 1369fa29b9dSMark McLoughlin desc[i].flags = VRING_DESC_F_NEXT; 1379fa29b9dSMark McLoughlin desc[i].addr = sg_phys(sg); 1389fa29b9dSMark McLoughlin desc[i].len = sg->length; 1399fa29b9dSMark McLoughlin desc[i].next = i+1; 1409fa29b9dSMark McLoughlin sg++; 1419fa29b9dSMark McLoughlin } 1429fa29b9dSMark McLoughlin for (; i < (out + in); i++) { 1439fa29b9dSMark McLoughlin desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 1449fa29b9dSMark McLoughlin desc[i].addr = sg_phys(sg); 1459fa29b9dSMark McLoughlin desc[i].len = sg->length; 1469fa29b9dSMark McLoughlin desc[i].next = i+1; 1479fa29b9dSMark McLoughlin sg++; 1489fa29b9dSMark McLoughlin } 1499fa29b9dSMark McLoughlin 1509fa29b9dSMark McLoughlin /* Last one doesn't continue. */ 1519fa29b9dSMark McLoughlin desc[i-1].flags &= ~VRING_DESC_F_NEXT; 1529fa29b9dSMark McLoughlin desc[i-1].next = 0; 1539fa29b9dSMark McLoughlin 1549fa29b9dSMark McLoughlin /* We're about to use a buffer */ 1559fa29b9dSMark McLoughlin vq->num_free--; 1569fa29b9dSMark McLoughlin 1579fa29b9dSMark McLoughlin /* Use a single buffer which doesn't continue */ 1589fa29b9dSMark McLoughlin head = vq->free_head; 1599fa29b9dSMark McLoughlin vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; 1609fa29b9dSMark McLoughlin vq->vring.desc[head].addr = virt_to_phys(desc); 1619fa29b9dSMark McLoughlin vq->vring.desc[head].len = i * sizeof(struct vring_desc); 1629fa29b9dSMark McLoughlin 1639fa29b9dSMark McLoughlin /* Update free pointer */ 1649fa29b9dSMark McLoughlin vq->free_head = vq->vring.desc[head].next; 1659fa29b9dSMark McLoughlin 1669fa29b9dSMark McLoughlin return head; 1679fa29b9dSMark McLoughlin } 1689fa29b9dSMark McLoughlin 169*5dfc1762SRusty Russell /** 170*5dfc1762SRusty Russell * virtqueue_add_buf_gfp - expose buffer to other end 171*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 172*5dfc1762SRusty Russell * @sg: the description of the buffer(s). 173*5dfc1762SRusty Russell * @out_num: the number of sg readable by other side 174*5dfc1762SRusty Russell * @in_num: the number of sg which are writable (after readable ones) 175*5dfc1762SRusty Russell * @data: the token identifying the buffer. 176*5dfc1762SRusty Russell * @gfp: how to do memory allocations (if necessary). 177*5dfc1762SRusty Russell * 178*5dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue operations 179*5dfc1762SRusty Russell * at the same time (except where noted). 180*5dfc1762SRusty Russell * 181*5dfc1762SRusty Russell * Returns remaining capacity of queue or a negative error 182*5dfc1762SRusty Russell * (ie. ENOSPC). Note that it only really makes sense to treat all 183*5dfc1762SRusty Russell * positive return values as "available": indirect buffers mean that 184*5dfc1762SRusty Russell * we can put an entire sg[] array inside a single queue entry. 185*5dfc1762SRusty Russell */ 186bbd603efSMichael S. Tsirkin int virtqueue_add_buf_gfp(struct virtqueue *_vq, 1870a8a69ddSRusty Russell struct scatterlist sg[], 1880a8a69ddSRusty Russell unsigned int out, 1890a8a69ddSRusty Russell unsigned int in, 190bbd603efSMichael S. Tsirkin void *data, 191bbd603efSMichael S. Tsirkin gfp_t gfp) 1920a8a69ddSRusty Russell { 1930a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 1941fe9b6feSMichael S. Tsirkin unsigned int i, avail, uninitialized_var(prev); 1951fe9b6feSMichael S. Tsirkin int head; 1960a8a69ddSRusty Russell 1979fa29b9dSMark McLoughlin START_USE(vq); 1989fa29b9dSMark McLoughlin 1990a8a69ddSRusty Russell BUG_ON(data == NULL); 2009fa29b9dSMark McLoughlin 2019fa29b9dSMark McLoughlin /* If the host supports indirect descriptor tables, and we have multiple 2029fa29b9dSMark McLoughlin * buffers, then go indirect. FIXME: tune this threshold */ 2039fa29b9dSMark McLoughlin if (vq->indirect && (out + in) > 1 && vq->num_free) { 204bbd603efSMichael S. Tsirkin head = vring_add_indirect(vq, sg, out, in, gfp); 2051fe9b6feSMichael S. Tsirkin if (likely(head >= 0)) 2069fa29b9dSMark McLoughlin goto add_head; 2079fa29b9dSMark McLoughlin } 2089fa29b9dSMark McLoughlin 2090a8a69ddSRusty Russell BUG_ON(out + in > vq->vring.num); 2100a8a69ddSRusty Russell BUG_ON(out + in == 0); 2110a8a69ddSRusty Russell 2120a8a69ddSRusty Russell if (vq->num_free < out + in) { 2130a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 2140a8a69ddSRusty Russell out + in, vq->num_free); 21544653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 21644653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 21744653eaeSRusty Russell * host should service the ring ASAP. */ 21844653eaeSRusty Russell if (out) 219426e3e0aSRusty Russell vq->notify(&vq->vq); 2200a8a69ddSRusty Russell END_USE(vq); 2210a8a69ddSRusty Russell return -ENOSPC; 2220a8a69ddSRusty Russell } 2230a8a69ddSRusty Russell 2240a8a69ddSRusty Russell /* We're about to use some buffers from the free list. */ 2250a8a69ddSRusty Russell vq->num_free -= out + in; 2260a8a69ddSRusty Russell 2270a8a69ddSRusty Russell head = vq->free_head; 2280a8a69ddSRusty Russell for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 2290a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 23015f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 2310a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 2320a8a69ddSRusty Russell prev = i; 2330a8a69ddSRusty Russell sg++; 2340a8a69ddSRusty Russell } 2350a8a69ddSRusty Russell for (; in; i = vq->vring.desc[i].next, in--) { 2360a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 23715f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 2380a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 2390a8a69ddSRusty Russell prev = i; 2400a8a69ddSRusty Russell sg++; 2410a8a69ddSRusty Russell } 2420a8a69ddSRusty Russell /* Last one doesn't continue. */ 2430a8a69ddSRusty Russell vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; 2440a8a69ddSRusty Russell 2450a8a69ddSRusty Russell /* Update free pointer */ 2460a8a69ddSRusty Russell vq->free_head = i; 2470a8a69ddSRusty Russell 2489fa29b9dSMark McLoughlin add_head: 2490a8a69ddSRusty Russell /* Set token. */ 2500a8a69ddSRusty Russell vq->data[head] = data; 2510a8a69ddSRusty Russell 2520a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 2530a8a69ddSRusty Russell * do sync). FIXME: avoid modulus here? */ 2540a8a69ddSRusty Russell avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; 2550a8a69ddSRusty Russell vq->vring.avail->ring[avail] = head; 2560a8a69ddSRusty Russell 2570a8a69ddSRusty Russell pr_debug("Added buffer head %i to %p\n", head, vq); 2580a8a69ddSRusty Russell END_USE(vq); 2593c1b27d5SRusty Russell 2603c1b27d5SRusty Russell return vq->num_free; 2610a8a69ddSRusty Russell } 262bbd603efSMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); 2630a8a69ddSRusty Russell 264*5dfc1762SRusty Russell /** 265*5dfc1762SRusty Russell * virtqueue_kick - update after add_buf 266*5dfc1762SRusty Russell * @vq: the struct virtqueue 267*5dfc1762SRusty Russell * 268*5dfc1762SRusty Russell * After one or more virtqueue_add_buf_gfp calls, invoke this to kick 269*5dfc1762SRusty Russell * the other side. 270*5dfc1762SRusty Russell * 271*5dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 272*5dfc1762SRusty Russell * operations at the same time (except where noted). 273*5dfc1762SRusty Russell */ 2747c5e9ed0SMichael S. Tsirkin void virtqueue_kick(struct virtqueue *_vq) 2750a8a69ddSRusty Russell { 2760a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 277a5c262c5SMichael S. Tsirkin u16 new, old; 2780a8a69ddSRusty Russell START_USE(vq); 2790a8a69ddSRusty Russell /* Descriptors and available array need to be set before we expose the 2800a8a69ddSRusty Russell * new available array entries. */ 2817b21e34fSRusty Russell virtio_wmb(vq); 2820a8a69ddSRusty Russell 283a5c262c5SMichael S. Tsirkin old = vq->vring.avail->idx; 284a5c262c5SMichael S. Tsirkin new = vq->vring.avail->idx = old + vq->num_added; 2850a8a69ddSRusty Russell vq->num_added = 0; 2860a8a69ddSRusty Russell 2870a8a69ddSRusty Russell /* Need to update avail index before checking if we should notify */ 2887b21e34fSRusty Russell virtio_mb(vq); 2890a8a69ddSRusty Russell 290a5c262c5SMichael S. Tsirkin if (vq->event ? 291a5c262c5SMichael S. Tsirkin vring_need_event(vring_avail_event(&vq->vring), new, old) : 292a5c262c5SMichael S. Tsirkin !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) 2930a8a69ddSRusty Russell /* Prod other side to tell it about changes. */ 2940a8a69ddSRusty Russell vq->notify(&vq->vq); 2950a8a69ddSRusty Russell 2960a8a69ddSRusty Russell END_USE(vq); 2970a8a69ddSRusty Russell } 2987c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_kick); 2990a8a69ddSRusty Russell 3000a8a69ddSRusty Russell static void detach_buf(struct vring_virtqueue *vq, unsigned int head) 3010a8a69ddSRusty Russell { 3020a8a69ddSRusty Russell unsigned int i; 3030a8a69ddSRusty Russell 3040a8a69ddSRusty Russell /* Clear data ptr. */ 3050a8a69ddSRusty Russell vq->data[head] = NULL; 3060a8a69ddSRusty Russell 3070a8a69ddSRusty Russell /* Put back on free list: find end */ 3080a8a69ddSRusty Russell i = head; 3099fa29b9dSMark McLoughlin 3109fa29b9dSMark McLoughlin /* Free the indirect table */ 3119fa29b9dSMark McLoughlin if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) 3129fa29b9dSMark McLoughlin kfree(phys_to_virt(vq->vring.desc[i].addr)); 3139fa29b9dSMark McLoughlin 3140a8a69ddSRusty Russell while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { 3150a8a69ddSRusty Russell i = vq->vring.desc[i].next; 3160a8a69ddSRusty Russell vq->num_free++; 3170a8a69ddSRusty Russell } 3180a8a69ddSRusty Russell 3190a8a69ddSRusty Russell vq->vring.desc[i].next = vq->free_head; 3200a8a69ddSRusty Russell vq->free_head = head; 3210a8a69ddSRusty Russell /* Plus final descriptor */ 3220a8a69ddSRusty Russell vq->num_free++; 3230a8a69ddSRusty Russell } 3240a8a69ddSRusty Russell 3250a8a69ddSRusty Russell static inline bool more_used(const struct vring_virtqueue *vq) 3260a8a69ddSRusty Russell { 3270a8a69ddSRusty Russell return vq->last_used_idx != vq->vring.used->idx; 3280a8a69ddSRusty Russell } 3290a8a69ddSRusty Russell 330*5dfc1762SRusty Russell /** 331*5dfc1762SRusty Russell * virtqueue_get_buf - get the next used buffer 332*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 333*5dfc1762SRusty Russell * @len: the length written into the buffer 334*5dfc1762SRusty Russell * 335*5dfc1762SRusty Russell * If the driver wrote data into the buffer, @len will be set to the 336*5dfc1762SRusty Russell * amount written. This means you don't need to clear the buffer 337*5dfc1762SRusty Russell * beforehand to ensure there's no data leakage in the case of short 338*5dfc1762SRusty Russell * writes. 339*5dfc1762SRusty Russell * 340*5dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 341*5dfc1762SRusty Russell * operations at the same time (except where noted). 342*5dfc1762SRusty Russell * 343*5dfc1762SRusty Russell * Returns NULL if there are no used buffers, or the "data" token 344*5dfc1762SRusty Russell * handed to virtqueue_add_buf_gfp(). 345*5dfc1762SRusty Russell */ 3467c5e9ed0SMichael S. Tsirkin void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 3470a8a69ddSRusty Russell { 3480a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 3490a8a69ddSRusty Russell void *ret; 3500a8a69ddSRusty Russell unsigned int i; 3510a8a69ddSRusty Russell 3520a8a69ddSRusty Russell START_USE(vq); 3530a8a69ddSRusty Russell 3545ef82752SRusty Russell if (unlikely(vq->broken)) { 3555ef82752SRusty Russell END_USE(vq); 3565ef82752SRusty Russell return NULL; 3575ef82752SRusty Russell } 3585ef82752SRusty Russell 3590a8a69ddSRusty Russell if (!more_used(vq)) { 3600a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 3610a8a69ddSRusty Russell END_USE(vq); 3620a8a69ddSRusty Russell return NULL; 3630a8a69ddSRusty Russell } 3640a8a69ddSRusty Russell 3652d61ba95SMichael S. Tsirkin /* Only get used array entries after they have been exposed by host. */ 3667b21e34fSRusty Russell virtio_rmb(vq); 3672d61ba95SMichael S. Tsirkin 3680a8a69ddSRusty Russell i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; 3690a8a69ddSRusty Russell *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; 3700a8a69ddSRusty Russell 3710a8a69ddSRusty Russell if (unlikely(i >= vq->vring.num)) { 3720a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 3730a8a69ddSRusty Russell return NULL; 3740a8a69ddSRusty Russell } 3750a8a69ddSRusty Russell if (unlikely(!vq->data[i])) { 3760a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 3770a8a69ddSRusty Russell return NULL; 3780a8a69ddSRusty Russell } 3790a8a69ddSRusty Russell 3800a8a69ddSRusty Russell /* detach_buf clears data, so grab it now. */ 3810a8a69ddSRusty Russell ret = vq->data[i]; 3820a8a69ddSRusty Russell detach_buf(vq, i); 3830a8a69ddSRusty Russell vq->last_used_idx++; 384a5c262c5SMichael S. Tsirkin /* If we expect an interrupt for the next entry, tell host 385a5c262c5SMichael S. Tsirkin * by writing event index and flush out the write before 386a5c262c5SMichael S. Tsirkin * the read in the next get_buf call. */ 387a5c262c5SMichael S. Tsirkin if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 388a5c262c5SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx; 3897b21e34fSRusty Russell virtio_mb(vq); 390a5c262c5SMichael S. Tsirkin } 391a5c262c5SMichael S. Tsirkin 3920a8a69ddSRusty Russell END_USE(vq); 3930a8a69ddSRusty Russell return ret; 3940a8a69ddSRusty Russell } 3957c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_get_buf); 3960a8a69ddSRusty Russell 397*5dfc1762SRusty Russell /** 398*5dfc1762SRusty Russell * virtqueue_disable_cb - disable callbacks 399*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 400*5dfc1762SRusty Russell * 401*5dfc1762SRusty Russell * Note that this is not necessarily synchronous, hence unreliable and only 402*5dfc1762SRusty Russell * useful as an optimization. 403*5dfc1762SRusty Russell * 404*5dfc1762SRusty Russell * Unlike other operations, this need not be serialized. 405*5dfc1762SRusty Russell */ 4067c5e9ed0SMichael S. Tsirkin void virtqueue_disable_cb(struct virtqueue *_vq) 40718445c4dSRusty Russell { 40818445c4dSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 40918445c4dSRusty Russell 41018445c4dSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 41118445c4dSRusty Russell } 4127c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 41318445c4dSRusty Russell 414*5dfc1762SRusty Russell /** 415*5dfc1762SRusty Russell * virtqueue_enable_cb - restart callbacks after disable_cb. 416*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 417*5dfc1762SRusty Russell * 418*5dfc1762SRusty Russell * This re-enables callbacks; it returns "false" if there are pending 419*5dfc1762SRusty Russell * buffers in the queue, to detect a possible race between the driver 420*5dfc1762SRusty Russell * checking for more work, and enabling callbacks. 421*5dfc1762SRusty Russell * 422*5dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 423*5dfc1762SRusty Russell * operations at the same time (except where noted). 424*5dfc1762SRusty Russell */ 4257c5e9ed0SMichael S. Tsirkin bool virtqueue_enable_cb(struct virtqueue *_vq) 4260a8a69ddSRusty Russell { 4270a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 4280a8a69ddSRusty Russell 4290a8a69ddSRusty Russell START_USE(vq); 4300a8a69ddSRusty Russell 4310a8a69ddSRusty Russell /* We optimistically turn back on interrupts, then check if there was 4320a8a69ddSRusty Russell * more to do. */ 433a5c262c5SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 434a5c262c5SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 435a5c262c5SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 4360a8a69ddSRusty Russell vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 437a5c262c5SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx; 4387b21e34fSRusty Russell virtio_mb(vq); 4390a8a69ddSRusty Russell if (unlikely(more_used(vq))) { 4400a8a69ddSRusty Russell END_USE(vq); 4410a8a69ddSRusty Russell return false; 4420a8a69ddSRusty Russell } 4430a8a69ddSRusty Russell 4440a8a69ddSRusty Russell END_USE(vq); 4450a8a69ddSRusty Russell return true; 4460a8a69ddSRusty Russell } 4477c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 4480a8a69ddSRusty Russell 449*5dfc1762SRusty Russell /** 450*5dfc1762SRusty Russell * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 451*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 452*5dfc1762SRusty Russell * 453*5dfc1762SRusty Russell * This re-enables callbacks but hints to the other side to delay 454*5dfc1762SRusty Russell * interrupts until most of the available buffers have been processed; 455*5dfc1762SRusty Russell * it returns "false" if there are many pending buffers in the queue, 456*5dfc1762SRusty Russell * to detect a possible race between the driver checking for more work, 457*5dfc1762SRusty Russell * and enabling callbacks. 458*5dfc1762SRusty Russell * 459*5dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 460*5dfc1762SRusty Russell * operations at the same time (except where noted). 461*5dfc1762SRusty Russell */ 4627ab358c2SMichael S. Tsirkin bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 4637ab358c2SMichael S. Tsirkin { 4647ab358c2SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 4657ab358c2SMichael S. Tsirkin u16 bufs; 4667ab358c2SMichael S. Tsirkin 4677ab358c2SMichael S. Tsirkin START_USE(vq); 4687ab358c2SMichael S. Tsirkin 4697ab358c2SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 4707ab358c2SMichael S. Tsirkin * more to do. */ 4717ab358c2SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 4727ab358c2SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 4737ab358c2SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 4747ab358c2SMichael S. Tsirkin vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 4757ab358c2SMichael S. Tsirkin /* TODO: tune this threshold */ 4767ab358c2SMichael S. Tsirkin bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; 4777ab358c2SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx + bufs; 4787b21e34fSRusty Russell virtio_mb(vq); 4797ab358c2SMichael S. Tsirkin if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { 4807ab358c2SMichael S. Tsirkin END_USE(vq); 4817ab358c2SMichael S. Tsirkin return false; 4827ab358c2SMichael S. Tsirkin } 4837ab358c2SMichael S. Tsirkin 4847ab358c2SMichael S. Tsirkin END_USE(vq); 4857ab358c2SMichael S. Tsirkin return true; 4867ab358c2SMichael S. Tsirkin } 4877ab358c2SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 4887ab358c2SMichael S. Tsirkin 489*5dfc1762SRusty Russell /** 490*5dfc1762SRusty Russell * virtqueue_detach_unused_buf - detach first unused buffer 491*5dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 492*5dfc1762SRusty Russell * 493*5dfc1762SRusty Russell * Returns NULL or the "data" token handed to virtqueue_add_buf_gfp(). 494*5dfc1762SRusty Russell * This is not valid on an active queue; it is useful only for device 495*5dfc1762SRusty Russell * shutdown. 496*5dfc1762SRusty Russell */ 4977c5e9ed0SMichael S. Tsirkin void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 498c021eac4SShirley Ma { 499c021eac4SShirley Ma struct vring_virtqueue *vq = to_vvq(_vq); 500c021eac4SShirley Ma unsigned int i; 501c021eac4SShirley Ma void *buf; 502c021eac4SShirley Ma 503c021eac4SShirley Ma START_USE(vq); 504c021eac4SShirley Ma 505c021eac4SShirley Ma for (i = 0; i < vq->vring.num; i++) { 506c021eac4SShirley Ma if (!vq->data[i]) 507c021eac4SShirley Ma continue; 508c021eac4SShirley Ma /* detach_buf clears data, so grab it now. */ 509c021eac4SShirley Ma buf = vq->data[i]; 510c021eac4SShirley Ma detach_buf(vq, i); 511b3258ff1SAmit Shah vq->vring.avail->idx--; 512c021eac4SShirley Ma END_USE(vq); 513c021eac4SShirley Ma return buf; 514c021eac4SShirley Ma } 515c021eac4SShirley Ma /* That should have freed everything. */ 516c021eac4SShirley Ma BUG_ON(vq->num_free != vq->vring.num); 517c021eac4SShirley Ma 518c021eac4SShirley Ma END_USE(vq); 519c021eac4SShirley Ma return NULL; 520c021eac4SShirley Ma } 5217c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 522c021eac4SShirley Ma 5230a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 5240a8a69ddSRusty Russell { 5250a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 5260a8a69ddSRusty Russell 5270a8a69ddSRusty Russell if (!more_used(vq)) { 5280a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 5290a8a69ddSRusty Russell return IRQ_NONE; 5300a8a69ddSRusty Russell } 5310a8a69ddSRusty Russell 5320a8a69ddSRusty Russell if (unlikely(vq->broken)) 5330a8a69ddSRusty Russell return IRQ_HANDLED; 5340a8a69ddSRusty Russell 5350a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 53618445c4dSRusty Russell if (vq->vq.callback) 53718445c4dSRusty Russell vq->vq.callback(&vq->vq); 5380a8a69ddSRusty Russell 5390a8a69ddSRusty Russell return IRQ_HANDLED; 5400a8a69ddSRusty Russell } 541c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 5420a8a69ddSRusty Russell 5430a8a69ddSRusty Russell struct virtqueue *vring_new_virtqueue(unsigned int num, 54487c7d57cSRusty Russell unsigned int vring_align, 5450a8a69ddSRusty Russell struct virtio_device *vdev, 5467b21e34fSRusty Russell bool weak_barriers, 5470a8a69ddSRusty Russell void *pages, 5480a8a69ddSRusty Russell void (*notify)(struct virtqueue *), 5499499f5e7SRusty Russell void (*callback)(struct virtqueue *), 5509499f5e7SRusty Russell const char *name) 5510a8a69ddSRusty Russell { 5520a8a69ddSRusty Russell struct vring_virtqueue *vq; 5530a8a69ddSRusty Russell unsigned int i; 5540a8a69ddSRusty Russell 55542b36cc0SRusty Russell /* We assume num is a power of 2. */ 55642b36cc0SRusty Russell if (num & (num - 1)) { 55742b36cc0SRusty Russell dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 55842b36cc0SRusty Russell return NULL; 55942b36cc0SRusty Russell } 56042b36cc0SRusty Russell 5610a8a69ddSRusty Russell vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); 5620a8a69ddSRusty Russell if (!vq) 5630a8a69ddSRusty Russell return NULL; 5640a8a69ddSRusty Russell 56587c7d57cSRusty Russell vring_init(&vq->vring, num, pages, vring_align); 5660a8a69ddSRusty Russell vq->vq.callback = callback; 5670a8a69ddSRusty Russell vq->vq.vdev = vdev; 5689499f5e7SRusty Russell vq->vq.name = name; 5690a8a69ddSRusty Russell vq->notify = notify; 5707b21e34fSRusty Russell vq->weak_barriers = weak_barriers; 5710a8a69ddSRusty Russell vq->broken = false; 5720a8a69ddSRusty Russell vq->last_used_idx = 0; 5730a8a69ddSRusty Russell vq->num_added = 0; 5749499f5e7SRusty Russell list_add_tail(&vq->vq.list, &vdev->vqs); 5750a8a69ddSRusty Russell #ifdef DEBUG 5760a8a69ddSRusty Russell vq->in_use = false; 5770a8a69ddSRusty Russell #endif 5780a8a69ddSRusty Russell 5799fa29b9dSMark McLoughlin vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); 580a5c262c5SMichael S. Tsirkin vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 5819fa29b9dSMark McLoughlin 5820a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 5830a8a69ddSRusty Russell if (!callback) 5840a8a69ddSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 5850a8a69ddSRusty Russell 5860a8a69ddSRusty Russell /* Put everything in free lists. */ 5870a8a69ddSRusty Russell vq->num_free = num; 5880a8a69ddSRusty Russell vq->free_head = 0; 5893b870624SAmit Shah for (i = 0; i < num-1; i++) { 5900a8a69ddSRusty Russell vq->vring.desc[i].next = i+1; 5913b870624SAmit Shah vq->data[i] = NULL; 5923b870624SAmit Shah } 5933b870624SAmit Shah vq->data[i] = NULL; 5940a8a69ddSRusty Russell 5950a8a69ddSRusty Russell return &vq->vq; 5960a8a69ddSRusty Russell } 597c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 5980a8a69ddSRusty Russell 5990a8a69ddSRusty Russell void vring_del_virtqueue(struct virtqueue *vq) 6000a8a69ddSRusty Russell { 6019499f5e7SRusty Russell list_del(&vq->list); 6020a8a69ddSRusty Russell kfree(to_vvq(vq)); 6030a8a69ddSRusty Russell } 604c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 6050a8a69ddSRusty Russell 606e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 607e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 608e34f8725SRusty Russell { 609e34f8725SRusty Russell unsigned int i; 610e34f8725SRusty Russell 611e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 612e34f8725SRusty Russell switch (i) { 6139fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC: 6149fa29b9dSMark McLoughlin break; 615a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX: 616a5c262c5SMichael S. Tsirkin break; 617e34f8725SRusty Russell default: 618e34f8725SRusty Russell /* We don't understand this bit. */ 619e34f8725SRusty Russell clear_bit(i, vdev->features); 620e34f8725SRusty Russell } 621e34f8725SRusty Russell } 622e34f8725SRusty Russell } 623e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features); 624e34f8725SRusty Russell 625*5dfc1762SRusty Russell /** 626*5dfc1762SRusty Russell * virtqueue_get_vring_size - return the size of the virtqueue's vring 627*5dfc1762SRusty Russell * @vq: the struct virtqueue containing the vring of interest. 628*5dfc1762SRusty Russell * 629*5dfc1762SRusty Russell * Returns the size of the vring. This is mainly used for boasting to 630*5dfc1762SRusty Russell * userspace. Unlike other operations, this need not be serialized. 631*5dfc1762SRusty Russell */ 6328f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 6338f9f4668SRick Jones { 6348f9f4668SRick Jones 6358f9f4668SRick Jones struct vring_virtqueue *vq = to_vvq(_vq); 6368f9f4668SRick Jones 6378f9f4668SRick Jones return vq->vring.num; 6388f9f4668SRick Jones } 6398f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 6408f9f4668SRick Jones 641c6fd4701SRusty Russell MODULE_LICENSE("GPL"); 642