10a8a69ddSRusty Russell /* Virtio ring implementation. 20a8a69ddSRusty Russell * 30a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 40a8a69ddSRusty Russell * 50a8a69ddSRusty Russell * This program is free software; you can redistribute it and/or modify 60a8a69ddSRusty Russell * it under the terms of the GNU General Public License as published by 70a8a69ddSRusty Russell * the Free Software Foundation; either version 2 of the License, or 80a8a69ddSRusty Russell * (at your option) any later version. 90a8a69ddSRusty Russell * 100a8a69ddSRusty Russell * This program is distributed in the hope that it will be useful, 110a8a69ddSRusty Russell * but WITHOUT ANY WARRANTY; without even the implied warranty of 120a8a69ddSRusty Russell * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 130a8a69ddSRusty Russell * GNU General Public License for more details. 140a8a69ddSRusty Russell * 150a8a69ddSRusty Russell * You should have received a copy of the GNU General Public License 160a8a69ddSRusty Russell * along with this program; if not, write to the Free Software 170a8a69ddSRusty Russell * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 180a8a69ddSRusty Russell */ 190a8a69ddSRusty Russell #include <linux/virtio.h> 200a8a69ddSRusty Russell #include <linux/virtio_ring.h> 21e34f8725SRusty Russell #include <linux/virtio_config.h> 220a8a69ddSRusty Russell #include <linux/device.h> 235a0e3ad6STejun Heo #include <linux/slab.h> 24b5a2c4f1SPaul Gortmaker #include <linux/module.h> 25e93300b1SRusty Russell #include <linux/hrtimer.h> 260a8a69ddSRusty Russell 27d57ed95dSMichael S. Tsirkin /* virtio guest is communicating with a virtual "device" that actually runs on 28d57ed95dSMichael S. Tsirkin * a host processor. Memory barriers are used to control SMP effects. */ 29d57ed95dSMichael S. Tsirkin #ifdef CONFIG_SMP 30d57ed95dSMichael S. Tsirkin /* Where possible, use SMP barriers which are more lightweight than mandatory 31d57ed95dSMichael S. Tsirkin * barriers, because mandatory barriers control MMIO effects on accesses 327b21e34fSRusty Russell * through relaxed memory I/O windows (which virtio-pci does not use). */ 337b21e34fSRusty Russell #define virtio_mb(vq) \ 347b21e34fSRusty Russell do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0) 357b21e34fSRusty Russell #define virtio_rmb(vq) \ 367b21e34fSRusty Russell do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0) 377b21e34fSRusty Russell #define virtio_wmb(vq) \ 384dbc5d9fSJason Wang do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0) 39d57ed95dSMichael S. Tsirkin #else 40d57ed95dSMichael S. Tsirkin /* We must force memory ordering even if guest is UP since host could be 41d57ed95dSMichael S. Tsirkin * running on another CPU, but SMP barriers are defined to barrier() in that 42d57ed95dSMichael S. Tsirkin * configuration. So fall back to mandatory barriers instead. */ 437b21e34fSRusty Russell #define virtio_mb(vq) mb() 447b21e34fSRusty Russell #define virtio_rmb(vq) rmb() 457b21e34fSRusty Russell #define virtio_wmb(vq) wmb() 46d57ed95dSMichael S. Tsirkin #endif 47d57ed95dSMichael S. Tsirkin 480a8a69ddSRusty Russell #ifdef DEBUG 490a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 509499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 519499f5e7SRusty Russell do { \ 529499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 539499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 549499f5e7SRusty Russell BUG(); \ 559499f5e7SRusty Russell } while (0) 56c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 573a35ce7dSRoel Kluin #define START_USE(_vq) \ 58c5f841f1SRusty Russell do { \ 59c5f841f1SRusty Russell if ((_vq)->in_use) \ 609499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 619499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 62c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 63c5f841f1SRusty Russell } while (0) 643a35ce7dSRoel Kluin #define END_USE(_vq) \ 6597a545abSRusty Russell do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0) 660a8a69ddSRusty Russell #else 679499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 689499f5e7SRusty Russell do { \ 699499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 709499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 719499f5e7SRusty Russell (_vq)->broken = true; \ 729499f5e7SRusty Russell } while (0) 730a8a69ddSRusty Russell #define START_USE(vq) 740a8a69ddSRusty Russell #define END_USE(vq) 750a8a69ddSRusty Russell #endif 760a8a69ddSRusty Russell 770a8a69ddSRusty Russell struct vring_virtqueue 780a8a69ddSRusty Russell { 790a8a69ddSRusty Russell struct virtqueue vq; 800a8a69ddSRusty Russell 810a8a69ddSRusty Russell /* Actual memory layout for this queue */ 820a8a69ddSRusty Russell struct vring vring; 830a8a69ddSRusty Russell 847b21e34fSRusty Russell /* Can we use weak barriers? */ 857b21e34fSRusty Russell bool weak_barriers; 867b21e34fSRusty Russell 870a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 880a8a69ddSRusty Russell bool broken; 890a8a69ddSRusty Russell 909fa29b9dSMark McLoughlin /* Host supports indirect buffers */ 919fa29b9dSMark McLoughlin bool indirect; 929fa29b9dSMark McLoughlin 93a5c262c5SMichael S. Tsirkin /* Host publishes avail event idx */ 94a5c262c5SMichael S. Tsirkin bool event; 95a5c262c5SMichael S. Tsirkin 960a8a69ddSRusty Russell /* Number of free buffers */ 970a8a69ddSRusty Russell unsigned int num_free; 980a8a69ddSRusty Russell /* Head of free buffer list. */ 990a8a69ddSRusty Russell unsigned int free_head; 1000a8a69ddSRusty Russell /* Number we've added since last sync. */ 1010a8a69ddSRusty Russell unsigned int num_added; 1020a8a69ddSRusty Russell 1030a8a69ddSRusty Russell /* Last used index we've seen. */ 1041bc4953eSAnthony Liguori u16 last_used_idx; 1050a8a69ddSRusty Russell 1060a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 1070a8a69ddSRusty Russell void (*notify)(struct virtqueue *vq); 1080a8a69ddSRusty Russell 10917bb6d40SJason Wang /* Index of the queue */ 11017bb6d40SJason Wang int queue_index; 11117bb6d40SJason Wang 1120a8a69ddSRusty Russell #ifdef DEBUG 1130a8a69ddSRusty Russell /* They're supposed to lock for us. */ 1140a8a69ddSRusty Russell unsigned int in_use; 115e93300b1SRusty Russell 116e93300b1SRusty Russell /* Figure out if their kicks are too delayed. */ 117e93300b1SRusty Russell bool last_add_time_valid; 118e93300b1SRusty Russell ktime_t last_add_time; 1190a8a69ddSRusty Russell #endif 1200a8a69ddSRusty Russell 1210a8a69ddSRusty Russell /* Tokens for callbacks. */ 1220a8a69ddSRusty Russell void *data[]; 1230a8a69ddSRusty Russell }; 1240a8a69ddSRusty Russell 1250a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 1260a8a69ddSRusty Russell 1279fa29b9dSMark McLoughlin /* Set up an indirect table of descriptors and add it to the queue. */ 1289fa29b9dSMark McLoughlin static int vring_add_indirect(struct vring_virtqueue *vq, 1299fa29b9dSMark McLoughlin struct scatterlist sg[], 1309fa29b9dSMark McLoughlin unsigned int out, 131bbd603efSMichael S. Tsirkin unsigned int in, 132bbd603efSMichael S. Tsirkin gfp_t gfp) 1339fa29b9dSMark McLoughlin { 1349fa29b9dSMark McLoughlin struct vring_desc *desc; 1359fa29b9dSMark McLoughlin unsigned head; 1369fa29b9dSMark McLoughlin int i; 1379fa29b9dSMark McLoughlin 138*b92b1b89SWill Deacon /* 139*b92b1b89SWill Deacon * We require lowmem mappings for the descriptors because 140*b92b1b89SWill Deacon * otherwise virt_to_phys will give us bogus addresses in the 141*b92b1b89SWill Deacon * virtqueue. 142*b92b1b89SWill Deacon */ 143*b92b1b89SWill Deacon gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH); 144*b92b1b89SWill Deacon 145bbd603efSMichael S. Tsirkin desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp); 1469fa29b9dSMark McLoughlin if (!desc) 147686d3637SMichael S. Tsirkin return -ENOMEM; 1489fa29b9dSMark McLoughlin 1499fa29b9dSMark McLoughlin /* Transfer entries from the sg list into the indirect page */ 1509fa29b9dSMark McLoughlin for (i = 0; i < out; i++) { 1519fa29b9dSMark McLoughlin desc[i].flags = VRING_DESC_F_NEXT; 1529fa29b9dSMark McLoughlin desc[i].addr = sg_phys(sg); 1539fa29b9dSMark McLoughlin desc[i].len = sg->length; 1549fa29b9dSMark McLoughlin desc[i].next = i+1; 1559fa29b9dSMark McLoughlin sg++; 1569fa29b9dSMark McLoughlin } 1579fa29b9dSMark McLoughlin for (; i < (out + in); i++) { 1589fa29b9dSMark McLoughlin desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 1599fa29b9dSMark McLoughlin desc[i].addr = sg_phys(sg); 1609fa29b9dSMark McLoughlin desc[i].len = sg->length; 1619fa29b9dSMark McLoughlin desc[i].next = i+1; 1629fa29b9dSMark McLoughlin sg++; 1639fa29b9dSMark McLoughlin } 1649fa29b9dSMark McLoughlin 1659fa29b9dSMark McLoughlin /* Last one doesn't continue. */ 1669fa29b9dSMark McLoughlin desc[i-1].flags &= ~VRING_DESC_F_NEXT; 1679fa29b9dSMark McLoughlin desc[i-1].next = 0; 1689fa29b9dSMark McLoughlin 1699fa29b9dSMark McLoughlin /* We're about to use a buffer */ 1709fa29b9dSMark McLoughlin vq->num_free--; 1719fa29b9dSMark McLoughlin 1729fa29b9dSMark McLoughlin /* Use a single buffer which doesn't continue */ 1739fa29b9dSMark McLoughlin head = vq->free_head; 1749fa29b9dSMark McLoughlin vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT; 1759fa29b9dSMark McLoughlin vq->vring.desc[head].addr = virt_to_phys(desc); 1769fa29b9dSMark McLoughlin vq->vring.desc[head].len = i * sizeof(struct vring_desc); 1779fa29b9dSMark McLoughlin 1789fa29b9dSMark McLoughlin /* Update free pointer */ 1799fa29b9dSMark McLoughlin vq->free_head = vq->vring.desc[head].next; 1809fa29b9dSMark McLoughlin 1819fa29b9dSMark McLoughlin return head; 1829fa29b9dSMark McLoughlin } 1839fa29b9dSMark McLoughlin 18417bb6d40SJason Wang int virtqueue_get_queue_index(struct virtqueue *_vq) 18517bb6d40SJason Wang { 18617bb6d40SJason Wang struct vring_virtqueue *vq = to_vvq(_vq); 18717bb6d40SJason Wang return vq->queue_index; 18817bb6d40SJason Wang } 18917bb6d40SJason Wang EXPORT_SYMBOL_GPL(virtqueue_get_queue_index); 19017bb6d40SJason Wang 1915dfc1762SRusty Russell /** 192f96fde41SRusty Russell * virtqueue_add_buf - expose buffer to other end 1935dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 1945dfc1762SRusty Russell * @sg: the description of the buffer(s). 1955dfc1762SRusty Russell * @out_num: the number of sg readable by other side 1965dfc1762SRusty Russell * @in_num: the number of sg which are writable (after readable ones) 1975dfc1762SRusty Russell * @data: the token identifying the buffer. 1985dfc1762SRusty Russell * @gfp: how to do memory allocations (if necessary). 1995dfc1762SRusty Russell * 2005dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue operations 2015dfc1762SRusty Russell * at the same time (except where noted). 2025dfc1762SRusty Russell * 2035dfc1762SRusty Russell * Returns remaining capacity of queue or a negative error 2045dfc1762SRusty Russell * (ie. ENOSPC). Note that it only really makes sense to treat all 2055dfc1762SRusty Russell * positive return values as "available": indirect buffers mean that 2065dfc1762SRusty Russell * we can put an entire sg[] array inside a single queue entry. 2075dfc1762SRusty Russell */ 208f96fde41SRusty Russell int virtqueue_add_buf(struct virtqueue *_vq, 2090a8a69ddSRusty Russell struct scatterlist sg[], 2100a8a69ddSRusty Russell unsigned int out, 2110a8a69ddSRusty Russell unsigned int in, 212bbd603efSMichael S. Tsirkin void *data, 213bbd603efSMichael S. Tsirkin gfp_t gfp) 2140a8a69ddSRusty Russell { 2150a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 2161fe9b6feSMichael S. Tsirkin unsigned int i, avail, uninitialized_var(prev); 2171fe9b6feSMichael S. Tsirkin int head; 2180a8a69ddSRusty Russell 2199fa29b9dSMark McLoughlin START_USE(vq); 2209fa29b9dSMark McLoughlin 2210a8a69ddSRusty Russell BUG_ON(data == NULL); 2229fa29b9dSMark McLoughlin 223e93300b1SRusty Russell #ifdef DEBUG 224e93300b1SRusty Russell { 225e93300b1SRusty Russell ktime_t now = ktime_get(); 226e93300b1SRusty Russell 227e93300b1SRusty Russell /* No kick or get, with .1 second between? Warn. */ 228e93300b1SRusty Russell if (vq->last_add_time_valid) 229e93300b1SRusty Russell WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time)) 230e93300b1SRusty Russell > 100); 231e93300b1SRusty Russell vq->last_add_time = now; 232e93300b1SRusty Russell vq->last_add_time_valid = true; 233e93300b1SRusty Russell } 234e93300b1SRusty Russell #endif 235e93300b1SRusty Russell 2369fa29b9dSMark McLoughlin /* If the host supports indirect descriptor tables, and we have multiple 2379fa29b9dSMark McLoughlin * buffers, then go indirect. FIXME: tune this threshold */ 2389fa29b9dSMark McLoughlin if (vq->indirect && (out + in) > 1 && vq->num_free) { 239bbd603efSMichael S. Tsirkin head = vring_add_indirect(vq, sg, out, in, gfp); 2401fe9b6feSMichael S. Tsirkin if (likely(head >= 0)) 2419fa29b9dSMark McLoughlin goto add_head; 2429fa29b9dSMark McLoughlin } 2439fa29b9dSMark McLoughlin 2440a8a69ddSRusty Russell BUG_ON(out + in > vq->vring.num); 2450a8a69ddSRusty Russell BUG_ON(out + in == 0); 2460a8a69ddSRusty Russell 2470a8a69ddSRusty Russell if (vq->num_free < out + in) { 2480a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 2490a8a69ddSRusty Russell out + in, vq->num_free); 25044653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 25144653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 25244653eaeSRusty Russell * host should service the ring ASAP. */ 25344653eaeSRusty Russell if (out) 254426e3e0aSRusty Russell vq->notify(&vq->vq); 2550a8a69ddSRusty Russell END_USE(vq); 2560a8a69ddSRusty Russell return -ENOSPC; 2570a8a69ddSRusty Russell } 2580a8a69ddSRusty Russell 2590a8a69ddSRusty Russell /* We're about to use some buffers from the free list. */ 2600a8a69ddSRusty Russell vq->num_free -= out + in; 2610a8a69ddSRusty Russell 2620a8a69ddSRusty Russell head = vq->free_head; 2630a8a69ddSRusty Russell for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 2640a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 26515f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 2660a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 2670a8a69ddSRusty Russell prev = i; 2680a8a69ddSRusty Russell sg++; 2690a8a69ddSRusty Russell } 2700a8a69ddSRusty Russell for (; in; i = vq->vring.desc[i].next, in--) { 2710a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 27215f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 2730a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 2740a8a69ddSRusty Russell prev = i; 2750a8a69ddSRusty Russell sg++; 2760a8a69ddSRusty Russell } 2770a8a69ddSRusty Russell /* Last one doesn't continue. */ 2780a8a69ddSRusty Russell vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; 2790a8a69ddSRusty Russell 2800a8a69ddSRusty Russell /* Update free pointer */ 2810a8a69ddSRusty Russell vq->free_head = i; 2820a8a69ddSRusty Russell 2839fa29b9dSMark McLoughlin add_head: 2840a8a69ddSRusty Russell /* Set token. */ 2850a8a69ddSRusty Russell vq->data[head] = data; 2860a8a69ddSRusty Russell 2870a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 2883b720b8cSRusty Russell * do sync). */ 289ee7cd898SRusty Russell avail = (vq->vring.avail->idx & (vq->vring.num-1)); 2900a8a69ddSRusty Russell vq->vring.avail->ring[avail] = head; 2910a8a69ddSRusty Russell 292ee7cd898SRusty Russell /* Descriptors and available array need to be set before we expose the 293ee7cd898SRusty Russell * new available array entries. */ 294ee7cd898SRusty Russell virtio_wmb(vq); 295ee7cd898SRusty Russell vq->vring.avail->idx++; 296ee7cd898SRusty Russell vq->num_added++; 297ee7cd898SRusty Russell 298ee7cd898SRusty Russell /* This is very unlikely, but theoretically possible. Kick 299ee7cd898SRusty Russell * just in case. */ 300ee7cd898SRusty Russell if (unlikely(vq->num_added == (1 << 16) - 1)) 301ee7cd898SRusty Russell virtqueue_kick(_vq); 302ee7cd898SRusty Russell 3030a8a69ddSRusty Russell pr_debug("Added buffer head %i to %p\n", head, vq); 3040a8a69ddSRusty Russell END_USE(vq); 3053c1b27d5SRusty Russell 3063c1b27d5SRusty Russell return vq->num_free; 3070a8a69ddSRusty Russell } 308f96fde41SRusty Russell EXPORT_SYMBOL_GPL(virtqueue_add_buf); 3090a8a69ddSRusty Russell 3105dfc1762SRusty Russell /** 31141f0377fSRusty Russell * virtqueue_kick_prepare - first half of split virtqueue_kick call. 3125dfc1762SRusty Russell * @vq: the struct virtqueue 3135dfc1762SRusty Russell * 31441f0377fSRusty Russell * Instead of virtqueue_kick(), you can do: 31541f0377fSRusty Russell * if (virtqueue_kick_prepare(vq)) 31641f0377fSRusty Russell * virtqueue_notify(vq); 3175dfc1762SRusty Russell * 31841f0377fSRusty Russell * This is sometimes useful because the virtqueue_kick_prepare() needs 31941f0377fSRusty Russell * to be serialized, but the actual virtqueue_notify() call does not. 3205dfc1762SRusty Russell */ 32141f0377fSRusty Russell bool virtqueue_kick_prepare(struct virtqueue *_vq) 3220a8a69ddSRusty Russell { 3230a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 324a5c262c5SMichael S. Tsirkin u16 new, old; 32541f0377fSRusty Russell bool needs_kick; 32641f0377fSRusty Russell 3270a8a69ddSRusty Russell START_USE(vq); 328a72caae2SJason Wang /* We need to expose available array entries before checking avail 329a72caae2SJason Wang * event. */ 330a72caae2SJason Wang virtio_mb(vq); 3310a8a69ddSRusty Russell 332ee7cd898SRusty Russell old = vq->vring.avail->idx - vq->num_added; 333ee7cd898SRusty Russell new = vq->vring.avail->idx; 3340a8a69ddSRusty Russell vq->num_added = 0; 3350a8a69ddSRusty Russell 336e93300b1SRusty Russell #ifdef DEBUG 337e93300b1SRusty Russell if (vq->last_add_time_valid) { 338e93300b1SRusty Russell WARN_ON(ktime_to_ms(ktime_sub(ktime_get(), 339e93300b1SRusty Russell vq->last_add_time)) > 100); 340e93300b1SRusty Russell } 341e93300b1SRusty Russell vq->last_add_time_valid = false; 342e93300b1SRusty Russell #endif 343e93300b1SRusty Russell 34441f0377fSRusty Russell if (vq->event) { 34541f0377fSRusty Russell needs_kick = vring_need_event(vring_avail_event(&vq->vring), 34641f0377fSRusty Russell new, old); 34741f0377fSRusty Russell } else { 34841f0377fSRusty Russell needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY); 34941f0377fSRusty Russell } 3500a8a69ddSRusty Russell END_USE(vq); 35141f0377fSRusty Russell return needs_kick; 35241f0377fSRusty Russell } 35341f0377fSRusty Russell EXPORT_SYMBOL_GPL(virtqueue_kick_prepare); 35441f0377fSRusty Russell 35541f0377fSRusty Russell /** 35641f0377fSRusty Russell * virtqueue_notify - second half of split virtqueue_kick call. 35741f0377fSRusty Russell * @vq: the struct virtqueue 35841f0377fSRusty Russell * 35941f0377fSRusty Russell * This does not need to be serialized. 36041f0377fSRusty Russell */ 36141f0377fSRusty Russell void virtqueue_notify(struct virtqueue *_vq) 36241f0377fSRusty Russell { 36341f0377fSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 36441f0377fSRusty Russell 36541f0377fSRusty Russell /* Prod other side to tell it about changes. */ 36641f0377fSRusty Russell vq->notify(_vq); 36741f0377fSRusty Russell } 36841f0377fSRusty Russell EXPORT_SYMBOL_GPL(virtqueue_notify); 36941f0377fSRusty Russell 37041f0377fSRusty Russell /** 37141f0377fSRusty Russell * virtqueue_kick - update after add_buf 37241f0377fSRusty Russell * @vq: the struct virtqueue 37341f0377fSRusty Russell * 37441f0377fSRusty Russell * After one or more virtqueue_add_buf calls, invoke this to kick 37541f0377fSRusty Russell * the other side. 37641f0377fSRusty Russell * 37741f0377fSRusty Russell * Caller must ensure we don't call this with other virtqueue 37841f0377fSRusty Russell * operations at the same time (except where noted). 37941f0377fSRusty Russell */ 38041f0377fSRusty Russell void virtqueue_kick(struct virtqueue *vq) 38141f0377fSRusty Russell { 38241f0377fSRusty Russell if (virtqueue_kick_prepare(vq)) 38341f0377fSRusty Russell virtqueue_notify(vq); 3840a8a69ddSRusty Russell } 3857c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_kick); 3860a8a69ddSRusty Russell 3870a8a69ddSRusty Russell static void detach_buf(struct vring_virtqueue *vq, unsigned int head) 3880a8a69ddSRusty Russell { 3890a8a69ddSRusty Russell unsigned int i; 3900a8a69ddSRusty Russell 3910a8a69ddSRusty Russell /* Clear data ptr. */ 3920a8a69ddSRusty Russell vq->data[head] = NULL; 3930a8a69ddSRusty Russell 3940a8a69ddSRusty Russell /* Put back on free list: find end */ 3950a8a69ddSRusty Russell i = head; 3969fa29b9dSMark McLoughlin 3979fa29b9dSMark McLoughlin /* Free the indirect table */ 3989fa29b9dSMark McLoughlin if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT) 3999fa29b9dSMark McLoughlin kfree(phys_to_virt(vq->vring.desc[i].addr)); 4009fa29b9dSMark McLoughlin 4010a8a69ddSRusty Russell while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { 4020a8a69ddSRusty Russell i = vq->vring.desc[i].next; 4030a8a69ddSRusty Russell vq->num_free++; 4040a8a69ddSRusty Russell } 4050a8a69ddSRusty Russell 4060a8a69ddSRusty Russell vq->vring.desc[i].next = vq->free_head; 4070a8a69ddSRusty Russell vq->free_head = head; 4080a8a69ddSRusty Russell /* Plus final descriptor */ 4090a8a69ddSRusty Russell vq->num_free++; 4100a8a69ddSRusty Russell } 4110a8a69ddSRusty Russell 4120a8a69ddSRusty Russell static inline bool more_used(const struct vring_virtqueue *vq) 4130a8a69ddSRusty Russell { 4140a8a69ddSRusty Russell return vq->last_used_idx != vq->vring.used->idx; 4150a8a69ddSRusty Russell } 4160a8a69ddSRusty Russell 4175dfc1762SRusty Russell /** 4185dfc1762SRusty Russell * virtqueue_get_buf - get the next used buffer 4195dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 4205dfc1762SRusty Russell * @len: the length written into the buffer 4215dfc1762SRusty Russell * 4225dfc1762SRusty Russell * If the driver wrote data into the buffer, @len will be set to the 4235dfc1762SRusty Russell * amount written. This means you don't need to clear the buffer 4245dfc1762SRusty Russell * beforehand to ensure there's no data leakage in the case of short 4255dfc1762SRusty Russell * writes. 4265dfc1762SRusty Russell * 4275dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 4285dfc1762SRusty Russell * operations at the same time (except where noted). 4295dfc1762SRusty Russell * 4305dfc1762SRusty Russell * Returns NULL if there are no used buffers, or the "data" token 431f96fde41SRusty Russell * handed to virtqueue_add_buf(). 4325dfc1762SRusty Russell */ 4337c5e9ed0SMichael S. Tsirkin void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) 4340a8a69ddSRusty Russell { 4350a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 4360a8a69ddSRusty Russell void *ret; 4370a8a69ddSRusty Russell unsigned int i; 4383b720b8cSRusty Russell u16 last_used; 4390a8a69ddSRusty Russell 4400a8a69ddSRusty Russell START_USE(vq); 4410a8a69ddSRusty Russell 4425ef82752SRusty Russell if (unlikely(vq->broken)) { 4435ef82752SRusty Russell END_USE(vq); 4445ef82752SRusty Russell return NULL; 4455ef82752SRusty Russell } 4465ef82752SRusty Russell 4470a8a69ddSRusty Russell if (!more_used(vq)) { 4480a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 4490a8a69ddSRusty Russell END_USE(vq); 4500a8a69ddSRusty Russell return NULL; 4510a8a69ddSRusty Russell } 4520a8a69ddSRusty Russell 4532d61ba95SMichael S. Tsirkin /* Only get used array entries after they have been exposed by host. */ 4547b21e34fSRusty Russell virtio_rmb(vq); 4552d61ba95SMichael S. Tsirkin 4563b720b8cSRusty Russell last_used = (vq->last_used_idx & (vq->vring.num - 1)); 4573b720b8cSRusty Russell i = vq->vring.used->ring[last_used].id; 4583b720b8cSRusty Russell *len = vq->vring.used->ring[last_used].len; 4590a8a69ddSRusty Russell 4600a8a69ddSRusty Russell if (unlikely(i >= vq->vring.num)) { 4610a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 4620a8a69ddSRusty Russell return NULL; 4630a8a69ddSRusty Russell } 4640a8a69ddSRusty Russell if (unlikely(!vq->data[i])) { 4650a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 4660a8a69ddSRusty Russell return NULL; 4670a8a69ddSRusty Russell } 4680a8a69ddSRusty Russell 4690a8a69ddSRusty Russell /* detach_buf clears data, so grab it now. */ 4700a8a69ddSRusty Russell ret = vq->data[i]; 4710a8a69ddSRusty Russell detach_buf(vq, i); 4720a8a69ddSRusty Russell vq->last_used_idx++; 473a5c262c5SMichael S. Tsirkin /* If we expect an interrupt for the next entry, tell host 474a5c262c5SMichael S. Tsirkin * by writing event index and flush out the write before 475a5c262c5SMichael S. Tsirkin * the read in the next get_buf call. */ 476a5c262c5SMichael S. Tsirkin if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 477a5c262c5SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx; 4787b21e34fSRusty Russell virtio_mb(vq); 479a5c262c5SMichael S. Tsirkin } 480a5c262c5SMichael S. Tsirkin 481e93300b1SRusty Russell #ifdef DEBUG 482e93300b1SRusty Russell vq->last_add_time_valid = false; 483e93300b1SRusty Russell #endif 484e93300b1SRusty Russell 4850a8a69ddSRusty Russell END_USE(vq); 4860a8a69ddSRusty Russell return ret; 4870a8a69ddSRusty Russell } 4887c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_get_buf); 4890a8a69ddSRusty Russell 4905dfc1762SRusty Russell /** 4915dfc1762SRusty Russell * virtqueue_disable_cb - disable callbacks 4925dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 4935dfc1762SRusty Russell * 4945dfc1762SRusty Russell * Note that this is not necessarily synchronous, hence unreliable and only 4955dfc1762SRusty Russell * useful as an optimization. 4965dfc1762SRusty Russell * 4975dfc1762SRusty Russell * Unlike other operations, this need not be serialized. 4985dfc1762SRusty Russell */ 4997c5e9ed0SMichael S. Tsirkin void virtqueue_disable_cb(struct virtqueue *_vq) 50018445c4dSRusty Russell { 50118445c4dSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 50218445c4dSRusty Russell 50318445c4dSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 50418445c4dSRusty Russell } 5057c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_disable_cb); 50618445c4dSRusty Russell 5075dfc1762SRusty Russell /** 5085dfc1762SRusty Russell * virtqueue_enable_cb - restart callbacks after disable_cb. 5095dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 5105dfc1762SRusty Russell * 5115dfc1762SRusty Russell * This re-enables callbacks; it returns "false" if there are pending 5125dfc1762SRusty Russell * buffers in the queue, to detect a possible race between the driver 5135dfc1762SRusty Russell * checking for more work, and enabling callbacks. 5145dfc1762SRusty Russell * 5155dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 5165dfc1762SRusty Russell * operations at the same time (except where noted). 5175dfc1762SRusty Russell */ 5187c5e9ed0SMichael S. Tsirkin bool virtqueue_enable_cb(struct virtqueue *_vq) 5190a8a69ddSRusty Russell { 5200a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 5210a8a69ddSRusty Russell 5220a8a69ddSRusty Russell START_USE(vq); 5230a8a69ddSRusty Russell 5240a8a69ddSRusty Russell /* We optimistically turn back on interrupts, then check if there was 5250a8a69ddSRusty Russell * more to do. */ 526a5c262c5SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to 527a5c262c5SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 528a5c262c5SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 5290a8a69ddSRusty Russell vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 530a5c262c5SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx; 5317b21e34fSRusty Russell virtio_mb(vq); 5320a8a69ddSRusty Russell if (unlikely(more_used(vq))) { 5330a8a69ddSRusty Russell END_USE(vq); 5340a8a69ddSRusty Russell return false; 5350a8a69ddSRusty Russell } 5360a8a69ddSRusty Russell 5370a8a69ddSRusty Russell END_USE(vq); 5380a8a69ddSRusty Russell return true; 5390a8a69ddSRusty Russell } 5407c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb); 5410a8a69ddSRusty Russell 5425dfc1762SRusty Russell /** 5435dfc1762SRusty Russell * virtqueue_enable_cb_delayed - restart callbacks after disable_cb. 5445dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 5455dfc1762SRusty Russell * 5465dfc1762SRusty Russell * This re-enables callbacks but hints to the other side to delay 5475dfc1762SRusty Russell * interrupts until most of the available buffers have been processed; 5485dfc1762SRusty Russell * it returns "false" if there are many pending buffers in the queue, 5495dfc1762SRusty Russell * to detect a possible race between the driver checking for more work, 5505dfc1762SRusty Russell * and enabling callbacks. 5515dfc1762SRusty Russell * 5525dfc1762SRusty Russell * Caller must ensure we don't call this with other virtqueue 5535dfc1762SRusty Russell * operations at the same time (except where noted). 5545dfc1762SRusty Russell */ 5557ab358c2SMichael S. Tsirkin bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) 5567ab358c2SMichael S. Tsirkin { 5577ab358c2SMichael S. Tsirkin struct vring_virtqueue *vq = to_vvq(_vq); 5587ab358c2SMichael S. Tsirkin u16 bufs; 5597ab358c2SMichael S. Tsirkin 5607ab358c2SMichael S. Tsirkin START_USE(vq); 5617ab358c2SMichael S. Tsirkin 5627ab358c2SMichael S. Tsirkin /* We optimistically turn back on interrupts, then check if there was 5637ab358c2SMichael S. Tsirkin * more to do. */ 5647ab358c2SMichael S. Tsirkin /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to 5657ab358c2SMichael S. Tsirkin * either clear the flags bit or point the event index at the next 5667ab358c2SMichael S. Tsirkin * entry. Always do both to keep code simple. */ 5677ab358c2SMichael S. Tsirkin vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 5687ab358c2SMichael S. Tsirkin /* TODO: tune this threshold */ 5697ab358c2SMichael S. Tsirkin bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; 5707ab358c2SMichael S. Tsirkin vring_used_event(&vq->vring) = vq->last_used_idx + bufs; 5717b21e34fSRusty Russell virtio_mb(vq); 5727ab358c2SMichael S. Tsirkin if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { 5737ab358c2SMichael S. Tsirkin END_USE(vq); 5747ab358c2SMichael S. Tsirkin return false; 5757ab358c2SMichael S. Tsirkin } 5767ab358c2SMichael S. Tsirkin 5777ab358c2SMichael S. Tsirkin END_USE(vq); 5787ab358c2SMichael S. Tsirkin return true; 5797ab358c2SMichael S. Tsirkin } 5807ab358c2SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); 5817ab358c2SMichael S. Tsirkin 5825dfc1762SRusty Russell /** 5835dfc1762SRusty Russell * virtqueue_detach_unused_buf - detach first unused buffer 5845dfc1762SRusty Russell * @vq: the struct virtqueue we're talking about. 5855dfc1762SRusty Russell * 586f96fde41SRusty Russell * Returns NULL or the "data" token handed to virtqueue_add_buf(). 5875dfc1762SRusty Russell * This is not valid on an active queue; it is useful only for device 5885dfc1762SRusty Russell * shutdown. 5895dfc1762SRusty Russell */ 5907c5e9ed0SMichael S. Tsirkin void *virtqueue_detach_unused_buf(struct virtqueue *_vq) 591c021eac4SShirley Ma { 592c021eac4SShirley Ma struct vring_virtqueue *vq = to_vvq(_vq); 593c021eac4SShirley Ma unsigned int i; 594c021eac4SShirley Ma void *buf; 595c021eac4SShirley Ma 596c021eac4SShirley Ma START_USE(vq); 597c021eac4SShirley Ma 598c021eac4SShirley Ma for (i = 0; i < vq->vring.num; i++) { 599c021eac4SShirley Ma if (!vq->data[i]) 600c021eac4SShirley Ma continue; 601c021eac4SShirley Ma /* detach_buf clears data, so grab it now. */ 602c021eac4SShirley Ma buf = vq->data[i]; 603c021eac4SShirley Ma detach_buf(vq, i); 604b3258ff1SAmit Shah vq->vring.avail->idx--; 605c021eac4SShirley Ma END_USE(vq); 606c021eac4SShirley Ma return buf; 607c021eac4SShirley Ma } 608c021eac4SShirley Ma /* That should have freed everything. */ 609c021eac4SShirley Ma BUG_ON(vq->num_free != vq->vring.num); 610c021eac4SShirley Ma 611c021eac4SShirley Ma END_USE(vq); 612c021eac4SShirley Ma return NULL; 613c021eac4SShirley Ma } 6147c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf); 615c021eac4SShirley Ma 6160a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 6170a8a69ddSRusty Russell { 6180a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 6190a8a69ddSRusty Russell 6200a8a69ddSRusty Russell if (!more_used(vq)) { 6210a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 6220a8a69ddSRusty Russell return IRQ_NONE; 6230a8a69ddSRusty Russell } 6240a8a69ddSRusty Russell 6250a8a69ddSRusty Russell if (unlikely(vq->broken)) 6260a8a69ddSRusty Russell return IRQ_HANDLED; 6270a8a69ddSRusty Russell 6280a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 62918445c4dSRusty Russell if (vq->vq.callback) 63018445c4dSRusty Russell vq->vq.callback(&vq->vq); 6310a8a69ddSRusty Russell 6320a8a69ddSRusty Russell return IRQ_HANDLED; 6330a8a69ddSRusty Russell } 634c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 6350a8a69ddSRusty Russell 63617bb6d40SJason Wang struct virtqueue *vring_new_virtqueue(unsigned int index, 63717bb6d40SJason Wang unsigned int num, 63887c7d57cSRusty Russell unsigned int vring_align, 6390a8a69ddSRusty Russell struct virtio_device *vdev, 6407b21e34fSRusty Russell bool weak_barriers, 6410a8a69ddSRusty Russell void *pages, 6420a8a69ddSRusty Russell void (*notify)(struct virtqueue *), 6439499f5e7SRusty Russell void (*callback)(struct virtqueue *), 6449499f5e7SRusty Russell const char *name) 6450a8a69ddSRusty Russell { 6460a8a69ddSRusty Russell struct vring_virtqueue *vq; 6470a8a69ddSRusty Russell unsigned int i; 6480a8a69ddSRusty Russell 64942b36cc0SRusty Russell /* We assume num is a power of 2. */ 65042b36cc0SRusty Russell if (num & (num - 1)) { 65142b36cc0SRusty Russell dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 65242b36cc0SRusty Russell return NULL; 65342b36cc0SRusty Russell } 65442b36cc0SRusty Russell 6550a8a69ddSRusty Russell vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); 6560a8a69ddSRusty Russell if (!vq) 6570a8a69ddSRusty Russell return NULL; 6580a8a69ddSRusty Russell 65987c7d57cSRusty Russell vring_init(&vq->vring, num, pages, vring_align); 6600a8a69ddSRusty Russell vq->vq.callback = callback; 6610a8a69ddSRusty Russell vq->vq.vdev = vdev; 6629499f5e7SRusty Russell vq->vq.name = name; 6630a8a69ddSRusty Russell vq->notify = notify; 6647b21e34fSRusty Russell vq->weak_barriers = weak_barriers; 6650a8a69ddSRusty Russell vq->broken = false; 6660a8a69ddSRusty Russell vq->last_used_idx = 0; 6670a8a69ddSRusty Russell vq->num_added = 0; 66817bb6d40SJason Wang vq->queue_index = index; 6699499f5e7SRusty Russell list_add_tail(&vq->vq.list, &vdev->vqs); 6700a8a69ddSRusty Russell #ifdef DEBUG 6710a8a69ddSRusty Russell vq->in_use = false; 672e93300b1SRusty Russell vq->last_add_time_valid = false; 6730a8a69ddSRusty Russell #endif 6740a8a69ddSRusty Russell 6759fa29b9dSMark McLoughlin vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); 676a5c262c5SMichael S. Tsirkin vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); 6779fa29b9dSMark McLoughlin 6780a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 6790a8a69ddSRusty Russell if (!callback) 6800a8a69ddSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 6810a8a69ddSRusty Russell 6820a8a69ddSRusty Russell /* Put everything in free lists. */ 6830a8a69ddSRusty Russell vq->num_free = num; 6840a8a69ddSRusty Russell vq->free_head = 0; 6853b870624SAmit Shah for (i = 0; i < num-1; i++) { 6860a8a69ddSRusty Russell vq->vring.desc[i].next = i+1; 6873b870624SAmit Shah vq->data[i] = NULL; 6883b870624SAmit Shah } 6893b870624SAmit Shah vq->data[i] = NULL; 6900a8a69ddSRusty Russell 6910a8a69ddSRusty Russell return &vq->vq; 6920a8a69ddSRusty Russell } 693c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 6940a8a69ddSRusty Russell 6950a8a69ddSRusty Russell void vring_del_virtqueue(struct virtqueue *vq) 6960a8a69ddSRusty Russell { 6979499f5e7SRusty Russell list_del(&vq->list); 6980a8a69ddSRusty Russell kfree(to_vvq(vq)); 6990a8a69ddSRusty Russell } 700c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 7010a8a69ddSRusty Russell 702e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 703e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 704e34f8725SRusty Russell { 705e34f8725SRusty Russell unsigned int i; 706e34f8725SRusty Russell 707e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 708e34f8725SRusty Russell switch (i) { 7099fa29b9dSMark McLoughlin case VIRTIO_RING_F_INDIRECT_DESC: 7109fa29b9dSMark McLoughlin break; 711a5c262c5SMichael S. Tsirkin case VIRTIO_RING_F_EVENT_IDX: 712a5c262c5SMichael S. Tsirkin break; 713e34f8725SRusty Russell default: 714e34f8725SRusty Russell /* We don't understand this bit. */ 715e34f8725SRusty Russell clear_bit(i, vdev->features); 716e34f8725SRusty Russell } 717e34f8725SRusty Russell } 718e34f8725SRusty Russell } 719e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features); 720e34f8725SRusty Russell 7215dfc1762SRusty Russell /** 7225dfc1762SRusty Russell * virtqueue_get_vring_size - return the size of the virtqueue's vring 7235dfc1762SRusty Russell * @vq: the struct virtqueue containing the vring of interest. 7245dfc1762SRusty Russell * 7255dfc1762SRusty Russell * Returns the size of the vring. This is mainly used for boasting to 7265dfc1762SRusty Russell * userspace. Unlike other operations, this need not be serialized. 7275dfc1762SRusty Russell */ 7288f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq) 7298f9f4668SRick Jones { 7308f9f4668SRick Jones 7318f9f4668SRick Jones struct vring_virtqueue *vq = to_vvq(_vq); 7328f9f4668SRick Jones 7338f9f4668SRick Jones return vq->vring.num; 7348f9f4668SRick Jones } 7358f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size); 7368f9f4668SRick Jones 737c6fd4701SRusty Russell MODULE_LICENSE("GPL"); 738