10a8a69ddSRusty Russell /* Virtio ring implementation. 20a8a69ddSRusty Russell * 30a8a69ddSRusty Russell * Copyright 2007 Rusty Russell IBM Corporation 40a8a69ddSRusty Russell * 50a8a69ddSRusty Russell * This program is free software; you can redistribute it and/or modify 60a8a69ddSRusty Russell * it under the terms of the GNU General Public License as published by 70a8a69ddSRusty Russell * the Free Software Foundation; either version 2 of the License, or 80a8a69ddSRusty Russell * (at your option) any later version. 90a8a69ddSRusty Russell * 100a8a69ddSRusty Russell * This program is distributed in the hope that it will be useful, 110a8a69ddSRusty Russell * but WITHOUT ANY WARRANTY; without even the implied warranty of 120a8a69ddSRusty Russell * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 130a8a69ddSRusty Russell * GNU General Public License for more details. 140a8a69ddSRusty Russell * 150a8a69ddSRusty Russell * You should have received a copy of the GNU General Public License 160a8a69ddSRusty Russell * along with this program; if not, write to the Free Software 170a8a69ddSRusty Russell * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 180a8a69ddSRusty Russell */ 190a8a69ddSRusty Russell #include <linux/virtio.h> 200a8a69ddSRusty Russell #include <linux/virtio_ring.h> 21e34f8725SRusty Russell #include <linux/virtio_config.h> 220a8a69ddSRusty Russell #include <linux/device.h> 230a8a69ddSRusty Russell 240a8a69ddSRusty Russell #ifdef DEBUG 250a8a69ddSRusty Russell /* For development, we want to crash whenever the ring is screwed. */ 26*9499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 27*9499f5e7SRusty Russell do { \ 28*9499f5e7SRusty Russell dev_err(&(_vq)->vq.vdev->dev, \ 29*9499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 30*9499f5e7SRusty Russell BUG(); \ 31*9499f5e7SRusty Russell } while (0) 32c5f841f1SRusty Russell /* Caller is supposed to guarantee no reentry. */ 333a35ce7dSRoel Kluin #define START_USE(_vq) \ 34c5f841f1SRusty Russell do { \ 35c5f841f1SRusty Russell if ((_vq)->in_use) \ 36*9499f5e7SRusty Russell panic("%s:in_use = %i\n", \ 37*9499f5e7SRusty Russell (_vq)->vq.name, (_vq)->in_use); \ 38c5f841f1SRusty Russell (_vq)->in_use = __LINE__; \ 39c5f841f1SRusty Russell mb(); \ 40c5f841f1SRusty Russell } while (0) 413a35ce7dSRoel Kluin #define END_USE(_vq) \ 423a35ce7dSRoel Kluin do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; mb(); } while(0) 430a8a69ddSRusty Russell #else 44*9499f5e7SRusty Russell #define BAD_RING(_vq, fmt, args...) \ 45*9499f5e7SRusty Russell do { \ 46*9499f5e7SRusty Russell dev_err(&_vq->vq.vdev->dev, \ 47*9499f5e7SRusty Russell "%s:"fmt, (_vq)->vq.name, ##args); \ 48*9499f5e7SRusty Russell (_vq)->broken = true; \ 49*9499f5e7SRusty Russell } while (0) 500a8a69ddSRusty Russell #define START_USE(vq) 510a8a69ddSRusty Russell #define END_USE(vq) 520a8a69ddSRusty Russell #endif 530a8a69ddSRusty Russell 540a8a69ddSRusty Russell struct vring_virtqueue 550a8a69ddSRusty Russell { 560a8a69ddSRusty Russell struct virtqueue vq; 570a8a69ddSRusty Russell 580a8a69ddSRusty Russell /* Actual memory layout for this queue */ 590a8a69ddSRusty Russell struct vring vring; 600a8a69ddSRusty Russell 610a8a69ddSRusty Russell /* Other side has made a mess, don't try any more. */ 620a8a69ddSRusty Russell bool broken; 630a8a69ddSRusty Russell 640a8a69ddSRusty Russell /* Number of free buffers */ 650a8a69ddSRusty Russell unsigned int num_free; 660a8a69ddSRusty Russell /* Head of free buffer list. */ 670a8a69ddSRusty Russell unsigned int free_head; 680a8a69ddSRusty Russell /* Number we've added since last sync. */ 690a8a69ddSRusty Russell unsigned int num_added; 700a8a69ddSRusty Russell 710a8a69ddSRusty Russell /* Last used index we've seen. */ 721bc4953eSAnthony Liguori u16 last_used_idx; 730a8a69ddSRusty Russell 740a8a69ddSRusty Russell /* How to notify other side. FIXME: commonalize hcalls! */ 750a8a69ddSRusty Russell void (*notify)(struct virtqueue *vq); 760a8a69ddSRusty Russell 770a8a69ddSRusty Russell #ifdef DEBUG 780a8a69ddSRusty Russell /* They're supposed to lock for us. */ 790a8a69ddSRusty Russell unsigned int in_use; 800a8a69ddSRusty Russell #endif 810a8a69ddSRusty Russell 820a8a69ddSRusty Russell /* Tokens for callbacks. */ 830a8a69ddSRusty Russell void *data[]; 840a8a69ddSRusty Russell }; 850a8a69ddSRusty Russell 860a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) 870a8a69ddSRusty Russell 880a8a69ddSRusty Russell static int vring_add_buf(struct virtqueue *_vq, 890a8a69ddSRusty Russell struct scatterlist sg[], 900a8a69ddSRusty Russell unsigned int out, 910a8a69ddSRusty Russell unsigned int in, 920a8a69ddSRusty Russell void *data) 930a8a69ddSRusty Russell { 940a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 950a8a69ddSRusty Russell unsigned int i, avail, head, uninitialized_var(prev); 960a8a69ddSRusty Russell 970a8a69ddSRusty Russell BUG_ON(data == NULL); 980a8a69ddSRusty Russell BUG_ON(out + in > vq->vring.num); 990a8a69ddSRusty Russell BUG_ON(out + in == 0); 1000a8a69ddSRusty Russell 1010a8a69ddSRusty Russell START_USE(vq); 1020a8a69ddSRusty Russell 1030a8a69ddSRusty Russell if (vq->num_free < out + in) { 1040a8a69ddSRusty Russell pr_debug("Can't add buf len %i - avail = %i\n", 1050a8a69ddSRusty Russell out + in, vq->num_free); 10644653eaeSRusty Russell /* FIXME: for historical reasons, we force a notify here if 10744653eaeSRusty Russell * there are outgoing parts to the buffer. Presumably the 10844653eaeSRusty Russell * host should service the ring ASAP. */ 10944653eaeSRusty Russell if (out) 110426e3e0aSRusty Russell vq->notify(&vq->vq); 1110a8a69ddSRusty Russell END_USE(vq); 1120a8a69ddSRusty Russell return -ENOSPC; 1130a8a69ddSRusty Russell } 1140a8a69ddSRusty Russell 1150a8a69ddSRusty Russell /* We're about to use some buffers from the free list. */ 1160a8a69ddSRusty Russell vq->num_free -= out + in; 1170a8a69ddSRusty Russell 1180a8a69ddSRusty Russell head = vq->free_head; 1190a8a69ddSRusty Russell for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 1200a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 12115f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 1220a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 1230a8a69ddSRusty Russell prev = i; 1240a8a69ddSRusty Russell sg++; 1250a8a69ddSRusty Russell } 1260a8a69ddSRusty Russell for (; in; i = vq->vring.desc[i].next, in--) { 1270a8a69ddSRusty Russell vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 12815f9c890SRusty Russell vq->vring.desc[i].addr = sg_phys(sg); 1290a8a69ddSRusty Russell vq->vring.desc[i].len = sg->length; 1300a8a69ddSRusty Russell prev = i; 1310a8a69ddSRusty Russell sg++; 1320a8a69ddSRusty Russell } 1330a8a69ddSRusty Russell /* Last one doesn't continue. */ 1340a8a69ddSRusty Russell vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT; 1350a8a69ddSRusty Russell 1360a8a69ddSRusty Russell /* Update free pointer */ 1370a8a69ddSRusty Russell vq->free_head = i; 1380a8a69ddSRusty Russell 1390a8a69ddSRusty Russell /* Set token. */ 1400a8a69ddSRusty Russell vq->data[head] = data; 1410a8a69ddSRusty Russell 1420a8a69ddSRusty Russell /* Put entry in available array (but don't update avail->idx until they 1430a8a69ddSRusty Russell * do sync). FIXME: avoid modulus here? */ 1440a8a69ddSRusty Russell avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num; 1450a8a69ddSRusty Russell vq->vring.avail->ring[avail] = head; 1460a8a69ddSRusty Russell 1470a8a69ddSRusty Russell pr_debug("Added buffer head %i to %p\n", head, vq); 1480a8a69ddSRusty Russell END_USE(vq); 1490a8a69ddSRusty Russell return 0; 1500a8a69ddSRusty Russell } 1510a8a69ddSRusty Russell 1520a8a69ddSRusty Russell static void vring_kick(struct virtqueue *_vq) 1530a8a69ddSRusty Russell { 1540a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 1550a8a69ddSRusty Russell START_USE(vq); 1560a8a69ddSRusty Russell /* Descriptors and available array need to be set before we expose the 1570a8a69ddSRusty Russell * new available array entries. */ 1580a8a69ddSRusty Russell wmb(); 1590a8a69ddSRusty Russell 1600a8a69ddSRusty Russell vq->vring.avail->idx += vq->num_added; 1610a8a69ddSRusty Russell vq->num_added = 0; 1620a8a69ddSRusty Russell 1630a8a69ddSRusty Russell /* Need to update avail index before checking if we should notify */ 1640a8a69ddSRusty Russell mb(); 1650a8a69ddSRusty Russell 1660a8a69ddSRusty Russell if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) 1670a8a69ddSRusty Russell /* Prod other side to tell it about changes. */ 1680a8a69ddSRusty Russell vq->notify(&vq->vq); 1690a8a69ddSRusty Russell 1700a8a69ddSRusty Russell END_USE(vq); 1710a8a69ddSRusty Russell } 1720a8a69ddSRusty Russell 1730a8a69ddSRusty Russell static void detach_buf(struct vring_virtqueue *vq, unsigned int head) 1740a8a69ddSRusty Russell { 1750a8a69ddSRusty Russell unsigned int i; 1760a8a69ddSRusty Russell 1770a8a69ddSRusty Russell /* Clear data ptr. */ 1780a8a69ddSRusty Russell vq->data[head] = NULL; 1790a8a69ddSRusty Russell 1800a8a69ddSRusty Russell /* Put back on free list: find end */ 1810a8a69ddSRusty Russell i = head; 1820a8a69ddSRusty Russell while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) { 1830a8a69ddSRusty Russell i = vq->vring.desc[i].next; 1840a8a69ddSRusty Russell vq->num_free++; 1850a8a69ddSRusty Russell } 1860a8a69ddSRusty Russell 1870a8a69ddSRusty Russell vq->vring.desc[i].next = vq->free_head; 1880a8a69ddSRusty Russell vq->free_head = head; 1890a8a69ddSRusty Russell /* Plus final descriptor */ 1900a8a69ddSRusty Russell vq->num_free++; 1910a8a69ddSRusty Russell } 1920a8a69ddSRusty Russell 1930a8a69ddSRusty Russell static inline bool more_used(const struct vring_virtqueue *vq) 1940a8a69ddSRusty Russell { 1950a8a69ddSRusty Russell return vq->last_used_idx != vq->vring.used->idx; 1960a8a69ddSRusty Russell } 1970a8a69ddSRusty Russell 1980a8a69ddSRusty Russell static void *vring_get_buf(struct virtqueue *_vq, unsigned int *len) 1990a8a69ddSRusty Russell { 2000a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 2010a8a69ddSRusty Russell void *ret; 2020a8a69ddSRusty Russell unsigned int i; 2030a8a69ddSRusty Russell 2040a8a69ddSRusty Russell START_USE(vq); 2050a8a69ddSRusty Russell 2065ef82752SRusty Russell if (unlikely(vq->broken)) { 2075ef82752SRusty Russell END_USE(vq); 2085ef82752SRusty Russell return NULL; 2095ef82752SRusty Russell } 2105ef82752SRusty Russell 2110a8a69ddSRusty Russell if (!more_used(vq)) { 2120a8a69ddSRusty Russell pr_debug("No more buffers in queue\n"); 2130a8a69ddSRusty Russell END_USE(vq); 2140a8a69ddSRusty Russell return NULL; 2150a8a69ddSRusty Russell } 2160a8a69ddSRusty Russell 2170a8a69ddSRusty Russell i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id; 2180a8a69ddSRusty Russell *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len; 2190a8a69ddSRusty Russell 2200a8a69ddSRusty Russell if (unlikely(i >= vq->vring.num)) { 2210a8a69ddSRusty Russell BAD_RING(vq, "id %u out of range\n", i); 2220a8a69ddSRusty Russell return NULL; 2230a8a69ddSRusty Russell } 2240a8a69ddSRusty Russell if (unlikely(!vq->data[i])) { 2250a8a69ddSRusty Russell BAD_RING(vq, "id %u is not a head!\n", i); 2260a8a69ddSRusty Russell return NULL; 2270a8a69ddSRusty Russell } 2280a8a69ddSRusty Russell 2290a8a69ddSRusty Russell /* detach_buf clears data, so grab it now. */ 2300a8a69ddSRusty Russell ret = vq->data[i]; 2310a8a69ddSRusty Russell detach_buf(vq, i); 2320a8a69ddSRusty Russell vq->last_used_idx++; 2330a8a69ddSRusty Russell END_USE(vq); 2340a8a69ddSRusty Russell return ret; 2350a8a69ddSRusty Russell } 2360a8a69ddSRusty Russell 23718445c4dSRusty Russell static void vring_disable_cb(struct virtqueue *_vq) 23818445c4dSRusty Russell { 23918445c4dSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 24018445c4dSRusty Russell 24118445c4dSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 24218445c4dSRusty Russell } 24318445c4dSRusty Russell 24418445c4dSRusty Russell static bool vring_enable_cb(struct virtqueue *_vq) 2450a8a69ddSRusty Russell { 2460a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 2470a8a69ddSRusty Russell 2480a8a69ddSRusty Russell START_USE(vq); 2490a8a69ddSRusty Russell 2500a8a69ddSRusty Russell /* We optimistically turn back on interrupts, then check if there was 2510a8a69ddSRusty Russell * more to do. */ 2520a8a69ddSRusty Russell vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; 2530a8a69ddSRusty Russell mb(); 2540a8a69ddSRusty Russell if (unlikely(more_used(vq))) { 2550a8a69ddSRusty Russell END_USE(vq); 2560a8a69ddSRusty Russell return false; 2570a8a69ddSRusty Russell } 2580a8a69ddSRusty Russell 2590a8a69ddSRusty Russell END_USE(vq); 2600a8a69ddSRusty Russell return true; 2610a8a69ddSRusty Russell } 2620a8a69ddSRusty Russell 2630a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq) 2640a8a69ddSRusty Russell { 2650a8a69ddSRusty Russell struct vring_virtqueue *vq = to_vvq(_vq); 2660a8a69ddSRusty Russell 2670a8a69ddSRusty Russell if (!more_used(vq)) { 2680a8a69ddSRusty Russell pr_debug("virtqueue interrupt with no work for %p\n", vq); 2690a8a69ddSRusty Russell return IRQ_NONE; 2700a8a69ddSRusty Russell } 2710a8a69ddSRusty Russell 2720a8a69ddSRusty Russell if (unlikely(vq->broken)) 2730a8a69ddSRusty Russell return IRQ_HANDLED; 2740a8a69ddSRusty Russell 2750a8a69ddSRusty Russell pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 27618445c4dSRusty Russell if (vq->vq.callback) 27718445c4dSRusty Russell vq->vq.callback(&vq->vq); 2780a8a69ddSRusty Russell 2790a8a69ddSRusty Russell return IRQ_HANDLED; 2800a8a69ddSRusty Russell } 281c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt); 2820a8a69ddSRusty Russell 2830a8a69ddSRusty Russell static struct virtqueue_ops vring_vq_ops = { 2840a8a69ddSRusty Russell .add_buf = vring_add_buf, 2850a8a69ddSRusty Russell .get_buf = vring_get_buf, 2860a8a69ddSRusty Russell .kick = vring_kick, 28718445c4dSRusty Russell .disable_cb = vring_disable_cb, 28818445c4dSRusty Russell .enable_cb = vring_enable_cb, 2890a8a69ddSRusty Russell }; 2900a8a69ddSRusty Russell 2910a8a69ddSRusty Russell struct virtqueue *vring_new_virtqueue(unsigned int num, 29287c7d57cSRusty Russell unsigned int vring_align, 2930a8a69ddSRusty Russell struct virtio_device *vdev, 2940a8a69ddSRusty Russell void *pages, 2950a8a69ddSRusty Russell void (*notify)(struct virtqueue *), 296*9499f5e7SRusty Russell void (*callback)(struct virtqueue *), 297*9499f5e7SRusty Russell const char *name) 2980a8a69ddSRusty Russell { 2990a8a69ddSRusty Russell struct vring_virtqueue *vq; 3000a8a69ddSRusty Russell unsigned int i; 3010a8a69ddSRusty Russell 30242b36cc0SRusty Russell /* We assume num is a power of 2. */ 30342b36cc0SRusty Russell if (num & (num - 1)) { 30442b36cc0SRusty Russell dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num); 30542b36cc0SRusty Russell return NULL; 30642b36cc0SRusty Russell } 30742b36cc0SRusty Russell 3080a8a69ddSRusty Russell vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL); 3090a8a69ddSRusty Russell if (!vq) 3100a8a69ddSRusty Russell return NULL; 3110a8a69ddSRusty Russell 31287c7d57cSRusty Russell vring_init(&vq->vring, num, pages, vring_align); 3130a8a69ddSRusty Russell vq->vq.callback = callback; 3140a8a69ddSRusty Russell vq->vq.vdev = vdev; 3150a8a69ddSRusty Russell vq->vq.vq_ops = &vring_vq_ops; 316*9499f5e7SRusty Russell vq->vq.name = name; 3170a8a69ddSRusty Russell vq->notify = notify; 3180a8a69ddSRusty Russell vq->broken = false; 3190a8a69ddSRusty Russell vq->last_used_idx = 0; 3200a8a69ddSRusty Russell vq->num_added = 0; 321*9499f5e7SRusty Russell list_add_tail(&vq->vq.list, &vdev->vqs); 3220a8a69ddSRusty Russell #ifdef DEBUG 3230a8a69ddSRusty Russell vq->in_use = false; 3240a8a69ddSRusty Russell #endif 3250a8a69ddSRusty Russell 3260a8a69ddSRusty Russell /* No callback? Tell other side not to bother us. */ 3270a8a69ddSRusty Russell if (!callback) 3280a8a69ddSRusty Russell vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 3290a8a69ddSRusty Russell 3300a8a69ddSRusty Russell /* Put everything in free lists. */ 3310a8a69ddSRusty Russell vq->num_free = num; 3320a8a69ddSRusty Russell vq->free_head = 0; 3330a8a69ddSRusty Russell for (i = 0; i < num-1; i++) 3340a8a69ddSRusty Russell vq->vring.desc[i].next = i+1; 3350a8a69ddSRusty Russell 3360a8a69ddSRusty Russell return &vq->vq; 3370a8a69ddSRusty Russell } 338c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue); 3390a8a69ddSRusty Russell 3400a8a69ddSRusty Russell void vring_del_virtqueue(struct virtqueue *vq) 3410a8a69ddSRusty Russell { 342*9499f5e7SRusty Russell list_del(&vq->list); 3430a8a69ddSRusty Russell kfree(to_vvq(vq)); 3440a8a69ddSRusty Russell } 345c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue); 3460a8a69ddSRusty Russell 347e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */ 348e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev) 349e34f8725SRusty Russell { 350e34f8725SRusty Russell unsigned int i; 351e34f8725SRusty Russell 352e34f8725SRusty Russell for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { 353e34f8725SRusty Russell switch (i) { 354e34f8725SRusty Russell default: 355e34f8725SRusty Russell /* We don't understand this bit. */ 356e34f8725SRusty Russell clear_bit(i, vdev->features); 357e34f8725SRusty Russell } 358e34f8725SRusty Russell } 359e34f8725SRusty Russell } 360e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features); 361e34f8725SRusty Russell 362c6fd4701SRusty Russell MODULE_LICENSE("GPL"); 363