xref: /openbmc/linux/drivers/virtio/virtio_ring.c (revision a9a0fef779074838230e04a322fd2bdc921f4f4f)
10a8a69ddSRusty Russell /* Virtio ring implementation.
20a8a69ddSRusty Russell  *
30a8a69ddSRusty Russell  *  Copyright 2007 Rusty Russell IBM Corporation
40a8a69ddSRusty Russell  *
50a8a69ddSRusty Russell  *  This program is free software; you can redistribute it and/or modify
60a8a69ddSRusty Russell  *  it under the terms of the GNU General Public License as published by
70a8a69ddSRusty Russell  *  the Free Software Foundation; either version 2 of the License, or
80a8a69ddSRusty Russell  *  (at your option) any later version.
90a8a69ddSRusty Russell  *
100a8a69ddSRusty Russell  *  This program is distributed in the hope that it will be useful,
110a8a69ddSRusty Russell  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
120a8a69ddSRusty Russell  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
130a8a69ddSRusty Russell  *  GNU General Public License for more details.
140a8a69ddSRusty Russell  *
150a8a69ddSRusty Russell  *  You should have received a copy of the GNU General Public License
160a8a69ddSRusty Russell  *  along with this program; if not, write to the Free Software
170a8a69ddSRusty Russell  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
180a8a69ddSRusty Russell  */
190a8a69ddSRusty Russell #include <linux/virtio.h>
200a8a69ddSRusty Russell #include <linux/virtio_ring.h>
21e34f8725SRusty Russell #include <linux/virtio_config.h>
220a8a69ddSRusty Russell #include <linux/device.h>
235a0e3ad6STejun Heo #include <linux/slab.h>
24b5a2c4f1SPaul Gortmaker #include <linux/module.h>
25e93300b1SRusty Russell #include <linux/hrtimer.h>
260a8a69ddSRusty Russell 
#ifdef DEBUG
/*
 * Development build: a corrupted ring is reported through dev_err() and
 * then the kernel is stopped with BUG() so the problem is caught at once.
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/*
 * Reentrancy check: the caller is supposed to guarantee no reentry.
 * START_USE() panics if the queue is already marked in use, otherwise it
 * records the line number of the entry point in ->in_use.
 */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
/* END_USE() must pair with a START_USE(); it clears the in-use marker. */
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while (0)
#else
/*
 * Normal build: report the problem and mark the queue broken instead of
 * crashing.  Every use of the macro argument is parenthesised so that an
 * expression argument (not just a plain identifier) expands correctly.
 */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
/* The reentrancy checks compile away to nothing outside DEBUG builds. */
#define START_USE(vq)
#define END_USE(vq)
#endif
550a8a69ddSRusty Russell 
560a8a69ddSRusty Russell struct vring_virtqueue
570a8a69ddSRusty Russell {
580a8a69ddSRusty Russell 	struct virtqueue vq;
590a8a69ddSRusty Russell 
600a8a69ddSRusty Russell 	/* Actual memory layout for this queue */
610a8a69ddSRusty Russell 	struct vring vring;
620a8a69ddSRusty Russell 
637b21e34fSRusty Russell 	/* Can we use weak barriers? */
647b21e34fSRusty Russell 	bool weak_barriers;
657b21e34fSRusty Russell 
660a8a69ddSRusty Russell 	/* Other side has made a mess, don't try any more. */
670a8a69ddSRusty Russell 	bool broken;
680a8a69ddSRusty Russell 
699fa29b9dSMark McLoughlin 	/* Host supports indirect buffers */
709fa29b9dSMark McLoughlin 	bool indirect;
719fa29b9dSMark McLoughlin 
72a5c262c5SMichael S. Tsirkin 	/* Host publishes avail event idx */
73a5c262c5SMichael S. Tsirkin 	bool event;
74a5c262c5SMichael S. Tsirkin 
750a8a69ddSRusty Russell 	/* Head of free buffer list. */
760a8a69ddSRusty Russell 	unsigned int free_head;
770a8a69ddSRusty Russell 	/* Number we've added since last sync. */
780a8a69ddSRusty Russell 	unsigned int num_added;
790a8a69ddSRusty Russell 
800a8a69ddSRusty Russell 	/* Last used index we've seen. */
811bc4953eSAnthony Liguori 	u16 last_used_idx;
820a8a69ddSRusty Russell 
830a8a69ddSRusty Russell 	/* How to notify other side. FIXME: commonalize hcalls! */
840a8a69ddSRusty Russell 	void (*notify)(struct virtqueue *vq);
850a8a69ddSRusty Russell 
860a8a69ddSRusty Russell #ifdef DEBUG
870a8a69ddSRusty Russell 	/* They're supposed to lock for us. */
880a8a69ddSRusty Russell 	unsigned int in_use;
89e93300b1SRusty Russell 
90e93300b1SRusty Russell 	/* Figure out if their kicks are too delayed. */
91e93300b1SRusty Russell 	bool last_add_time_valid;
92e93300b1SRusty Russell 	ktime_t last_add_time;
930a8a69ddSRusty Russell #endif
940a8a69ddSRusty Russell 
950a8a69ddSRusty Russell 	/* Tokens for callbacks. */
960a8a69ddSRusty Russell 	void *data[];
970a8a69ddSRusty Russell };
980a8a69ddSRusty Russell 
990a8a69ddSRusty Russell #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
1000a8a69ddSRusty Russell 
1019fa29b9dSMark McLoughlin /* Set up an indirect table of descriptors and add it to the queue. */
1029fa29b9dSMark McLoughlin static int vring_add_indirect(struct vring_virtqueue *vq,
1039fa29b9dSMark McLoughlin 			      struct scatterlist sg[],
1049fa29b9dSMark McLoughlin 			      unsigned int out,
105bbd603efSMichael S. Tsirkin 			      unsigned int in,
106bbd603efSMichael S. Tsirkin 			      gfp_t gfp)
1079fa29b9dSMark McLoughlin {
1089fa29b9dSMark McLoughlin 	struct vring_desc *desc;
1099fa29b9dSMark McLoughlin 	unsigned head;
1109fa29b9dSMark McLoughlin 	int i;
1119fa29b9dSMark McLoughlin 
112b92b1b89SWill Deacon 	/*
113b92b1b89SWill Deacon 	 * We require lowmem mappings for the descriptors because
114b92b1b89SWill Deacon 	 * otherwise virt_to_phys will give us bogus addresses in the
115b92b1b89SWill Deacon 	 * virtqueue.
116b92b1b89SWill Deacon 	 */
117b92b1b89SWill Deacon 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
118b92b1b89SWill Deacon 
119bbd603efSMichael S. Tsirkin 	desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
1209fa29b9dSMark McLoughlin 	if (!desc)
121686d3637SMichael S. Tsirkin 		return -ENOMEM;
1229fa29b9dSMark McLoughlin 
1239fa29b9dSMark McLoughlin 	/* Transfer entries from the sg list into the indirect page */
1249fa29b9dSMark McLoughlin 	for (i = 0; i < out; i++) {
1259fa29b9dSMark McLoughlin 		desc[i].flags = VRING_DESC_F_NEXT;
1269fa29b9dSMark McLoughlin 		desc[i].addr = sg_phys(sg);
1279fa29b9dSMark McLoughlin 		desc[i].len = sg->length;
1289fa29b9dSMark McLoughlin 		desc[i].next = i+1;
1299fa29b9dSMark McLoughlin 		sg++;
1309fa29b9dSMark McLoughlin 	}
1319fa29b9dSMark McLoughlin 	for (; i < (out + in); i++) {
1329fa29b9dSMark McLoughlin 		desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
1339fa29b9dSMark McLoughlin 		desc[i].addr = sg_phys(sg);
1349fa29b9dSMark McLoughlin 		desc[i].len = sg->length;
1359fa29b9dSMark McLoughlin 		desc[i].next = i+1;
1369fa29b9dSMark McLoughlin 		sg++;
1379fa29b9dSMark McLoughlin 	}
1389fa29b9dSMark McLoughlin 
1399fa29b9dSMark McLoughlin 	/* Last one doesn't continue. */
1409fa29b9dSMark McLoughlin 	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
1419fa29b9dSMark McLoughlin 	desc[i-1].next = 0;
1429fa29b9dSMark McLoughlin 
1439fa29b9dSMark McLoughlin 	/* We're about to use a buffer */
14406ca287dSRusty Russell 	vq->vq.num_free--;
1459fa29b9dSMark McLoughlin 
1469fa29b9dSMark McLoughlin 	/* Use a single buffer which doesn't continue */
1479fa29b9dSMark McLoughlin 	head = vq->free_head;
1489fa29b9dSMark McLoughlin 	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
1499fa29b9dSMark McLoughlin 	vq->vring.desc[head].addr = virt_to_phys(desc);
1509fa29b9dSMark McLoughlin 	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
1519fa29b9dSMark McLoughlin 
1529fa29b9dSMark McLoughlin 	/* Update free pointer */
1539fa29b9dSMark McLoughlin 	vq->free_head = vq->vring.desc[head].next;
1549fa29b9dSMark McLoughlin 
1559fa29b9dSMark McLoughlin 	return head;
1569fa29b9dSMark McLoughlin }
1579fa29b9dSMark McLoughlin 
1585dfc1762SRusty Russell /**
159f96fde41SRusty Russell  * virtqueue_add_buf - expose buffer to other end
1605dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
1615dfc1762SRusty Russell  * @sg: the description of the buffer(s).
1625dfc1762SRusty Russell  * @out_num: the number of sg readable by other side
1635dfc1762SRusty Russell  * @in_num: the number of sg which are writable (after readable ones)
1645dfc1762SRusty Russell  * @data: the token identifying the buffer.
1655dfc1762SRusty Russell  * @gfp: how to do memory allocations (if necessary).
1665dfc1762SRusty Russell  *
1675dfc1762SRusty Russell  * Caller must ensure we don't call this with other virtqueue operations
1685dfc1762SRusty Russell  * at the same time (except where noted).
1695dfc1762SRusty Russell  *
17098e8c6bcSRusty Russell  * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
1715dfc1762SRusty Russell  */
172f96fde41SRusty Russell int virtqueue_add_buf(struct virtqueue *_vq,
1730a8a69ddSRusty Russell 		      struct scatterlist sg[],
1740a8a69ddSRusty Russell 		      unsigned int out,
1750a8a69ddSRusty Russell 		      unsigned int in,
176bbd603efSMichael S. Tsirkin 		      void *data,
177bbd603efSMichael S. Tsirkin 		      gfp_t gfp)
1780a8a69ddSRusty Russell {
1790a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
1801fe9b6feSMichael S. Tsirkin 	unsigned int i, avail, uninitialized_var(prev);
1811fe9b6feSMichael S. Tsirkin 	int head;
1820a8a69ddSRusty Russell 
1839fa29b9dSMark McLoughlin 	START_USE(vq);
1849fa29b9dSMark McLoughlin 
1850a8a69ddSRusty Russell 	BUG_ON(data == NULL);
1869fa29b9dSMark McLoughlin 
187e93300b1SRusty Russell #ifdef DEBUG
188e93300b1SRusty Russell 	{
189e93300b1SRusty Russell 		ktime_t now = ktime_get();
190e93300b1SRusty Russell 
191e93300b1SRusty Russell 		/* No kick or get, with .1 second between?  Warn. */
192e93300b1SRusty Russell 		if (vq->last_add_time_valid)
193e93300b1SRusty Russell 			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
194e93300b1SRusty Russell 					    > 100);
195e93300b1SRusty Russell 		vq->last_add_time = now;
196e93300b1SRusty Russell 		vq->last_add_time_valid = true;
197e93300b1SRusty Russell 	}
198e93300b1SRusty Russell #endif
199e93300b1SRusty Russell 
2009fa29b9dSMark McLoughlin 	/* If the host supports indirect descriptor tables, and we have multiple
2019fa29b9dSMark McLoughlin 	 * buffers, then go indirect. FIXME: tune this threshold */
20206ca287dSRusty Russell 	if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
203bbd603efSMichael S. Tsirkin 		head = vring_add_indirect(vq, sg, out, in, gfp);
2041fe9b6feSMichael S. Tsirkin 		if (likely(head >= 0))
2059fa29b9dSMark McLoughlin 			goto add_head;
2069fa29b9dSMark McLoughlin 	}
2079fa29b9dSMark McLoughlin 
2080a8a69ddSRusty Russell 	BUG_ON(out + in > vq->vring.num);
2090a8a69ddSRusty Russell 	BUG_ON(out + in == 0);
2100a8a69ddSRusty Russell 
21106ca287dSRusty Russell 	if (vq->vq.num_free < out + in) {
2120a8a69ddSRusty Russell 		pr_debug("Can't add buf len %i - avail = %i\n",
21306ca287dSRusty Russell 			 out + in, vq->vq.num_free);
21444653eaeSRusty Russell 		/* FIXME: for historical reasons, we force a notify here if
21544653eaeSRusty Russell 		 * there are outgoing parts to the buffer.  Presumably the
21644653eaeSRusty Russell 		 * host should service the ring ASAP. */
21744653eaeSRusty Russell 		if (out)
218426e3e0aSRusty Russell 			vq->notify(&vq->vq);
2190a8a69ddSRusty Russell 		END_USE(vq);
2200a8a69ddSRusty Russell 		return -ENOSPC;
2210a8a69ddSRusty Russell 	}
2220a8a69ddSRusty Russell 
2230a8a69ddSRusty Russell 	/* We're about to use some buffers from the free list. */
22406ca287dSRusty Russell 	vq->vq.num_free -= out + in;
2250a8a69ddSRusty Russell 
2260a8a69ddSRusty Russell 	head = vq->free_head;
2270a8a69ddSRusty Russell 	for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
2280a8a69ddSRusty Russell 		vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
22915f9c890SRusty Russell 		vq->vring.desc[i].addr = sg_phys(sg);
2300a8a69ddSRusty Russell 		vq->vring.desc[i].len = sg->length;
2310a8a69ddSRusty Russell 		prev = i;
2320a8a69ddSRusty Russell 		sg++;
2330a8a69ddSRusty Russell 	}
2340a8a69ddSRusty Russell 	for (; in; i = vq->vring.desc[i].next, in--) {
2350a8a69ddSRusty Russell 		vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
23615f9c890SRusty Russell 		vq->vring.desc[i].addr = sg_phys(sg);
2370a8a69ddSRusty Russell 		vq->vring.desc[i].len = sg->length;
2380a8a69ddSRusty Russell 		prev = i;
2390a8a69ddSRusty Russell 		sg++;
2400a8a69ddSRusty Russell 	}
2410a8a69ddSRusty Russell 	/* Last one doesn't continue. */
2420a8a69ddSRusty Russell 	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
2430a8a69ddSRusty Russell 
2440a8a69ddSRusty Russell 	/* Update free pointer */
2450a8a69ddSRusty Russell 	vq->free_head = i;
2460a8a69ddSRusty Russell 
2479fa29b9dSMark McLoughlin add_head:
2480a8a69ddSRusty Russell 	/* Set token. */
2490a8a69ddSRusty Russell 	vq->data[head] = data;
2500a8a69ddSRusty Russell 
2510a8a69ddSRusty Russell 	/* Put entry in available array (but don't update avail->idx until they
2523b720b8cSRusty Russell 	 * do sync). */
253ee7cd898SRusty Russell 	avail = (vq->vring.avail->idx & (vq->vring.num-1));
2540a8a69ddSRusty Russell 	vq->vring.avail->ring[avail] = head;
2550a8a69ddSRusty Russell 
256ee7cd898SRusty Russell 	/* Descriptors and available array need to be set before we expose the
257ee7cd898SRusty Russell 	 * new available array entries. */
258*a9a0fef7SRusty Russell 	virtio_wmb(vq->weak_barriers);
259ee7cd898SRusty Russell 	vq->vring.avail->idx++;
260ee7cd898SRusty Russell 	vq->num_added++;
261ee7cd898SRusty Russell 
262ee7cd898SRusty Russell 	/* This is very unlikely, but theoretically possible.  Kick
263ee7cd898SRusty Russell 	 * just in case. */
264ee7cd898SRusty Russell 	if (unlikely(vq->num_added == (1 << 16) - 1))
265ee7cd898SRusty Russell 		virtqueue_kick(_vq);
266ee7cd898SRusty Russell 
2670a8a69ddSRusty Russell 	pr_debug("Added buffer head %i to %p\n", head, vq);
2680a8a69ddSRusty Russell 	END_USE(vq);
2693c1b27d5SRusty Russell 
27098e8c6bcSRusty Russell 	return 0;
2710a8a69ddSRusty Russell }
272f96fde41SRusty Russell EXPORT_SYMBOL_GPL(virtqueue_add_buf);
2730a8a69ddSRusty Russell 
2745dfc1762SRusty Russell /**
27541f0377fSRusty Russell  * virtqueue_kick_prepare - first half of split virtqueue_kick call.
2765dfc1762SRusty Russell  * @vq: the struct virtqueue
2775dfc1762SRusty Russell  *
27841f0377fSRusty Russell  * Instead of virtqueue_kick(), you can do:
27941f0377fSRusty Russell  *	if (virtqueue_kick_prepare(vq))
28041f0377fSRusty Russell  *		virtqueue_notify(vq);
2815dfc1762SRusty Russell  *
28241f0377fSRusty Russell  * This is sometimes useful because the virtqueue_kick_prepare() needs
28341f0377fSRusty Russell  * to be serialized, but the actual virtqueue_notify() call does not.
2845dfc1762SRusty Russell  */
28541f0377fSRusty Russell bool virtqueue_kick_prepare(struct virtqueue *_vq)
2860a8a69ddSRusty Russell {
2870a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
288a5c262c5SMichael S. Tsirkin 	u16 new, old;
28941f0377fSRusty Russell 	bool needs_kick;
29041f0377fSRusty Russell 
2910a8a69ddSRusty Russell 	START_USE(vq);
292a72caae2SJason Wang 	/* We need to expose available array entries before checking avail
293a72caae2SJason Wang 	 * event. */
294*a9a0fef7SRusty Russell 	virtio_mb(vq->weak_barriers);
2950a8a69ddSRusty Russell 
296ee7cd898SRusty Russell 	old = vq->vring.avail->idx - vq->num_added;
297ee7cd898SRusty Russell 	new = vq->vring.avail->idx;
2980a8a69ddSRusty Russell 	vq->num_added = 0;
2990a8a69ddSRusty Russell 
300e93300b1SRusty Russell #ifdef DEBUG
301e93300b1SRusty Russell 	if (vq->last_add_time_valid) {
302e93300b1SRusty Russell 		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
303e93300b1SRusty Russell 					      vq->last_add_time)) > 100);
304e93300b1SRusty Russell 	}
305e93300b1SRusty Russell 	vq->last_add_time_valid = false;
306e93300b1SRusty Russell #endif
307e93300b1SRusty Russell 
30841f0377fSRusty Russell 	if (vq->event) {
30941f0377fSRusty Russell 		needs_kick = vring_need_event(vring_avail_event(&vq->vring),
31041f0377fSRusty Russell 					      new, old);
31141f0377fSRusty Russell 	} else {
31241f0377fSRusty Russell 		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
31341f0377fSRusty Russell 	}
3140a8a69ddSRusty Russell 	END_USE(vq);
31541f0377fSRusty Russell 	return needs_kick;
31641f0377fSRusty Russell }
31741f0377fSRusty Russell EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
31841f0377fSRusty Russell 
31941f0377fSRusty Russell /**
32041f0377fSRusty Russell  * virtqueue_notify - second half of split virtqueue_kick call.
32141f0377fSRusty Russell  * @vq: the struct virtqueue
32241f0377fSRusty Russell  *
32341f0377fSRusty Russell  * This does not need to be serialized.
32441f0377fSRusty Russell  */
32541f0377fSRusty Russell void virtqueue_notify(struct virtqueue *_vq)
32641f0377fSRusty Russell {
32741f0377fSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
32841f0377fSRusty Russell 
32941f0377fSRusty Russell 	/* Prod other side to tell it about changes. */
33041f0377fSRusty Russell 	vq->notify(_vq);
33141f0377fSRusty Russell }
33241f0377fSRusty Russell EXPORT_SYMBOL_GPL(virtqueue_notify);
33341f0377fSRusty Russell 
/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_buf calls, invoke this to kick
 * the other side.  The notification is only sent when
 * virtqueue_kick_prepare() reports that one is needed.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
void virtqueue_kick(struct virtqueue *vq)
{
	if (!virtqueue_kick_prepare(vq))
		return;

	virtqueue_notify(vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
3500a8a69ddSRusty Russell 
3510a8a69ddSRusty Russell static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
3520a8a69ddSRusty Russell {
3530a8a69ddSRusty Russell 	unsigned int i;
3540a8a69ddSRusty Russell 
3550a8a69ddSRusty Russell 	/* Clear data ptr. */
3560a8a69ddSRusty Russell 	vq->data[head] = NULL;
3570a8a69ddSRusty Russell 
3580a8a69ddSRusty Russell 	/* Put back on free list: find end */
3590a8a69ddSRusty Russell 	i = head;
3609fa29b9dSMark McLoughlin 
3619fa29b9dSMark McLoughlin 	/* Free the indirect table */
3629fa29b9dSMark McLoughlin 	if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
3639fa29b9dSMark McLoughlin 		kfree(phys_to_virt(vq->vring.desc[i].addr));
3649fa29b9dSMark McLoughlin 
3650a8a69ddSRusty Russell 	while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
3660a8a69ddSRusty Russell 		i = vq->vring.desc[i].next;
36706ca287dSRusty Russell 		vq->vq.num_free++;
3680a8a69ddSRusty Russell 	}
3690a8a69ddSRusty Russell 
3700a8a69ddSRusty Russell 	vq->vring.desc[i].next = vq->free_head;
3710a8a69ddSRusty Russell 	vq->free_head = head;
3720a8a69ddSRusty Russell 	/* Plus final descriptor */
37306ca287dSRusty Russell 	vq->vq.num_free++;
3740a8a69ddSRusty Russell }
3750a8a69ddSRusty Russell 
3760a8a69ddSRusty Russell static inline bool more_used(const struct vring_virtqueue *vq)
3770a8a69ddSRusty Russell {
3780a8a69ddSRusty Russell 	return vq->last_used_idx != vq->vring.used->idx;
3790a8a69ddSRusty Russell }
3800a8a69ddSRusty Russell 
3815dfc1762SRusty Russell /**
3825dfc1762SRusty Russell  * virtqueue_get_buf - get the next used buffer
3835dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
3845dfc1762SRusty Russell  * @len: the length written into the buffer
3855dfc1762SRusty Russell  *
3865dfc1762SRusty Russell  * If the driver wrote data into the buffer, @len will be set to the
3875dfc1762SRusty Russell  * amount written.  This means you don't need to clear the buffer
3885dfc1762SRusty Russell  * beforehand to ensure there's no data leakage in the case of short
3895dfc1762SRusty Russell  * writes.
3905dfc1762SRusty Russell  *
3915dfc1762SRusty Russell  * Caller must ensure we don't call this with other virtqueue
3925dfc1762SRusty Russell  * operations at the same time (except where noted).
3935dfc1762SRusty Russell  *
3945dfc1762SRusty Russell  * Returns NULL if there are no used buffers, or the "data" token
395f96fde41SRusty Russell  * handed to virtqueue_add_buf().
3965dfc1762SRusty Russell  */
3977c5e9ed0SMichael S. Tsirkin void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
3980a8a69ddSRusty Russell {
3990a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
4000a8a69ddSRusty Russell 	void *ret;
4010a8a69ddSRusty Russell 	unsigned int i;
4023b720b8cSRusty Russell 	u16 last_used;
4030a8a69ddSRusty Russell 
4040a8a69ddSRusty Russell 	START_USE(vq);
4050a8a69ddSRusty Russell 
4065ef82752SRusty Russell 	if (unlikely(vq->broken)) {
4075ef82752SRusty Russell 		END_USE(vq);
4085ef82752SRusty Russell 		return NULL;
4095ef82752SRusty Russell 	}
4105ef82752SRusty Russell 
4110a8a69ddSRusty Russell 	if (!more_used(vq)) {
4120a8a69ddSRusty Russell 		pr_debug("No more buffers in queue\n");
4130a8a69ddSRusty Russell 		END_USE(vq);
4140a8a69ddSRusty Russell 		return NULL;
4150a8a69ddSRusty Russell 	}
4160a8a69ddSRusty Russell 
4172d61ba95SMichael S. Tsirkin 	/* Only get used array entries after they have been exposed by host. */
418*a9a0fef7SRusty Russell 	virtio_rmb(vq->weak_barriers);
4192d61ba95SMichael S. Tsirkin 
4203b720b8cSRusty Russell 	last_used = (vq->last_used_idx & (vq->vring.num - 1));
4213b720b8cSRusty Russell 	i = vq->vring.used->ring[last_used].id;
4223b720b8cSRusty Russell 	*len = vq->vring.used->ring[last_used].len;
4230a8a69ddSRusty Russell 
4240a8a69ddSRusty Russell 	if (unlikely(i >= vq->vring.num)) {
4250a8a69ddSRusty Russell 		BAD_RING(vq, "id %u out of range\n", i);
4260a8a69ddSRusty Russell 		return NULL;
4270a8a69ddSRusty Russell 	}
4280a8a69ddSRusty Russell 	if (unlikely(!vq->data[i])) {
4290a8a69ddSRusty Russell 		BAD_RING(vq, "id %u is not a head!\n", i);
4300a8a69ddSRusty Russell 		return NULL;
4310a8a69ddSRusty Russell 	}
4320a8a69ddSRusty Russell 
4330a8a69ddSRusty Russell 	/* detach_buf clears data, so grab it now. */
4340a8a69ddSRusty Russell 	ret = vq->data[i];
4350a8a69ddSRusty Russell 	detach_buf(vq, i);
4360a8a69ddSRusty Russell 	vq->last_used_idx++;
437a5c262c5SMichael S. Tsirkin 	/* If we expect an interrupt for the next entry, tell host
438a5c262c5SMichael S. Tsirkin 	 * by writing event index and flush out the write before
439a5c262c5SMichael S. Tsirkin 	 * the read in the next get_buf call. */
440a5c262c5SMichael S. Tsirkin 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
441a5c262c5SMichael S. Tsirkin 		vring_used_event(&vq->vring) = vq->last_used_idx;
442*a9a0fef7SRusty Russell 		virtio_mb(vq->weak_barriers);
443a5c262c5SMichael S. Tsirkin 	}
444a5c262c5SMichael S. Tsirkin 
445e93300b1SRusty Russell #ifdef DEBUG
446e93300b1SRusty Russell 	vq->last_add_time_valid = false;
447e93300b1SRusty Russell #endif
448e93300b1SRusty Russell 
4490a8a69ddSRusty Russell 	END_USE(vq);
4500a8a69ddSRusty Russell 	return ret;
4510a8a69ddSRusty Russell }
4527c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_get_buf);
4530a8a69ddSRusty Russell 
4545dfc1762SRusty Russell /**
4555dfc1762SRusty Russell  * virtqueue_disable_cb - disable callbacks
4565dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
4575dfc1762SRusty Russell  *
4585dfc1762SRusty Russell  * Note that this is not necessarily synchronous, hence unreliable and only
4595dfc1762SRusty Russell  * useful as an optimization.
4605dfc1762SRusty Russell  *
4615dfc1762SRusty Russell  * Unlike other operations, this need not be serialized.
4625dfc1762SRusty Russell  */
4637c5e9ed0SMichael S. Tsirkin void virtqueue_disable_cb(struct virtqueue *_vq)
46418445c4dSRusty Russell {
46518445c4dSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
46618445c4dSRusty Russell 
46718445c4dSRusty Russell 	vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
46818445c4dSRusty Russell }
4697c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
47018445c4dSRusty Russell 
4715dfc1762SRusty Russell /**
4725dfc1762SRusty Russell  * virtqueue_enable_cb - restart callbacks after disable_cb.
4735dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
4745dfc1762SRusty Russell  *
4755dfc1762SRusty Russell  * This re-enables callbacks; it returns "false" if there are pending
4765dfc1762SRusty Russell  * buffers in the queue, to detect a possible race between the driver
4775dfc1762SRusty Russell  * checking for more work, and enabling callbacks.
4785dfc1762SRusty Russell  *
4795dfc1762SRusty Russell  * Caller must ensure we don't call this with other virtqueue
4805dfc1762SRusty Russell  * operations at the same time (except where noted).
4815dfc1762SRusty Russell  */
4827c5e9ed0SMichael S. Tsirkin bool virtqueue_enable_cb(struct virtqueue *_vq)
4830a8a69ddSRusty Russell {
4840a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
4850a8a69ddSRusty Russell 
4860a8a69ddSRusty Russell 	START_USE(vq);
4870a8a69ddSRusty Russell 
4880a8a69ddSRusty Russell 	/* We optimistically turn back on interrupts, then check if there was
4890a8a69ddSRusty Russell 	 * more to do. */
490a5c262c5SMichael S. Tsirkin 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
491a5c262c5SMichael S. Tsirkin 	 * either clear the flags bit or point the event index at the next
492a5c262c5SMichael S. Tsirkin 	 * entry. Always do both to keep code simple. */
4930a8a69ddSRusty Russell 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
494a5c262c5SMichael S. Tsirkin 	vring_used_event(&vq->vring) = vq->last_used_idx;
495*a9a0fef7SRusty Russell 	virtio_mb(vq->weak_barriers);
4960a8a69ddSRusty Russell 	if (unlikely(more_used(vq))) {
4970a8a69ddSRusty Russell 		END_USE(vq);
4980a8a69ddSRusty Russell 		return false;
4990a8a69ddSRusty Russell 	}
5000a8a69ddSRusty Russell 
5010a8a69ddSRusty Russell 	END_USE(vq);
5020a8a69ddSRusty Russell 	return true;
5030a8a69ddSRusty Russell }
5047c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
5050a8a69ddSRusty Russell 
5065dfc1762SRusty Russell /**
5075dfc1762SRusty Russell  * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
5085dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
5095dfc1762SRusty Russell  *
5105dfc1762SRusty Russell  * This re-enables callbacks but hints to the other side to delay
5115dfc1762SRusty Russell  * interrupts until most of the available buffers have been processed;
5125dfc1762SRusty Russell  * it returns "false" if there are many pending buffers in the queue,
5135dfc1762SRusty Russell  * to detect a possible race between the driver checking for more work,
5145dfc1762SRusty Russell  * and enabling callbacks.
5155dfc1762SRusty Russell  *
5165dfc1762SRusty Russell  * Caller must ensure we don't call this with other virtqueue
5175dfc1762SRusty Russell  * operations at the same time (except where noted).
5185dfc1762SRusty Russell  */
5197ab358c2SMichael S. Tsirkin bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
5207ab358c2SMichael S. Tsirkin {
5217ab358c2SMichael S. Tsirkin 	struct vring_virtqueue *vq = to_vvq(_vq);
5227ab358c2SMichael S. Tsirkin 	u16 bufs;
5237ab358c2SMichael S. Tsirkin 
5247ab358c2SMichael S. Tsirkin 	START_USE(vq);
5257ab358c2SMichael S. Tsirkin 
5267ab358c2SMichael S. Tsirkin 	/* We optimistically turn back on interrupts, then check if there was
5277ab358c2SMichael S. Tsirkin 	 * more to do. */
5287ab358c2SMichael S. Tsirkin 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
5297ab358c2SMichael S. Tsirkin 	 * either clear the flags bit or point the event index at the next
5307ab358c2SMichael S. Tsirkin 	 * entry. Always do both to keep code simple. */
5317ab358c2SMichael S. Tsirkin 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
5327ab358c2SMichael S. Tsirkin 	/* TODO: tune this threshold */
5337ab358c2SMichael S. Tsirkin 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
5347ab358c2SMichael S. Tsirkin 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
535*a9a0fef7SRusty Russell 	virtio_mb(vq->weak_barriers);
5367ab358c2SMichael S. Tsirkin 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
5377ab358c2SMichael S. Tsirkin 		END_USE(vq);
5387ab358c2SMichael S. Tsirkin 		return false;
5397ab358c2SMichael S. Tsirkin 	}
5407ab358c2SMichael S. Tsirkin 
5417ab358c2SMichael S. Tsirkin 	END_USE(vq);
5427ab358c2SMichael S. Tsirkin 	return true;
5437ab358c2SMichael S. Tsirkin }
5447ab358c2SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
5457ab358c2SMichael S. Tsirkin 
5465dfc1762SRusty Russell /**
5475dfc1762SRusty Russell  * virtqueue_detach_unused_buf - detach first unused buffer
5485dfc1762SRusty Russell  * @vq: the struct virtqueue we're talking about.
5495dfc1762SRusty Russell  *
550f96fde41SRusty Russell  * Returns NULL or the "data" token handed to virtqueue_add_buf().
5515dfc1762SRusty Russell  * This is not valid on an active queue; it is useful only for device
5525dfc1762SRusty Russell  * shutdown.
5535dfc1762SRusty Russell  */
5547c5e9ed0SMichael S. Tsirkin void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
555c021eac4SShirley Ma {
556c021eac4SShirley Ma 	struct vring_virtqueue *vq = to_vvq(_vq);
557c021eac4SShirley Ma 	unsigned int i;
558c021eac4SShirley Ma 	void *buf;
559c021eac4SShirley Ma 
560c021eac4SShirley Ma 	START_USE(vq);
561c021eac4SShirley Ma 
562c021eac4SShirley Ma 	for (i = 0; i < vq->vring.num; i++) {
563c021eac4SShirley Ma 		if (!vq->data[i])
564c021eac4SShirley Ma 			continue;
565c021eac4SShirley Ma 		/* detach_buf clears data, so grab it now. */
566c021eac4SShirley Ma 		buf = vq->data[i];
567c021eac4SShirley Ma 		detach_buf(vq, i);
568b3258ff1SAmit Shah 		vq->vring.avail->idx--;
569c021eac4SShirley Ma 		END_USE(vq);
570c021eac4SShirley Ma 		return buf;
571c021eac4SShirley Ma 	}
572c021eac4SShirley Ma 	/* That should have freed everything. */
57306ca287dSRusty Russell 	BUG_ON(vq->vq.num_free != vq->vring.num);
574c021eac4SShirley Ma 
575c021eac4SShirley Ma 	END_USE(vq);
576c021eac4SShirley Ma 	return NULL;
577c021eac4SShirley Ma }
5787c5e9ed0SMichael S. Tsirkin EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
579c021eac4SShirley Ma 
5800a8a69ddSRusty Russell irqreturn_t vring_interrupt(int irq, void *_vq)
5810a8a69ddSRusty Russell {
5820a8a69ddSRusty Russell 	struct vring_virtqueue *vq = to_vvq(_vq);
5830a8a69ddSRusty Russell 
5840a8a69ddSRusty Russell 	if (!more_used(vq)) {
5850a8a69ddSRusty Russell 		pr_debug("virtqueue interrupt with no work for %p\n", vq);
5860a8a69ddSRusty Russell 		return IRQ_NONE;
5870a8a69ddSRusty Russell 	}
5880a8a69ddSRusty Russell 
5890a8a69ddSRusty Russell 	if (unlikely(vq->broken))
5900a8a69ddSRusty Russell 		return IRQ_HANDLED;
5910a8a69ddSRusty Russell 
5920a8a69ddSRusty Russell 	pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
59318445c4dSRusty Russell 	if (vq->vq.callback)
59418445c4dSRusty Russell 		vq->vq.callback(&vq->vq);
5950a8a69ddSRusty Russell 
5960a8a69ddSRusty Russell 	return IRQ_HANDLED;
5970a8a69ddSRusty Russell }
598c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_interrupt);
5990a8a69ddSRusty Russell 
60017bb6d40SJason Wang struct virtqueue *vring_new_virtqueue(unsigned int index,
60117bb6d40SJason Wang 				      unsigned int num,
60287c7d57cSRusty Russell 				      unsigned int vring_align,
6030a8a69ddSRusty Russell 				      struct virtio_device *vdev,
6047b21e34fSRusty Russell 				      bool weak_barriers,
6050a8a69ddSRusty Russell 				      void *pages,
6060a8a69ddSRusty Russell 				      void (*notify)(struct virtqueue *),
6079499f5e7SRusty Russell 				      void (*callback)(struct virtqueue *),
6089499f5e7SRusty Russell 				      const char *name)
6090a8a69ddSRusty Russell {
6100a8a69ddSRusty Russell 	struct vring_virtqueue *vq;
6110a8a69ddSRusty Russell 	unsigned int i;
6120a8a69ddSRusty Russell 
61342b36cc0SRusty Russell 	/* We assume num is a power of 2. */
61442b36cc0SRusty Russell 	if (num & (num - 1)) {
61542b36cc0SRusty Russell 		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
61642b36cc0SRusty Russell 		return NULL;
61742b36cc0SRusty Russell 	}
61842b36cc0SRusty Russell 
6190a8a69ddSRusty Russell 	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
6200a8a69ddSRusty Russell 	if (!vq)
6210a8a69ddSRusty Russell 		return NULL;
6220a8a69ddSRusty Russell 
62387c7d57cSRusty Russell 	vring_init(&vq->vring, num, pages, vring_align);
6240a8a69ddSRusty Russell 	vq->vq.callback = callback;
6250a8a69ddSRusty Russell 	vq->vq.vdev = vdev;
6269499f5e7SRusty Russell 	vq->vq.name = name;
62706ca287dSRusty Russell 	vq->vq.num_free = num;
62806ca287dSRusty Russell 	vq->vq.index = index;
6290a8a69ddSRusty Russell 	vq->notify = notify;
6307b21e34fSRusty Russell 	vq->weak_barriers = weak_barriers;
6310a8a69ddSRusty Russell 	vq->broken = false;
6320a8a69ddSRusty Russell 	vq->last_used_idx = 0;
6330a8a69ddSRusty Russell 	vq->num_added = 0;
6349499f5e7SRusty Russell 	list_add_tail(&vq->vq.list, &vdev->vqs);
6350a8a69ddSRusty Russell #ifdef DEBUG
6360a8a69ddSRusty Russell 	vq->in_use = false;
637e93300b1SRusty Russell 	vq->last_add_time_valid = false;
6380a8a69ddSRusty Russell #endif
6390a8a69ddSRusty Russell 
6409fa29b9dSMark McLoughlin 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
641a5c262c5SMichael S. Tsirkin 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
6429fa29b9dSMark McLoughlin 
6430a8a69ddSRusty Russell 	/* No callback?  Tell other side not to bother us. */
6440a8a69ddSRusty Russell 	if (!callback)
6450a8a69ddSRusty Russell 		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
6460a8a69ddSRusty Russell 
6470a8a69ddSRusty Russell 	/* Put everything in free lists. */
6480a8a69ddSRusty Russell 	vq->free_head = 0;
6493b870624SAmit Shah 	for (i = 0; i < num-1; i++) {
6500a8a69ddSRusty Russell 		vq->vring.desc[i].next = i+1;
6513b870624SAmit Shah 		vq->data[i] = NULL;
6523b870624SAmit Shah 	}
6533b870624SAmit Shah 	vq->data[i] = NULL;
6540a8a69ddSRusty Russell 
6550a8a69ddSRusty Russell 	return &vq->vq;
6560a8a69ddSRusty Russell }
657c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_new_virtqueue);
6580a8a69ddSRusty Russell 
6590a8a69ddSRusty Russell void vring_del_virtqueue(struct virtqueue *vq)
6600a8a69ddSRusty Russell {
6619499f5e7SRusty Russell 	list_del(&vq->list);
6620a8a69ddSRusty Russell 	kfree(to_vvq(vq));
6630a8a69ddSRusty Russell }
664c6fd4701SRusty Russell EXPORT_SYMBOL_GPL(vring_del_virtqueue);
6650a8a69ddSRusty Russell 
666e34f8725SRusty Russell /* Manipulates transport-specific feature bits. */
667e34f8725SRusty Russell void vring_transport_features(struct virtio_device *vdev)
668e34f8725SRusty Russell {
669e34f8725SRusty Russell 	unsigned int i;
670e34f8725SRusty Russell 
671e34f8725SRusty Russell 	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) {
672e34f8725SRusty Russell 		switch (i) {
6739fa29b9dSMark McLoughlin 		case VIRTIO_RING_F_INDIRECT_DESC:
6749fa29b9dSMark McLoughlin 			break;
675a5c262c5SMichael S. Tsirkin 		case VIRTIO_RING_F_EVENT_IDX:
676a5c262c5SMichael S. Tsirkin 			break;
677e34f8725SRusty Russell 		default:
678e34f8725SRusty Russell 			/* We don't understand this bit. */
679e34f8725SRusty Russell 			clear_bit(i, vdev->features);
680e34f8725SRusty Russell 		}
681e34f8725SRusty Russell 	}
682e34f8725SRusty Russell }
683e34f8725SRusty Russell EXPORT_SYMBOL_GPL(vring_transport_features);
684e34f8725SRusty Russell 
6855dfc1762SRusty Russell /**
6865dfc1762SRusty Russell  * virtqueue_get_vring_size - return the size of the virtqueue's vring
6875dfc1762SRusty Russell  * @vq: the struct virtqueue containing the vring of interest.
6885dfc1762SRusty Russell  *
6895dfc1762SRusty Russell  * Returns the size of the vring.  This is mainly used for boasting to
6905dfc1762SRusty Russell  * userspace.  Unlike other operations, this need not be serialized.
6915dfc1762SRusty Russell  */
6928f9f4668SRick Jones unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
6938f9f4668SRick Jones {
6948f9f4668SRick Jones 
6958f9f4668SRick Jones 	struct vring_virtqueue *vq = to_vvq(_vq);
6968f9f4668SRick Jones 
6978f9f4668SRick Jones 	return vq->vring.num;
6988f9f4668SRick Jones }
6998f9f4668SRick Jones EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
7008f9f4668SRick Jones 
701c6fd4701SRusty Russell MODULE_LICENSE("GPL");
702