// SPDX-License-Identifier: GPL-2.0-only
/*
 * Helpers for the host side of a virtio ring.
 *
 * Since these may be in userspace, we use (inline) accessors.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/vringh.h>
#include <linux/virtio_ring.h>
#include <linux/kernel.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
#endif
#include <uapi/linux/virtio_config.h>

static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
{
	static DEFINE_RATELIMIT_STATE(vringh_rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	if (__ratelimit(&vringh_rs)) {
		va_list ap;
		va_start(ap, fmt);
		printk(KERN_NOTICE "vringh:");
		vprintk(fmt, ap);
		va_end(ap);
	}
}

/* Returns vring->num if empty, -ve on error. */
static inline int __vringh_get_head(const struct vringh *vrh,
				    int (*getu16)(const struct vringh *vrh,
						  u16 *val, const __virtio16 *p),
				    u16 *last_avail_idx)
{
	u16 avail_idx, i, head;
	int err;

	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
	if (err) {
		vringh_bad("Failed to access avail idx at %p",
			   &vrh->vring.avail->idx);
		return err;
	}

	if (*last_avail_idx == avail_idx)
		return vrh->vring.num;

	/* Only get avail ring entries after they have been exposed by guest. */
	virtio_rmb(vrh->weak_barriers);

	i = *last_avail_idx & (vrh->vring.num - 1);

	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
	if (err) {
		vringh_bad("Failed to read head: idx %d address %p",
			   *last_avail_idx, &vrh->vring.avail->ring[i]);
		return err;
	}

	if (head >= vrh->vring.num) {
		vringh_bad("Guest says index %u > %u is available",
			   head, vrh->vring.num);
		return -EINVAL;
	}

	(*last_avail_idx)++;
	return head;
}

/**
 * vringh_kiov_advance - skip bytes from vring_kiov
 * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
 * @len: the maximum length to advance
 */
void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
{
	while (len && iov->i < iov->used) {
		size_t partlen = min(iov->iov[iov->i].iov_len, len);

		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}

		len -= partlen;
	}
}
EXPORT_SYMBOL(vringh_kiov_advance);
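
/*
 * Example (illustrative sketch, not part of this API): a caller that
 * wants to skip a fixed-size header before copying the payload can
 * advance the kiov first. The struct my_hdr type and pull_payload()
 * helper below are hypothetical.
 *
 *	struct my_hdr { __le32 type; __le32 flags; };
 *
 *	static ssize_t pull_payload(struct vringh_kiov *riov,
 *				    void *buf, size_t len)
 *	{
 *		vringh_kiov_advance(riov, sizeof(struct my_hdr));
 *		return vringh_iov_pull_kern(riov, buf, len);
 *	}
 */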

/* Copy some bytes to/from the iovec.  Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
				      struct vringh_kiov *iov,
				      void *ptr, size_t len,
				      int (*xfer)(const struct vringh *vrh,
						  void *addr, void *ptr,
						  size_t len))
{
	int err, done = 0;

	while (len && iov->i < iov->used) {
		size_t partlen;

		partlen = min(iov->iov[iov->i].iov_len, len);
		err = xfer(vrh, iov->iov[iov->i].iov_base, ptr, partlen);
		if (err)
			return err;
		done += partlen;
		len -= partlen;
		ptr += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;

		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	return done;
}

/* May reduce *len if range is shorter. */
static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
			       struct vringh_range *range,
			       bool (*getrange)(struct vringh *,
						u64, struct vringh_range *))
{
	if (addr < range->start || addr > range->end_incl) {
		if (!getrange(vrh, addr, range))
			return false;
	}
	BUG_ON(addr < range->start || addr > range->end_incl);

	/* To end of memory? */
	if (unlikely(addr + *len == 0)) {
		if (range->end_incl == -1ULL)
			return true;
		goto truncate;
	}

	/* Otherwise, don't wrap. */
	if (addr + *len < addr) {
		vringh_bad("Wrapping descriptor %zu@0x%llx",
			   *len, (unsigned long long)addr);
		return false;
	}

	if (unlikely(addr + *len - 1 > range->end_incl))
		goto truncate;
	return true;

truncate:
	*len = range->end_incl + 1 - addr;
	return true;
}

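/*
 * Example (illustrative sketch): range_check() relies on a caller-supplied
 * getrange() callback like the one below, which approves a single guest
 * memory window. The MY_* constants are hypothetical; offset is what must
 * be added to a guest address to obtain the address we can actually use.
 *
 *	static bool my_getrange(struct vringh *vrh, u64 addr,
 *				struct vringh_range *r)
 *	{
 *		if (addr < MY_GUEST_BASE || addr > MY_GUEST_END)
 *			return false;
 *		r->start = MY_GUEST_BASE;
 *		r->end_incl = MY_GUEST_END;
 *		r->offset = MY_USER_BASE - MY_GUEST_BASE;
 *		return true;
 *	}
 */
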
static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
				  struct vringh_range *range,
				  bool (*getrange)(struct vringh *,
						   u64, struct vringh_range *))
{
	return true;
}

/* No reason for this code to be inline. */
static int move_to_indirect(const struct vringh *vrh,
			    int *up_next, u16 *i, void *addr,
			    const struct vring_desc *desc,
			    struct vring_desc **descs, int *desc_max)
{
	u32 len;

	/* Indirect tables can't have indirect. */
	if (*up_next != -1) {
		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
		return -EINVAL;
	}

	len = vringh32_to_cpu(vrh, desc->len);
	if (unlikely(len % sizeof(struct vring_desc))) {
		vringh_bad("Strange indirect len %u", len);
		return -EINVAL;
	}

	/* We will check this when we follow it! */
	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
		*up_next = vringh16_to_cpu(vrh, desc->next);
	else
		*up_next = -2;
	*descs = addr;
	*desc_max = len / sizeof(struct vring_desc);

	/* Now, start at the first indirect. */
	*i = 0;
	return 0;
}

static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
{
	struct kvec *new;
	unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;

	if (new_num < 8)
		new_num = 8;

	flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
	if (flag)
		new = krealloc_array(iov->iov, new_num,
				     sizeof(struct iovec), gfp);
	else {
		new = kmalloc_array(new_num, sizeof(struct iovec), gfp);
		if (new) {
			memcpy(new, iov->iov,
			       iov->max_num * sizeof(struct iovec));
			flag = VRINGH_IOV_ALLOCATED;
		}
	}
	if (!new)
		return -ENOMEM;
	iov->iov = new;
	iov->max_num = (new_num | flag);
	return 0;
}

static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
				       struct vring_desc **descs, int *desc_max)
{
	u16 i = *up_next;

	*up_next = -1;
	*descs = vrh->vring.desc;
	*desc_max = vrh->vring.num;
	return i;
}

static int slow_copy(struct vringh *vrh, void *dst, const void *src,
		     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
				    struct vringh_range *range,
				    bool (*getrange)(struct vringh *vrh,
						     u64,
						     struct vringh_range *)),
		     bool (*getrange)(struct vringh *vrh,
				      u64 addr,
				      struct vringh_range *r),
		     struct vringh_range *range,
		     int (*copy)(const struct vringh *vrh,
				 void *dst, const void *src, size_t len))
{
	size_t part, len = sizeof(struct vring_desc);

	do {
		u64 addr;
		int err;

		part = len;
		addr = (u64)(unsigned long)src - range->offset;

		if (!rcheck(vrh, addr, &part, range, getrange))
			return -EINVAL;

		err = copy(vrh, dst, src, part);
		if (err)
			return err;

		dst += part;
		src += part;
		len -= part;
	} while (len);
	return 0;
}

static inline int
__vringh_iov(struct vringh *vrh, u16 i,
	     struct vringh_kiov *riov,
	     struct vringh_kiov *wiov,
	     bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
			    struct vringh_range *range,
			    bool (*getrange)(struct vringh *, u64,
					     struct vringh_range *)),
	     bool (*getrange)(struct vringh *, u64, struct vringh_range *),
	     gfp_t gfp,
	     int (*copy)(const struct vringh *vrh,
			 void *dst, const void *src, size_t len))
{
	int err, count = 0, indirect_count = 0, up_next, desc_max;
	struct vring_desc desc, *descs;
	struct vringh_range range = { -1ULL, 0 }, slowrange;
	bool slow = false;

	/* We start traversing vring's descriptor table. */
	descs = vrh->vring.desc;
	desc_max = vrh->vring.num;
	up_next = -1;

	/* You must want something! */
	if (WARN_ON(!riov && !wiov))
		return -EINVAL;

	if (riov)
		riov->i = riov->used = riov->consumed = 0;
	if (wiov)
		wiov->i = wiov->used = wiov->consumed = 0;

	for (;;) {
		void *addr;
		struct vringh_kiov *iov;
		size_t len;

		if (unlikely(slow))
			err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
					&slowrange, copy);
		else
			err = copy(vrh, &desc, &descs[i], sizeof(desc));
		if (unlikely(err))
			goto fail;

		if (unlikely(desc.flags &
			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
			u64 a = vringh64_to_cpu(vrh, desc.addr);

			/* Make sure it's OK, and get offset. */
			len = vringh32_to_cpu(vrh, desc.len);
			if (!rcheck(vrh, a, &len, &range, getrange)) {
				err = -EINVAL;
				goto fail;
			}

			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
				slow = true;
				/* We need to save this range to use offset */
				slowrange = range;
			}

			addr = (void *)(long)(a + range.offset);
			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
					       &descs, &desc_max);
			if (err)
				goto fail;
			continue;
		}

		if (up_next == -1)
			count++;
		else
			indirect_count++;

		if (count > vrh->vring.num || indirect_count > desc_max) {
			vringh_bad("Descriptor loop in %p", descs);
			err = -ELOOP;
			goto fail;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
			iov = wiov;
		else {
			iov = riov;
			if (unlikely(wiov && wiov->used)) {
				vringh_bad("Readable desc %p after writable",
					   &descs[i]);
				err = -EINVAL;
				goto fail;
			}
		}

		if (!iov) {
			vringh_bad("Unexpected %s desc",
				   !wiov ? "writable" : "readable");
			err = -EPROTO;
			goto fail;
		}

	again:
		/* Make sure it's OK, and get offset. */
		len = vringh32_to_cpu(vrh, desc.len);
		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
			    getrange)) {
			err = -EINVAL;
			goto fail;
		}
		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
					       range.offset);

		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
			err = resize_iovec(iov, gfp);
			if (err)
				goto fail;
		}

		iov->iov[iov->used].iov_base = addr;
		iov->iov[iov->used].iov_len = len;
		iov->used++;

		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
			desc.len = cpu_to_vringh32(vrh,
				   vringh32_to_cpu(vrh, desc.len) - len);
			desc.addr = cpu_to_vringh64(vrh,
				    vringh64_to_cpu(vrh, desc.addr) + len);
			goto again;
		}

		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
			i = vringh16_to_cpu(vrh, desc.next);
		} else {
			/* Just in case we need to finish traversing above. */
			if (unlikely(up_next > 0)) {
				i = return_from_indirect(vrh, &up_next,
							 &descs, &desc_max);
				slow = false;
				indirect_count = 0;
			} else
				break;
		}

		if (i >= desc_max) {
			vringh_bad("Chained index %u > %u", i, desc_max);
			err = -EINVAL;
			goto fail;
		}
	}

	return 0;

fail:
	return err;
}

static inline int __vringh_complete(struct vringh *vrh,
				    const struct vring_used_elem *used,
				    unsigned int num_used,
				    int (*putu16)(const struct vringh *vrh,
						  __virtio16 *p, u16 val),
				    int (*putused)(const struct vringh *vrh,
						   struct vring_used_elem *dst,
						   const struct vring_used_elem
						   *src, unsigned num))
{
	struct vring_used *used_ring;
	int err;
	u16 used_idx, off;

	used_ring = vrh->vring.used;
	used_idx = vrh->last_used_idx + vrh->completed;

	off = used_idx % vrh->vring.num;

	/* Compiler knows num_used == 1 sometimes, hence extra check */
	if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
		u16 part = vrh->vring.num - off;
		err = putused(vrh, &used_ring->ring[off], used, part);
		if (!err)
			err = putused(vrh, &used_ring->ring[0], used + part,
				      num_used - part);
	} else
		err = putused(vrh, &used_ring->ring[off], used, num_used);

	if (err) {
		vringh_bad("Failed to write %u used entries %u at %p",
			   num_used, off, &used_ring->ring[off]);
		return err;
	}

	/* Make sure buffer is written before we update index. */
	virtio_wmb(vrh->weak_barriers);

	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
	if (err) {
		vringh_bad("Failed to update used index at %p",
			   &vrh->vring.used->idx);
		return err;
	}

	vrh->completed += num_used;
	return 0;
}


static inline int __vringh_need_notify(struct vringh *vrh,
				       int (*getu16)(const struct vringh *vrh,
						     u16 *val,
						     const __virtio16 *p))
{
	bool notify;
	u16 used_event;
	int err;

	/* Flush out used index update. This is paired with the
	 * barrier that the Guest executes when enabling
	 * interrupts. */
	virtio_mb(vrh->weak_barriers);

	/* Old-style, without event indices. */
	if (!vrh->event_indices) {
		u16 flags;
		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
		if (err) {
			vringh_bad("Failed to get flags at %p",
				   &vrh->vring.avail->flags);
			return err;
		}
		return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
	}

	/* Modern: we know when other side wants to know. */
	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
	if (err) {
		vringh_bad("Failed to get used event idx at %p",
			   &vring_used_event(&vrh->vring));
		return err;
	}

	/* Just in case we added so many that we wrap. */
	if (unlikely(vrh->completed > 0xffff))
		notify = true;
	else
		notify = vring_need_event(used_event,
					  vrh->last_used_idx + vrh->completed,
					  vrh->last_used_idx);

	vrh->last_used_idx += vrh->completed;
	vrh->completed = 0;
	return notify;
}

static inline bool __vringh_notify_enable(struct vringh *vrh,
					  int (*getu16)(const struct vringh *vrh,
							u16 *val, const __virtio16 *p),
					  int (*putu16)(const struct vringh *vrh,
							__virtio16 *p, u16 val))
{
	u16 avail;

	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
			vringh_bad("Clearing used flags %p",
				   &vrh->vring.used->flags);
			return true;
		}
	} else {
		if (putu16(vrh, &vring_avail_event(&vrh->vring),
			   vrh->last_avail_idx) != 0) {
			vringh_bad("Updating avail event index %p",
				   &vring_avail_event(&vrh->vring));
			return true;
		}
	}

	/* They could have slipped one in as we were doing that: make
	 * sure it's written, then check again. */
	virtio_mb(vrh->weak_barriers);

	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
		vringh_bad("Failed to check avail idx at %p",
			   &vrh->vring.avail->idx);
		return true;
	}

	/* This is unlikely, so we just leave notifications enabled
	 * (if we're using event_indices, we'll only get one
	 * notification anyway). */
	return avail == vrh->last_avail_idx;
}

static inline void __vringh_notify_disable(struct vringh *vrh,
					   int (*putu16)(const struct vringh *vrh,
							 __virtio16 *p, u16 val))
{
	if (!vrh->event_indices) {
		/* Old-school; update flags. */
		if (putu16(vrh, &vrh->vring.used->flags,
			   VRING_USED_F_NO_NOTIFY)) {
			vringh_bad("Setting used flags %p",
				   &vrh->vring.used->flags);
		}
	}
}

/* Userspace access helpers: in this case, addresses are really userspace. */
static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
{
	__virtio16 v = 0;
	int rc = get_user(v, (__force __virtio16 __user *)p);
	*val = vringh16_to_cpu(vrh, v);
	return rc;
}

static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	__virtio16 v = cpu_to_vringh16(vrh, val);
	return put_user(v, (__force __virtio16 __user *)p);
}

static inline int copydesc_user(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int putused_user(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	return copy_to_user((__force void __user *)dst, src,
			    sizeof(*dst) * num) ? -EFAULT : 0;
}

static inline int xfer_from_user(const struct vringh *vrh, void *src,
				 void *dst, size_t len)
{
	return copy_from_user(dst, (__force void __user *)src, len) ?
		-EFAULT : 0;
}

static inline int xfer_to_user(const struct vringh *vrh,
			       void *dst, void *src, size_t len)
{
	return copy_to_user((__force void __user *)dst, src, len) ?
		-EFAULT : 0;
}

/**
 * vringh_init_user - initialize a vringh for a userspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the userspace descriptor pointer.
 * @avail: the userspace avail pointer.
 * @used: the userspace used pointer.
 *
 * Returns an error if num is invalid: you should check pointers
 * yourself!
 */
int vringh_init_user(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     vring_desc_t __user *desc,
		     vring_avail_t __user *avail,
		     vring_used_t __user *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	/* vring expects kernel addresses, but only used via accessors. */
	vrh->vring.desc = (__force struct vring_desc *)desc;
	vrh->vring.avail = (__force struct vring_avail *)avail;
	vrh->vring.used = (__force struct vring_used *)used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_user);
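
/*
 * Example (illustrative sketch): setting up a vringh over a ring whose
 * pieces live in userspace. The user_desc/user_avail/user_used pointers
 * and the feature bits are hypothetical.
 *
 *	struct vringh vrh;
 *	int err;
 *
 *	err = vringh_init_user(&vrh, 1ULL << VIRTIO_RING_F_EVENT_IDX, 256,
 *			       true, user_desc, user_avail, user_used);
 *	if (err)
 *		return err;
 */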

/**
 * vringh_getdesc_user - get next available descriptor from userspace ring.
 * @vrh: the userspace vring.
 * @riov: where to put the readable descriptors (or NULL)
 * @wiov: where to put the writable descriptors (or NULL)
 * @getrange: function to call to check ranges.
 * @head: head index we received, for passing to vringh_complete_user().
 *
 * Returns 0 if there was no descriptor, 1 if there was, or -errno.
 *
 * Note that on error return, you can tell the difference between an
 * invalid ring and a single invalid descriptor: in the former case,
 * *head will be vrh->vring.num.  You may be able to ignore an invalid
 * descriptor, but there's not much you can do with an invalid ring.
 *
 * Note that you can reuse riov and wiov with subsequent calls. Content is
 * overwritten and memory reallocated if more space is needed.
 * When you no longer need riov and wiov, you should clean them up by
 * calling vringh_iov_cleanup() to release the memory, even on error!
 */
int vringh_getdesc_user(struct vringh *vrh,
			struct vringh_iov *riov,
			struct vringh_iov *wiov,
			bool (*getrange)(struct vringh *vrh,
					 u64 addr, struct vringh_range *r),
			u16 *head)
{
	int err;

	*head = vrh->vring.num;
	err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
	if (err < 0)
		return err;

	/* Empty... */
	if (err == vrh->vring.num)
		return 0;

	/* We need the layouts to be identical for this to work */
	BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
		     offsetof(struct vringh_iov, iov));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
		     offsetof(struct vringh_iov, i));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
		     offsetof(struct vringh_iov, used));
	BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
		     offsetof(struct vringh_iov, max_num));
	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
	BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
		     offsetof(struct kvec, iov_base));
	BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
		     offsetof(struct kvec, iov_len));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
		     != sizeof(((struct kvec *)NULL)->iov_base));
	BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
		     != sizeof(((struct kvec *)NULL)->iov_len));

	*head = err;
	err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
			   (struct vringh_kiov *)wiov,
			   range_check, getrange, GFP_KERNEL, copydesc_user);
	if (err)
		return err;

	return 1;
}
EXPORT_SYMBOL(vringh_getdesc_user);
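
/*
 * Example (illustrative sketch): a typical service loop over a userspace
 * ring. The my_getrange() callback, the handling step and the error paths
 * are hypothetical; note the iovs are reused across iterations and only
 * cleaned up once at the end.
 *
 *	struct vringh_iov riov, wiov;
 *	u16 head;
 *	int err;
 *
 *	vringh_iov_init(&riov, NULL, 0);
 *	vringh_iov_init(&wiov, NULL, 0);
 *
 *	while ((err = vringh_getdesc_user(&vrh, &riov, &wiov,
 *					  my_getrange, &head)) == 1) {
 *		... pull the request, push the reply, then publish:
 *		err = vringh_complete_user(&vrh, head, written);
 *		if (err)
 *			break;
 *	}
 *
 *	vringh_iov_cleanup(&riov);
 *	vringh_iov_cleanup(&wiov);
 */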

/**
 * vringh_iov_pull_user - copy bytes from vring_iov.
 * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
 * @dst: the place to copy.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)riov,
			       dst, len, xfer_from_user);
}
EXPORT_SYMBOL(vringh_iov_pull_user);

/**
 * vringh_iov_push_user - copy bytes into vring_iov.
 * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
 * @src: the place to copy from.
 * @len: the maximum length to copy.
 *
 * Returns the bytes copied <= len or a negative errno.
 */
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
			     const void *src, size_t len)
{
	return vringh_iov_xfer(NULL, (struct vringh_kiov *)wiov,
			       (void *)src, len, xfer_to_user);
}
EXPORT_SYMBOL(vringh_iov_push_user);

/**
 * vringh_abandon_user - we've decided not to handle the descriptor(s).
 * @vrh: the vring.
 * @num: the number of descriptors to put back (i.e. the number of
 *	 vringh_getdesc_user() calls to undo).
 *
 * The next vringh_getdesc_user() will return the old descriptor(s) again.
 */
void vringh_abandon_user(struct vringh *vrh, unsigned int num)
{
	/* We only update vring_avail_event(vr) when we want to be notified,
	 * so we haven't changed that yet. */
	vrh->last_avail_idx -= num;
}
EXPORT_SYMBOL(vringh_abandon_user);

/**
 * vringh_complete_user - we've finished with descriptor, publish it.
 * @vrh: the vring.
 * @head: the head as filled in by vringh_getdesc_user.
 * @len: the length of data we have written.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
{
	struct vring_used_elem used;

	used.id = cpu_to_vringh32(vrh, head);
	used.len = cpu_to_vringh32(vrh, len);
	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_user);

/**
 * vringh_complete_multi_user - we've finished with many descriptors.
 * @vrh: the vring.
 * @used: the head, length pairs.
 * @num_used: the number of used elements.
 *
 * You should check vringh_need_notify_user() after one or more calls
 * to this function.
 */
int vringh_complete_multi_user(struct vringh *vrh,
			       const struct vring_used_elem used[],
			       unsigned num_used)
{
	return __vringh_complete(vrh, used, num_used,
				 putu16_user, putused_user);
}
EXPORT_SYMBOL(vringh_complete_multi_user);

/**
 * vringh_notify_enable_user - we want to know if something changes.
 * @vrh: the vring.
 *
 * This always enables notifications, but returns false if there are
 * now more buffers available in the vring.
 */
bool vringh_notify_enable_user(struct vringh *vrh)
{
	return __vringh_notify_enable(vrh, getu16_user, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_enable_user);
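
/*
 * Example (illustrative sketch): the usual idle transition. We only go to
 * sleep once enabling notifications confirms no buffers slipped in; the
 * waiting mechanism is hypothetical.
 *
 *	if (vringh_notify_enable_user(&vrh)) {
 *		... ring still empty: wait for the next notification ...
 *	} else {
 *		vringh_notify_disable_user(&vrh);
 *		... more buffers already available: keep processing ...
 *	}
 */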

/**
 * vringh_notify_disable_user - don't tell us if something changes.
 * @vrh: the vring.
 *
 * This is our normal running state: we disable and then only enable when
 * we're going to sleep.
 */
void vringh_notify_disable_user(struct vringh *vrh)
{
	__vringh_notify_disable(vrh, putu16_user);
}
EXPORT_SYMBOL(vringh_notify_disable_user);

/**
 * vringh_need_notify_user - must we tell the other side about used buffers?
 * @vrh: the vring we've called vringh_complete_user() on.
 *
 * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
 */
int vringh_need_notify_user(struct vringh *vrh)
{
	return __vringh_need_notify(vrh, getu16_user);
}
EXPORT_SYMBOL(vringh_need_notify_user);

/* Kernelspace access helpers. */
static inline int getu16_kern(const struct vringh *vrh,
			      u16 *val, const __virtio16 *p)
{
	*val = vringh16_to_cpu(vrh, READ_ONCE(*p));
	return 0;
}

static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
{
	WRITE_ONCE(*p, cpu_to_vringh16(vrh, val));
	return 0;
}

static inline int copydesc_kern(const struct vringh *vrh,
				void *dst, const void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int putused_kern(const struct vringh *vrh,
			       struct vring_used_elem *dst,
			       const struct vring_used_elem *src,
			       unsigned int num)
{
	memcpy(dst, src, num * sizeof(*dst));
	return 0;
}

static inline int xfer_kern(const struct vringh *vrh, void *src,
			    void *dst, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

static inline int kern_xfer(const struct vringh *vrh, void *dst,
			    void *src, size_t len)
{
	memcpy(dst, src, len);
	return 0;
}

/**
 * vringh_init_kern - initialize a vringh for a kernelspace vring.
 * @vrh: the vringh to initialize.
 * @features: the feature bits for this ring.
 * @num: the number of elements.
 * @weak_barriers: true if we only need memory barriers, not I/O.
 * @desc: the kernelspace descriptor pointer.
 * @avail: the kernelspace avail pointer.
 * @used: the kernelspace used pointer.
 *
 * Returns an error if num is invalid.
 */
int vringh_init_kern(struct vringh *vrh, u64 features,
		     unsigned int num, bool weak_barriers,
		     struct vring_desc *desc,
		     struct vring_avail *avail,
		     struct vring_used *used)
{
	/* Sane power of 2 please! */
	if (!num || num > 0xffff || (num & (num - 1))) {
		vringh_bad("Bad ring size %u", num);
		return -EINVAL;
	}

	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
	vrh->weak_barriers = weak_barriers;
	vrh->completed = 0;
	vrh->last_avail_idx = 0;
	vrh->last_used_idx = 0;
	vrh->vring.num = num;
	vrh->vring.desc = desc;
	vrh->vring.avail = avail;
	vrh->vring.used = used;
	return 0;
}
EXPORT_SYMBOL(vringh_init_kern);
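
/*
 * Example (illustrative sketch): allocating a contiguous ring in the
 * kernel and wiring it up. The size/alignment choices and the missing
 * teardown (kfree) are hypothetical simplifications.
 *
 *	struct vring vring;
 *	struct vringh vrh;
 *	int err;
 *	void *queue = kzalloc(vring_size(256, PAGE_SIZE), GFP_KERNEL);
 *
 *	if (!queue)
 *		return -ENOMEM;
 *	vring_init(&vring, 256, queue, PAGE_SIZE);
 *	err = vringh_init_kern(&vrh, 0, 256, true,
 *			       vring.desc, vring.avail, vring.used);
 */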
955f87d0fbbSRusty Russell 
956f87d0fbbSRusty Russell /**
957f87d0fbbSRusty Russell  * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
958f87d0fbbSRusty Russell  * @vrh: the kernelspace vring.
959f87d0fbbSRusty Russell  * @riov: where to put the readable descriptors (or NULL)
960f87d0fbbSRusty Russell  * @wiov: where to put the writable descriptors (or NULL)
961f87d0fbbSRusty Russell  * @head: head index we received, for passing to vringh_complete_kern().
962f87d0fbbSRusty Russell  * @gfp: flags for allocating larger riov/wiov.
963f87d0fbbSRusty Russell  *
964f87d0fbbSRusty Russell  * Returns 0 if there was no descriptor, 1 if there was, or -errno.
965f87d0fbbSRusty Russell  *
966f87d0fbbSRusty Russell  * Note that on error return, you can tell the difference between an
967f87d0fbbSRusty Russell  * invalid ring and a single invalid descriptor: in the former case,
968f87d0fbbSRusty Russell  * *head will be vrh->vring.num.  You may be able to ignore an invalid
969f87d0fbbSRusty Russell  * descriptor, but there's not much you can do with an invalid ring.
970f87d0fbbSRusty Russell  *
97169c13c58SStefano Garzarella  * Note that you can reuse riov and wiov with subsequent calls. Content is
97269c13c58SStefano Garzarella  * overwritten and memory reallocated if more space is needed.
97369c13c58SStefano Garzarella  * Once you no longer need riov and wiov, clean them up by calling
97469c13c58SStefano Garzarella  * vringh_kiov_cleanup() to release the memory, even on error!
975f87d0fbbSRusty Russell  */
976f87d0fbbSRusty Russell int vringh_getdesc_kern(struct vringh *vrh,
977f87d0fbbSRusty Russell 			struct vringh_kiov *riov,
978f87d0fbbSRusty Russell 			struct vringh_kiov *wiov,
979f87d0fbbSRusty Russell 			u16 *head,
980f87d0fbbSRusty Russell 			gfp_t gfp)
981f87d0fbbSRusty Russell {
982f87d0fbbSRusty Russell 	int err;
983f87d0fbbSRusty Russell 
984f87d0fbbSRusty Russell 	err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
985f87d0fbbSRusty Russell 	if (err < 0)
986f87d0fbbSRusty Russell 		return err;
987f87d0fbbSRusty Russell 
988f87d0fbbSRusty Russell 	/* Empty... */
989f87d0fbbSRusty Russell 	if (err == vrh->vring.num)
990f87d0fbbSRusty Russell 		return 0;
991f87d0fbbSRusty Russell 
992f87d0fbbSRusty Russell 	*head = err;
993f87d0fbbSRusty Russell 	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
994f87d0fbbSRusty Russell 			   gfp, copydesc_kern);
995f87d0fbbSRusty Russell 	if (err)
996f87d0fbbSRusty Russell 		return err;
997f87d0fbbSRusty Russell 
998f87d0fbbSRusty Russell 	return 1;
999f87d0fbbSRusty Russell }
1000f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_getdesc_kern);
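/*
 * A minimal usage sketch (hypothetical names, error paths elided):
 * back riov/wiov with small on-stack kvec arrays, then fetch one
 * available descriptor chain:
 *
 *	struct kvec rkvec[8], wkvec[8];
 *	struct vringh_kiov riov, wiov;
 *	u16 head;
 *	int err;
 *
 *	vringh_kiov_init(&riov, rkvec, ARRAY_SIZE(rkvec));
 *	vringh_kiov_init(&wiov, wkvec, ARRAY_SIZE(wkvec));
 *
 *	err = vringh_getdesc_kern(vrh, &riov, &wiov, &head, GFP_KERNEL);
 *	if (err < 0)
 *		...			// bad ring or bad descriptor
 *	else if (err == 0)
 *		...			// ring is empty
 *	else
 *		...			// consume riov/wiov, then complete head
 */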
1001f87d0fbbSRusty Russell 
1002f87d0fbbSRusty Russell /**
1003f87d0fbbSRusty Russell  * vringh_iov_pull_kern - copy bytes from vring_iov.
1004f87d0fbbSRusty Russell  * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
1005f87d0fbbSRusty Russell  * @dst: the place to copy.
1006f87d0fbbSRusty Russell  * @len: the maximum length to copy.
1007f87d0fbbSRusty Russell  *
1008f87d0fbbSRusty Russell  * Returns the bytes copied <= len or a negative errno.
1009f87d0fbbSRusty Russell  */
1010f87d0fbbSRusty Russell ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
1011f87d0fbbSRusty Russell {
10129ad9c49cSJason Wang 	return vringh_iov_xfer(NULL, riov, dst, len, xfer_kern);
1013f87d0fbbSRusty Russell }
1014f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_iov_pull_kern);
1015f87d0fbbSRusty Russell 
1016f87d0fbbSRusty Russell /**
1017f87d0fbbSRusty Russell  * vringh_iov_push_kern - copy bytes into vring_iov.
1018f87d0fbbSRusty Russell  * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
10198009b0f4SStefano Garzarella  * @src: the place to copy from.
1020f87d0fbbSRusty Russell  * @len: the maximum length to copy.
1021f87d0fbbSRusty Russell  *
1022f87d0fbbSRusty Russell  * Returns the bytes copied <= len or a negative errno.
1023f87d0fbbSRusty Russell  */
1024f87d0fbbSRusty Russell ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
1025f87d0fbbSRusty Russell 			     const void *src, size_t len)
1026f87d0fbbSRusty Russell {
10279ad9c49cSJason Wang 	return vringh_iov_xfer(NULL, wiov, (void *)src, len, kern_xfer);
1028f87d0fbbSRusty Russell }
1029f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_iov_push_kern);
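/*
 * A minimal "echo" sketch using the two helpers above (struct
 * example_req and the surrounding function are hypothetical):
 *
 *	struct example_req req;
 *	ssize_t got, put;
 *
 *	got = vringh_iov_pull_kern(&riov, &req, sizeof(req));
 *	if (got < 0)
 *		return got;
 *
 *	put = vringh_iov_push_kern(&wiov, &req, got);
 *	if (put < 0)
 *		return put;
 *
 *	// once riov/wiov are no longer needed:
 *	vringh_kiov_cleanup(&riov);
 *	vringh_kiov_cleanup(&wiov);
 */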
1030f87d0fbbSRusty Russell 
1031f87d0fbbSRusty Russell /**
1032f87d0fbbSRusty Russell  * vringh_abandon_kern - we've decided not to handle the descriptor(s).
1033f87d0fbbSRusty Russell  * @vrh: the vring.
1034f87d0fbbSRusty Russell  * @num: the number of descriptors to put back (i.e. num
1035f87d0fbbSRusty Russell  *	 vringh_getdesc_kern() calls to undo).
1036f87d0fbbSRusty Russell  *
1037f87d0fbbSRusty Russell  * The next vringh_getdesc_kern() will return the old descriptor(s) again.
1038f87d0fbbSRusty Russell  */
1039f87d0fbbSRusty Russell void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
1040f87d0fbbSRusty Russell {
1041f87d0fbbSRusty Russell 	/* We only update vring_avail_event(vr) when we want to be notified,
1042f87d0fbbSRusty Russell 	 * so we haven't changed that yet. */
1043f87d0fbbSRusty Russell 	vrh->last_avail_idx -= num;
1044f87d0fbbSRusty Russell }
1045f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_abandon_kern);
1046f87d0fbbSRusty Russell 
1047f87d0fbbSRusty Russell /**
1048f87d0fbbSRusty Russell  * vringh_complete_kern - we've finished with descriptor, publish it.
1049f87d0fbbSRusty Russell  * @vrh: the vring.
1050f87d0fbbSRusty Russell  * @head: the head as filled in by vringh_getdesc_kern.
1051f87d0fbbSRusty Russell  * @len: the length of data we have written.
1052f87d0fbbSRusty Russell  *
1053f87d0fbbSRusty Russell  * You should check vringh_need_notify_kern() after one or more calls
1054f87d0fbbSRusty Russell  * to this function.
1055f87d0fbbSRusty Russell  */
1056f87d0fbbSRusty Russell int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
1057f87d0fbbSRusty Russell {
1058f87d0fbbSRusty Russell 	struct vring_used_elem used;
1059f87d0fbbSRusty Russell 
1060b9f7ac8cSMichael S. Tsirkin 	used.id = cpu_to_vringh32(vrh, head);
1061b9f7ac8cSMichael S. Tsirkin 	used.len = cpu_to_vringh32(vrh, len);
1062f87d0fbbSRusty Russell 
1063f87d0fbbSRusty Russell 	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
1064f87d0fbbSRusty Russell }
1065f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_complete_kern);
1066f87d0fbbSRusty Russell 
1067f87d0fbbSRusty Russell /**
1068f87d0fbbSRusty Russell  * vringh_notify_enable_kern - we want to know if something changes.
1069f87d0fbbSRusty Russell  * @vrh: the vring.
1070f87d0fbbSRusty Russell  *
1071f87d0fbbSRusty Russell  * This always enables notifications, but returns false if there are
1072f87d0fbbSRusty Russell  * now more buffers available in the vring.
1073f87d0fbbSRusty Russell  */
1074f87d0fbbSRusty Russell bool vringh_notify_enable_kern(struct vringh *vrh)
1075f87d0fbbSRusty Russell {
1076f87d0fbbSRusty Russell 	return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
1077f87d0fbbSRusty Russell }
1078f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_notify_enable_kern);
1079f87d0fbbSRusty Russell 
1080f87d0fbbSRusty Russell /**
1081f87d0fbbSRusty Russell  * vringh_notify_disable_kern - don't tell us if something changes.
1082f87d0fbbSRusty Russell  * @vrh: the vring.
1083f87d0fbbSRusty Russell  *
1084f87d0fbbSRusty Russell  * This is our normal running state: we disable and then only enable when
1085f87d0fbbSRusty Russell  * we're going to sleep.
1086f87d0fbbSRusty Russell  */
1087f87d0fbbSRusty Russell void vringh_notify_disable_kern(struct vringh *vrh)
1088f87d0fbbSRusty Russell {
1089f87d0fbbSRusty Russell 	__vringh_notify_disable(vrh, putu16_kern);
1090f87d0fbbSRusty Russell }
1091f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_notify_disable_kern);
1092f87d0fbbSRusty Russell 
1093f87d0fbbSRusty Russell /**
1094f87d0fbbSRusty Russell  * vringh_need_notify_kern - must we tell the other side about used buffers?
1095f87d0fbbSRusty Russell  * @vrh: the vring we've called vringh_complete_kern() on.
1096f87d0fbbSRusty Russell  *
1097f87d0fbbSRusty Russell  * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
1098f87d0fbbSRusty Russell  */
1099f87d0fbbSRusty Russell int vringh_need_notify_kern(struct vringh *vrh)
1100f87d0fbbSRusty Russell {
1101f87d0fbbSRusty Russell 	return __vringh_need_notify(vrh, getu16_kern);
1102f87d0fbbSRusty Russell }
1103f87d0fbbSRusty Russell EXPORT_SYMBOL(vringh_need_notify_kern);
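/*
 * A sketch of the usual completion/notification pattern tying the
 * helpers above together (notify_other_side() stands in for whatever
 * kick mechanism the caller uses, e.g. an eventfd or interrupt):
 *
 *	err = vringh_complete_kern(vrh, head, written);
 *	if (err)
 *		return err;
 *
 *	if (vringh_need_notify_kern(vrh) > 0)
 *		notify_other_side();
 *
 *	// before sleeping, re-enable notifications and re-check:
 *	if (!vringh_notify_enable_kern(vrh))
 *		...			// more buffers pending, keep going
 */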
1104f558a845SDave Jones 
11053302363aSMichael S. Tsirkin #if IS_REACHABLE(CONFIG_VHOST_IOTLB)
11063302363aSMichael S. Tsirkin 
110742823a87SStefano Garzarella struct iotlb_vec {
110842823a87SStefano Garzarella 	union {
110942823a87SStefano Garzarella 		struct iovec *iovec;
111042823a87SStefano Garzarella 		struct bio_vec *bvec;
111142823a87SStefano Garzarella 	} iov;
111242823a87SStefano Garzarella 	size_t count;
111342823a87SStefano Garzarella };
111442823a87SStefano Garzarella 
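/*
 * Translate [addr, addr + len) through the vringh's IOTLB into @ivec:
 * iovecs when the IOTLB holds user VAs, bio_vecs otherwise.  Returns the
 * number of ivec entries filled, -EINVAL if part of the range is
 * unmapped, -EPERM if @perm is not granted, or -ENOBUFS if @ivec ran out
 * of entries (those already filled stay valid).  If @translated is
 * non-NULL, it is set to the number of bytes actually covered.
 */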
11159ad9c49cSJason Wang static int iotlb_translate(const struct vringh *vrh,
1116309bba39SStefano Garzarella 			   u64 addr, u64 len, u64 *translated,
111742823a87SStefano Garzarella 			   struct iotlb_vec *ivec, u32 perm)
11189ad9c49cSJason Wang {
11199ad9c49cSJason Wang 	struct vhost_iotlb_map *map;
11209ad9c49cSJason Wang 	struct vhost_iotlb *iotlb = vrh->iotlb;
11219ad9c49cSJason Wang 	int ret = 0;
1122f85efa9bSStefano Garzarella 	u64 s = 0, last = addr + len - 1;
11239ad9c49cSJason Wang 
1124f53d9910SStefano Garzarella 	spin_lock(vrh->iotlb_lock);
1125f53d9910SStefano Garzarella 
11269ad9c49cSJason Wang 	while (len > s) {
112742823a87SStefano Garzarella 		uintptr_t io_addr;
112842823a87SStefano Garzarella 		size_t io_len;
112942823a87SStefano Garzarella 		u64 size;
11309ad9c49cSJason Wang 
113142823a87SStefano Garzarella 		if (unlikely(ret >= ivec->count)) {
11329ad9c49cSJason Wang 			ret = -ENOBUFS;
11339ad9c49cSJason Wang 			break;
11349ad9c49cSJason Wang 		}
11359ad9c49cSJason Wang 
1136f85efa9bSStefano Garzarella 		map = vhost_iotlb_itree_first(iotlb, addr, last);
11379ad9c49cSJason Wang 		if (!map || map->start > addr) {
11389ad9c49cSJason Wang 			ret = -EINVAL;
11399ad9c49cSJason Wang 			break;
11409ad9c49cSJason Wang 		} else if (!(map->perm & perm)) {
11419ad9c49cSJason Wang 			ret = -EPERM;
11429ad9c49cSJason Wang 			break;
11439ad9c49cSJason Wang 		}
11449ad9c49cSJason Wang 
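		/* Bytes available in this mapping, starting at addr. */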
11459ad9c49cSJason Wang 		size = map->size - addr + map->start;
114642823a87SStefano Garzarella 		io_len = min(len - s, size);
114742823a87SStefano Garzarella 		io_addr = map->addr - map->start + addr;
114842823a87SStefano Garzarella 
114942823a87SStefano Garzarella 		if (vrh->use_va) {
115042823a87SStefano Garzarella 			struct iovec *iovec = ivec->iov.iovec;
115142823a87SStefano Garzarella 
115242823a87SStefano Garzarella 			iovec[ret].iov_len = io_len;
115342823a87SStefano Garzarella 			iovec[ret].iov_base = (void __user *)io_addr;
115442823a87SStefano Garzarella 		} else {
115542823a87SStefano Garzarella 			u64 pfn = io_addr >> PAGE_SHIFT;
115642823a87SStefano Garzarella 			struct bio_vec *bvec = ivec->iov.bvec;
115742823a87SStefano Garzarella 
115842823a87SStefano Garzarella 			bvec_set_page(&bvec[ret], pfn_to_page(pfn), io_len,
115942823a87SStefano Garzarella 				      io_addr & (PAGE_SIZE - 1));
116042823a87SStefano Garzarella 		}
116142823a87SStefano Garzarella 
11629ad9c49cSJason Wang 		s += size;
11639ad9c49cSJason Wang 		addr += size;
11649ad9c49cSJason Wang 		++ret;
11659ad9c49cSJason Wang 	}
11669ad9c49cSJason Wang 
1167f53d9910SStefano Garzarella 	spin_unlock(vrh->iotlb_lock);
1168f53d9910SStefano Garzarella 
1169309bba39SStefano Garzarella 	if (translated)
1170309bba39SStefano Garzarella 		*translated = min(len, s);
1171309bba39SStefano Garzarella 
11729ad9c49cSJason Wang 	return ret;
11739ad9c49cSJason Wang }
11749ad9c49cSJason Wang 
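/*
 * Copies through the IOTLB work in bursts: each iteration translates up
 * to this many ranges into a small on-stack iovec/bio_vec array instead
 * of allocating one dynamically.
 */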
1175f609d6cbSStefano Garzarella #define IOTLB_IOV_STRIDE 16
1176f609d6cbSStefano Garzarella 
11779ad9c49cSJason Wang static inline int copy_from_iotlb(const struct vringh *vrh, void *dst,
11789ad9c49cSJason Wang 				  void *src, size_t len)
11799ad9c49cSJason Wang {
118042823a87SStefano Garzarella 	struct iotlb_vec ivec;
118142823a87SStefano Garzarella 	union {
118242823a87SStefano Garzarella 		struct iovec iovec[IOTLB_IOV_STRIDE];
118342823a87SStefano Garzarella 		struct bio_vec bvec[IOTLB_IOV_STRIDE];
118442823a87SStefano Garzarella 	} iov;
1185309bba39SStefano Garzarella 	u64 total_translated = 0;
1186309bba39SStefano Garzarella 
118742823a87SStefano Garzarella 	ivec.iov.iovec = iov.iovec;
118842823a87SStefano Garzarella 	ivec.count = IOTLB_IOV_STRIDE;
118942823a87SStefano Garzarella 
1190309bba39SStefano Garzarella 	while (total_translated < len) {
1191309bba39SStefano Garzarella 		struct iov_iter iter;
1192309bba39SStefano Garzarella 		u64 translated;
11939ad9c49cSJason Wang 		int ret;
11949ad9c49cSJason Wang 
11959ad9c49cSJason Wang 		ret = iotlb_translate(vrh, (u64)(uintptr_t)src,
1196309bba39SStefano Garzarella 				      len - total_translated, &translated,
119742823a87SStefano Garzarella 				      &ivec, VHOST_MAP_RO);
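		/*
		 * -ENOBUFS only means the on-stack vector filled up: the
		 * first IOTLB_IOV_STRIDE ranges are valid, so consume them
		 * now and translate the remainder on the next iteration.
		 */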
1198309bba39SStefano Garzarella 		if (ret == -ENOBUFS)
119942823a87SStefano Garzarella 			ret = IOTLB_IOV_STRIDE;
1200309bba39SStefano Garzarella 		else if (ret < 0)
1201309bba39SStefano Garzarella 			return ret;
1202309bba39SStefano Garzarella 
120342823a87SStefano Garzarella 		if (vrh->use_va) {
120442823a87SStefano Garzarella 			iov_iter_init(&iter, ITER_SOURCE, ivec.iov.iovec, ret,
120542823a87SStefano Garzarella 				      translated);
120642823a87SStefano Garzarella 		} else {
120742823a87SStefano Garzarella 			iov_iter_bvec(&iter, ITER_SOURCE, ivec.iov.bvec, ret,
120842823a87SStefano Garzarella 				      translated);
120942823a87SStefano Garzarella 		}
1210309bba39SStefano Garzarella 
1211309bba39SStefano Garzarella 		ret = copy_from_iter(dst, translated, &iter);
12129ad9c49cSJason Wang 		if (ret < 0)
12139ad9c49cSJason Wang 			return ret;
12149ad9c49cSJason Wang 
1215309bba39SStefano Garzarella 		src += translated;
1216309bba39SStefano Garzarella 		dst += translated;
1217309bba39SStefano Garzarella 		total_translated += translated;
1218309bba39SStefano Garzarella 	}
12199ad9c49cSJason Wang 
1220309bba39SStefano Garzarella 	return total_translated;
12219ad9c49cSJason Wang }
12229ad9c49cSJason Wang 
12239ad9c49cSJason Wang static inline int copy_to_iotlb(const struct vringh *vrh, void *dst,
12249ad9c49cSJason Wang 				void *src, size_t len)
12259ad9c49cSJason Wang {
122642823a87SStefano Garzarella 	struct iotlb_vec ivec;
122742823a87SStefano Garzarella 	union {
122842823a87SStefano Garzarella 		struct iovec iovec[IOTLB_IOV_STRIDE];
122942823a87SStefano Garzarella 		struct bio_vec bvec[IOTLB_IOV_STRIDE];
123042823a87SStefano Garzarella 	} iov;
1231309bba39SStefano Garzarella 	u64 total_translated = 0;
1232309bba39SStefano Garzarella 
123342823a87SStefano Garzarella 	ivec.iov.iovec = iov.iovec;
123442823a87SStefano Garzarella 	ivec.count = IOTLB_IOV_STRIDE;
123542823a87SStefano Garzarella 
1236309bba39SStefano Garzarella 	while (total_translated < len) {
1237309bba39SStefano Garzarella 		struct iov_iter iter;
1238309bba39SStefano Garzarella 		u64 translated;
12399ad9c49cSJason Wang 		int ret;
12409ad9c49cSJason Wang 
12419ad9c49cSJason Wang 		ret = iotlb_translate(vrh, (u64)(uintptr_t)dst,
1242309bba39SStefano Garzarella 				      len - total_translated, &translated,
124342823a87SStefano Garzarella 				      &ivec, VHOST_MAP_WO);
1244309bba39SStefano Garzarella 		if (ret == -ENOBUFS)
124542823a87SStefano Garzarella 			ret = IOTLB_IOV_STRIDE;
1246309bba39SStefano Garzarella 		else if (ret < 0)
1247309bba39SStefano Garzarella 			return ret;
1248309bba39SStefano Garzarella 
124942823a87SStefano Garzarella 		if (vrh->use_va) {
125042823a87SStefano Garzarella 			iov_iter_init(&iter, ITER_DEST, ivec.iov.iovec, ret,
125142823a87SStefano Garzarella 				      translated);
125242823a87SStefano Garzarella 		} else {
125342823a87SStefano Garzarella 			iov_iter_bvec(&iter, ITER_DEST, ivec.iov.bvec, ret,
125442823a87SStefano Garzarella 				      translated);
125542823a87SStefano Garzarella 		}
1256309bba39SStefano Garzarella 
1257309bba39SStefano Garzarella 		ret = copy_to_iter(src, translated, &iter);
12589ad9c49cSJason Wang 		if (ret < 0)
12599ad9c49cSJason Wang 			return ret;
12609ad9c49cSJason Wang 
1261309bba39SStefano Garzarella 		src += translated;
1262309bba39SStefano Garzarella 		dst += translated;
1263309bba39SStefano Garzarella 		total_translated += translated;
1264309bba39SStefano Garzarella 	}
12659ad9c49cSJason Wang 
1266309bba39SStefano Garzarella 	return total_translated;
12679ad9c49cSJason Wang }
12689ad9c49cSJason Wang 
12699ad9c49cSJason Wang static inline int getu16_iotlb(const struct vringh *vrh,
12709ad9c49cSJason Wang 			       u16 *val, const __virtio16 *p)
12719ad9c49cSJason Wang {
127242823a87SStefano Garzarella 	struct iotlb_vec ivec;
127342823a87SStefano Garzarella 	union {
127442823a87SStefano Garzarella 		struct iovec iovec[1];
127542823a87SStefano Garzarella 		struct bio_vec bvec[1];
127642823a87SStefano Garzarella 	} iov;
127742823a87SStefano Garzarella 	__virtio16 tmp;
12789ad9c49cSJason Wang 	int ret;
12799ad9c49cSJason Wang 
128042823a87SStefano Garzarella 	ivec.iov.iovec = iov.iovec;
128142823a87SStefano Garzarella 	ivec.count = 1;
128242823a87SStefano Garzarella 
12839ad9c49cSJason Wang 	/* Atomic read is needed for getu16 */
128442823a87SStefano Garzarella 	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
128542823a87SStefano Garzarella 			      NULL, &ivec, VHOST_MAP_RO);
12869ad9c49cSJason Wang 	if (ret < 0)
12879ad9c49cSJason Wang 		return ret;
12889ad9c49cSJason Wang 
128942823a87SStefano Garzarella 	if (vrh->use_va) {
129042823a87SStefano Garzarella 		ret = __get_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
129142823a87SStefano Garzarella 		if (ret)
129242823a87SStefano Garzarella 			return ret;
129342823a87SStefano Garzarella 	} else {
129442823a87SStefano Garzarella 		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
129542823a87SStefano Garzarella 		void *from = kaddr + ivec.iov.bvec[0].bv_offset;
129642823a87SStefano Garzarella 
129742823a87SStefano Garzarella 		tmp = READ_ONCE(*(__virtio16 *)from);
1298c0371782SStefano Garzarella 		kunmap_local(kaddr);
129942823a87SStefano Garzarella 	}
130042823a87SStefano Garzarella 
130142823a87SStefano Garzarella 	*val = vringh16_to_cpu(vrh, tmp);
13029ad9c49cSJason Wang 
13039ad9c49cSJason Wang 	return 0;
13049ad9c49cSJason Wang }
13059ad9c49cSJason Wang 
13069ad9c49cSJason Wang static inline int putu16_iotlb(const struct vringh *vrh,
13079ad9c49cSJason Wang 			       __virtio16 *p, u16 val)
13089ad9c49cSJason Wang {
130942823a87SStefano Garzarella 	struct iotlb_vec ivec;
131042823a87SStefano Garzarella 	union {
131142823a87SStefano Garzarella 		struct iovec iovec;
131242823a87SStefano Garzarella 		struct bio_vec bvec;
131342823a87SStefano Garzarella 	} iov;
131442823a87SStefano Garzarella 	__virtio16 tmp;
13159ad9c49cSJason Wang 	int ret;
13169ad9c49cSJason Wang 
131742823a87SStefano Garzarella 	ivec.iov.iovec = &iov.iovec;
131842823a87SStefano Garzarella 	ivec.count = 1;
131942823a87SStefano Garzarella 
13209ad9c49cSJason Wang 	/* Atomic write is needed for putu16 */
132142823a87SStefano Garzarella 	ret = iotlb_translate(vrh, (u64)(uintptr_t)p, sizeof(*p),
132242823a87SStefano Garzarella 			      NULL, &ivec, VHOST_MAP_WO);
13239ad9c49cSJason Wang 	if (ret < 0)
13249ad9c49cSJason Wang 		return ret;
13259ad9c49cSJason Wang 
132642823a87SStefano Garzarella 	tmp = cpu_to_vringh16(vrh, val);
132742823a87SStefano Garzarella 
132842823a87SStefano Garzarella 	if (vrh->use_va) {
132942823a87SStefano Garzarella 		ret = __put_user(tmp, (__virtio16 __user *)ivec.iov.iovec[0].iov_base);
133042823a87SStefano Garzarella 		if (ret)
133142823a87SStefano Garzarella 			return ret;
133242823a87SStefano Garzarella 	} else {
133342823a87SStefano Garzarella 		void *kaddr = kmap_local_page(ivec.iov.bvec[0].bv_page);
133442823a87SStefano Garzarella 		void *to = kaddr + ivec.iov.bvec[0].bv_offset;
133542823a87SStefano Garzarella 
133642823a87SStefano Garzarella 		WRITE_ONCE(*(__virtio16 *)to, tmp);
1337c0371782SStefano Garzarella 		kunmap_local(kaddr);
133842823a87SStefano Garzarella 	}
13399ad9c49cSJason Wang 
13409ad9c49cSJason Wang 	return 0;
13419ad9c49cSJason Wang }
13429ad9c49cSJason Wang 
13439ad9c49cSJason Wang static inline int copydesc_iotlb(const struct vringh *vrh,
13449ad9c49cSJason Wang 				 void *dst, const void *src, size_t len)
13459ad9c49cSJason Wang {
13469ad9c49cSJason Wang 	int ret;
13479ad9c49cSJason Wang 
13489ad9c49cSJason Wang 	ret = copy_from_iotlb(vrh, dst, (void *)src, len);
13499ad9c49cSJason Wang 	if (ret != len)
13509ad9c49cSJason Wang 		return -EFAULT;
13519ad9c49cSJason Wang 
13529ad9c49cSJason Wang 	return 0;
13539ad9c49cSJason Wang }
13549ad9c49cSJason Wang 
13559ad9c49cSJason Wang static inline int xfer_from_iotlb(const struct vringh *vrh, void *src,
13569ad9c49cSJason Wang 				  void *dst, size_t len)
13579ad9c49cSJason Wang {
13589ad9c49cSJason Wang 	int ret;
13599ad9c49cSJason Wang 
13609ad9c49cSJason Wang 	ret = copy_from_iotlb(vrh, dst, src, len);
13619ad9c49cSJason Wang 	if (ret != len)
13629ad9c49cSJason Wang 		return -EFAULT;
13639ad9c49cSJason Wang 
13649ad9c49cSJason Wang 	return 0;
13659ad9c49cSJason Wang }
13669ad9c49cSJason Wang 
13679ad9c49cSJason Wang static inline int xfer_to_iotlb(const struct vringh *vrh,
13689ad9c49cSJason Wang 			       void *dst, void *src, size_t len)
13699ad9c49cSJason Wang {
13709ad9c49cSJason Wang 	int ret;
13719ad9c49cSJason Wang 
13729ad9c49cSJason Wang 	ret = copy_to_iotlb(vrh, dst, src, len);
13739ad9c49cSJason Wang 	if (ret != len)
13749ad9c49cSJason Wang 		return -EFAULT;
13759ad9c49cSJason Wang 
13769ad9c49cSJason Wang 	return 0;
13779ad9c49cSJason Wang }
13789ad9c49cSJason Wang 
13799ad9c49cSJason Wang static inline int putused_iotlb(const struct vringh *vrh,
13809ad9c49cSJason Wang 				struct vring_used_elem *dst,
13819ad9c49cSJason Wang 				const struct vring_used_elem *src,
13829ad9c49cSJason Wang 				unsigned int num)
13839ad9c49cSJason Wang {
13849ad9c49cSJason Wang 	int size = num * sizeof(*dst);
13859ad9c49cSJason Wang 	int ret;
13869ad9c49cSJason Wang 
13879ad9c49cSJason Wang 	ret = copy_to_iotlb(vrh, dst, (void *)src, size);
13889ad9c49cSJason Wang 	if (ret != size)
13899ad9c49cSJason Wang 		return -EFAULT;
13909ad9c49cSJason Wang 
13919ad9c49cSJason Wang 	return 0;
13929ad9c49cSJason Wang }
13939ad9c49cSJason Wang 
13949ad9c49cSJason Wang /**
13959ad9c49cSJason Wang  * vringh_init_iotlb - initialize a vringh for a ring with IOTLB.
13969ad9c49cSJason Wang  * @vrh: the vringh to initialize.
13979ad9c49cSJason Wang  * @features: the feature bits for this ring.
13989ad9c49cSJason Wang  * @num: the number of elements.
13999ad9c49cSJason Wang  * @weak_barriers: true if we only need memory barriers, not I/O.
1400905233afSStefano Garzarella  * @desc: the descriptor pointer (an address translated through the IOTLB).
1401905233afSStefano Garzarella  * @avail: the avail pointer (an address translated through the IOTLB).
1402905233afSStefano Garzarella  * @used: the used pointer (an address translated through the IOTLB).
14039ad9c49cSJason Wang  *
14049ad9c49cSJason Wang  * Returns an error if num is invalid.
14059ad9c49cSJason Wang  */
14069ad9c49cSJason Wang int vringh_init_iotlb(struct vringh *vrh, u64 features,
14079ad9c49cSJason Wang 		      unsigned int num, bool weak_barriers,
14089ad9c49cSJason Wang 		      struct vring_desc *desc,
14099ad9c49cSJason Wang 		      struct vring_avail *avail,
14109ad9c49cSJason Wang 		      struct vring_used *used)
14119ad9c49cSJason Wang {
141242823a87SStefano Garzarella 	vrh->use_va = false;
141342823a87SStefano Garzarella 
14149ad9c49cSJason Wang 	return vringh_init_kern(vrh, features, num, weak_barriers,
14159ad9c49cSJason Wang 				desc, avail, used);
14169ad9c49cSJason Wang }
14179ad9c49cSJason Wang EXPORT_SYMBOL(vringh_init_iotlb);
14189ad9c49cSJason Wang 
14199ad9c49cSJason Wang /**
142042823a87SStefano Garzarella  * vringh_init_iotlb_va - initialize a vringh for a ring with IOTLB containing
142142823a87SStefano Garzarella  *                        user VA.
142242823a87SStefano Garzarella  * @vrh: the vringh to initialize.
142342823a87SStefano Garzarella  * @features: the feature bits for this ring.
142442823a87SStefano Garzarella  * @num: the number of elements.
142542823a87SStefano Garzarella  * @weak_barriers: true if we only need memory barriers, not I/O.
142642823a87SStefano Garzarella  * @desc: the userspace descriptor pointer.
142742823a87SStefano Garzarella  * @avail: the userspace avail pointer.
142842823a87SStefano Garzarella  * @used: the userspace used pointer.
142942823a87SStefano Garzarella  *
143042823a87SStefano Garzarella  * Returns an error if num is invalid.
143142823a87SStefano Garzarella  */
143242823a87SStefano Garzarella int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
143342823a87SStefano Garzarella 			 unsigned int num, bool weak_barriers,
143442823a87SStefano Garzarella 			 struct vring_desc *desc,
143542823a87SStefano Garzarella 			 struct vring_avail *avail,
143642823a87SStefano Garzarella 			 struct vring_used *used)
143742823a87SStefano Garzarella {
143842823a87SStefano Garzarella 	vrh->use_va = true;
143942823a87SStefano Garzarella 
144042823a87SStefano Garzarella 	return vringh_init_kern(vrh, features, num, weak_barriers,
144142823a87SStefano Garzarella 				desc, avail, used);
144242823a87SStefano Garzarella }
144342823a87SStefano Garzarella EXPORT_SYMBOL(vringh_init_iotlb_va);
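/*
 * Note: vringh_init_iotlb_va() differs from vringh_init_iotlb() only in
 * how IOTLB translations are interpreted afterwards: as userspace
 * virtual addresses (iovecs) rather than physical pages (bio_vecs); see
 * iotlb_translate() above.
 */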
144442823a87SStefano Garzarella 
144542823a87SStefano Garzarella /**
14469ad9c49cSJason Wang  * vringh_set_iotlb - associate an IOTLB (and its lock) with this vringh.
14479ad9c49cSJason Wang  * @vrh: the vring
14489ad9c49cSJason Wang  * @iotlb: iotlb associated with this vring
1449f53d9910SStefano Garzarella  * @iotlb_lock: spinlock to synchronize the iotlb accesses
14509ad9c49cSJason Wang  */
1451f53d9910SStefano Garzarella void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
1452f53d9910SStefano Garzarella 		      spinlock_t *iotlb_lock)
14539ad9c49cSJason Wang {
14549ad9c49cSJason Wang 	vrh->iotlb = iotlb;
1455f53d9910SStefano Garzarella 	vrh->iotlb_lock = iotlb_lock;
14569ad9c49cSJason Wang }
14579ad9c49cSJason Wang EXPORT_SYMBOL(vringh_set_iotlb);
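/*
 * A minimal setup sketch for a vDPA-style backend (the vhost_iotlb and
 * its spinlock are assumed to be owned and populated by the caller):
 *
 *	err = vringh_init_iotlb(vrh, features, num, false,
 *				desc, avail, used);
 *	if (err)
 *		return err;
 *
 *	vringh_set_iotlb(vrh, iotlb, &iotlb_lock);
 */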
14589ad9c49cSJason Wang 
14599ad9c49cSJason Wang /**
14609ad9c49cSJason Wang  * vringh_getdesc_iotlb - get next available descriptor from ring with
14619ad9c49cSJason Wang  * IOTLB.
14629ad9c49cSJason Wang  * @vrh: the kernelspace vring.
14639ad9c49cSJason Wang  * @riov: where to put the readable descriptors (or NULL)
14649ad9c49cSJason Wang  * @wiov: where to put the writable descriptors (or NULL)
14659ad9c49cSJason Wang  * @head: head index we received, for passing to vringh_complete_iotlb().
14669ad9c49cSJason Wang  * @gfp: flags for allocating larger riov/wiov.
14679ad9c49cSJason Wang  *
14689ad9c49cSJason Wang  * Returns 0 if there was no descriptor, 1 if there was, or -errno.
14699ad9c49cSJason Wang  *
14709ad9c49cSJason Wang  * Note that on error return, you can tell the difference between an
14719ad9c49cSJason Wang  * invalid ring and a single invalid descriptor: in the former case,
14729ad9c49cSJason Wang  * *head will be vrh->vring.num.  You may be able to ignore an invalid
14739ad9c49cSJason Wang  * descriptor, but there's not much you can do with an invalid ring.
14749ad9c49cSJason Wang  *
147569c13c58SStefano Garzarella  * Note that you can reuse riov and wiov with subsequent calls. Content is
147669c13c58SStefano Garzarella  * overwritten and memory reallocated if more space is needed.
147769c13c58SStefano Garzarella  * Once you no longer need riov and wiov, clean them up by calling
147869c13c58SStefano Garzarella  * vringh_kiov_cleanup() to release the memory, even on error!
14799ad9c49cSJason Wang  */
14809ad9c49cSJason Wang int vringh_getdesc_iotlb(struct vringh *vrh,
14819ad9c49cSJason Wang 			 struct vringh_kiov *riov,
14829ad9c49cSJason Wang 			 struct vringh_kiov *wiov,
14839ad9c49cSJason Wang 			 u16 *head,
14849ad9c49cSJason Wang 			 gfp_t gfp)
14859ad9c49cSJason Wang {
14869ad9c49cSJason Wang 	int err;
14879ad9c49cSJason Wang 
14889ad9c49cSJason Wang 	err = __vringh_get_head(vrh, getu16_iotlb, &vrh->last_avail_idx);
14899ad9c49cSJason Wang 	if (err < 0)
14909ad9c49cSJason Wang 		return err;
14919ad9c49cSJason Wang 
14929ad9c49cSJason Wang 	/* Empty... */
14939ad9c49cSJason Wang 	if (err == vrh->vring.num)
14949ad9c49cSJason Wang 		return 0;
14959ad9c49cSJason Wang 
14969ad9c49cSJason Wang 	*head = err;
14979ad9c49cSJason Wang 	err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
14989ad9c49cSJason Wang 			   gfp, copydesc_iotlb);
14999ad9c49cSJason Wang 	if (err)
15009ad9c49cSJason Wang 		return err;
15019ad9c49cSJason Wang 
15029ad9c49cSJason Wang 	return 1;
15039ad9c49cSJason Wang }
15049ad9c49cSJason Wang EXPORT_SYMBOL(vringh_getdesc_iotlb);
15059ad9c49cSJason Wang 
15069ad9c49cSJason Wang /**
15079ad9c49cSJason Wang  * vringh_iov_pull_iotlb - copy bytes from vring_iov.
15089ad9c49cSJason Wang  * @vrh: the vring.
15099ad9c49cSJason Wang  * @riov: the riov as passed to vringh_getdesc_iotlb() (updated as we consume)
15109ad9c49cSJason Wang  * @dst: the place to copy.
15119ad9c49cSJason Wang  * @len: the maximum length to copy.
15129ad9c49cSJason Wang  *
15139ad9c49cSJason Wang  * Returns the bytes copied <= len or a negative errno.
15149ad9c49cSJason Wang  */
15159ad9c49cSJason Wang ssize_t vringh_iov_pull_iotlb(struct vringh *vrh,
15169ad9c49cSJason Wang 			      struct vringh_kiov *riov,
15179ad9c49cSJason Wang 			      void *dst, size_t len)
15189ad9c49cSJason Wang {
15199ad9c49cSJason Wang 	return vringh_iov_xfer(vrh, riov, dst, len, xfer_from_iotlb);
15209ad9c49cSJason Wang }
15219ad9c49cSJason Wang EXPORT_SYMBOL(vringh_iov_pull_iotlb);
15229ad9c49cSJason Wang 
15239ad9c49cSJason Wang /**
15249ad9c49cSJason Wang  * vringh_iov_push_iotlb - copy bytes into vring_iov.
15259ad9c49cSJason Wang  * @vrh: the vring.
15269ad9c49cSJason Wang  * @wiov: the wiov as passed to vringh_getdesc_iotlb() (updated as we consume)
15278009b0f4SStefano Garzarella  * @src: the place to copy from.
15289ad9c49cSJason Wang  * @len: the maximum length to copy.
15299ad9c49cSJason Wang  *
15309ad9c49cSJason Wang  * Returns the bytes copied <= len or a negative errno.
15319ad9c49cSJason Wang  */
15329ad9c49cSJason Wang ssize_t vringh_iov_push_iotlb(struct vringh *vrh,
15339ad9c49cSJason Wang 			      struct vringh_kiov *wiov,
15349ad9c49cSJason Wang 			      const void *src, size_t len)
15359ad9c49cSJason Wang {
15369ad9c49cSJason Wang 	return vringh_iov_xfer(vrh, wiov, (void *)src, len, xfer_to_iotlb);
15379ad9c49cSJason Wang }
15389ad9c49cSJason Wang EXPORT_SYMBOL(vringh_iov_push_iotlb);
15399ad9c49cSJason Wang 
15409ad9c49cSJason Wang /**
15419ad9c49cSJason Wang  * vringh_abandon_iotlb - we've decided not to handle the descriptor(s).
15429ad9c49cSJason Wang  * @vrh: the vring.
15439ad9c49cSJason Wang  * @num: the number of descriptors to put back (i.e. num
15449ad9c49cSJason Wang  *	 vringh_getdesc_iotlb() calls to undo).
15459ad9c49cSJason Wang  *
15469ad9c49cSJason Wang  * The next vringh_getdesc_iotlb() will return the old descriptor(s) again.
15479ad9c49cSJason Wang  */
15489ad9c49cSJason Wang void vringh_abandon_iotlb(struct vringh *vrh, unsigned int num)
15499ad9c49cSJason Wang {
15509ad9c49cSJason Wang 	/* We only update vring_avail_event(vr) when we want to be notified,
15519ad9c49cSJason Wang 	 * so we haven't changed that yet.
15529ad9c49cSJason Wang 	 */
15539ad9c49cSJason Wang 	vrh->last_avail_idx -= num;
15549ad9c49cSJason Wang }
15559ad9c49cSJason Wang EXPORT_SYMBOL(vringh_abandon_iotlb);
15569ad9c49cSJason Wang 
15579ad9c49cSJason Wang /**
15589ad9c49cSJason Wang  * vringh_complete_iotlb - we've finished with descriptor, publish it.
15599ad9c49cSJason Wang  * @vrh: the vring.
15609ad9c49cSJason Wang  * @head: the head as filled in by vringh_getdesc_iotlb.
15619ad9c49cSJason Wang  * @len: the length of data we have written.
15629ad9c49cSJason Wang  *
15639ad9c49cSJason Wang  * You should check vringh_need_notify_iotlb() after one or more calls
15649ad9c49cSJason Wang  * to this function.
15659ad9c49cSJason Wang  */
15669ad9c49cSJason Wang int vringh_complete_iotlb(struct vringh *vrh, u16 head, u32 len)
15679ad9c49cSJason Wang {
15689ad9c49cSJason Wang 	struct vring_used_elem used;
15699ad9c49cSJason Wang 
15709ad9c49cSJason Wang 	used.id = cpu_to_vringh32(vrh, head);
15719ad9c49cSJason Wang 	used.len = cpu_to_vringh32(vrh, len);
15729ad9c49cSJason Wang 
15739ad9c49cSJason Wang 	return __vringh_complete(vrh, &used, 1, putu16_iotlb, putused_iotlb);
15749ad9c49cSJason Wang }
15759ad9c49cSJason Wang EXPORT_SYMBOL(vringh_complete_iotlb);
15769ad9c49cSJason Wang 
15779ad9c49cSJason Wang /**
15789ad9c49cSJason Wang  * vringh_notify_enable_iotlb - we want to know if something changes.
15799ad9c49cSJason Wang  * @vrh: the vring.
15809ad9c49cSJason Wang  *
15819ad9c49cSJason Wang  * This always enables notifications, but returns false if there are
15829ad9c49cSJason Wang  * now more buffers available in the vring.
15839ad9c49cSJason Wang  */
15849ad9c49cSJason Wang bool vringh_notify_enable_iotlb(struct vringh *vrh)
15859ad9c49cSJason Wang {
15869ad9c49cSJason Wang 	return __vringh_notify_enable(vrh, getu16_iotlb, putu16_iotlb);
15879ad9c49cSJason Wang }
15889ad9c49cSJason Wang EXPORT_SYMBOL(vringh_notify_enable_iotlb);
15899ad9c49cSJason Wang 
15909ad9c49cSJason Wang /**
15919ad9c49cSJason Wang  * vringh_notify_disable_iotlb - don't tell us if something changes.
15929ad9c49cSJason Wang  * @vrh: the vring.
15939ad9c49cSJason Wang  *
15949ad9c49cSJason Wang  * This is our normal running state: we disable and then only enable when
15959ad9c49cSJason Wang  * we're going to sleep.
15969ad9c49cSJason Wang  */
15979ad9c49cSJason Wang void vringh_notify_disable_iotlb(struct vringh *vrh)
15989ad9c49cSJason Wang {
15999ad9c49cSJason Wang 	__vringh_notify_disable(vrh, putu16_iotlb);
16009ad9c49cSJason Wang }
16019ad9c49cSJason Wang EXPORT_SYMBOL(vringh_notify_disable_iotlb);
16029ad9c49cSJason Wang 
16039ad9c49cSJason Wang /**
16049ad9c49cSJason Wang  * vringh_need_notify_iotlb - must we tell the other side about used buffers?
16059ad9c49cSJason Wang  * @vrh: the vring we've called vringh_complete_iotlb() on.
16069ad9c49cSJason Wang  *
16079ad9c49cSJason Wang  * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
16089ad9c49cSJason Wang  */
16099ad9c49cSJason Wang int vringh_need_notify_iotlb(struct vringh *vrh)
16109ad9c49cSJason Wang {
16119ad9c49cSJason Wang 	return __vringh_need_notify(vrh, getu16_iotlb);
16129ad9c49cSJason Wang }
16139ad9c49cSJason Wang EXPORT_SYMBOL(vringh_need_notify_iotlb);
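/*
 * A minimal worker-loop sketch over an IOTLB ring, mirroring the
 * kernelspace pattern above (handle() and notify_other_side() are
 * hypothetical caller-side helpers):
 *
 *	for (;;) {
 *		err = vringh_getdesc_iotlb(vrh, &riov, &wiov, &head,
 *					   GFP_ATOMIC);
 *		if (err <= 0)
 *			break;		// empty (0) or error (< 0)
 *
 *		len = handle(vrh, &riov, &wiov);
 *		vringh_complete_iotlb(vrh, head, len);
 *	}
 *
 *	vringh_kiov_cleanup(&riov);
 *	vringh_kiov_cleanup(&wiov);
 *
 *	if (vringh_need_notify_iotlb(vrh) > 0)
 *		notify_other_side();
 */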
16149ad9c49cSJason Wang 
16153302363aSMichael S. Tsirkin #endif
16169ad9c49cSJason Wang 
1617f558a845SDave Jones MODULE_LICENSE("GPL");
1618