xref: /openbmc/linux/drivers/vhost/vhost.c (revision ee1cd5048959de496cd005c50b137212a5b62062)
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Inspiration, some code, and most witty comments come from
 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
 *
 * Generic code for virtio server in host kernel.
 */

#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/vhost_task.h>
#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
#include <linux/kcov.h>

#include "vhost.h"

static ushort max_mem_regions = 64;
module_param(max_mem_regions, ushort, 0444);
MODULE_PARM_DESC(max_mem_regions,
	"Maximum number of memory regions in memory map. (default: 64)");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
	"Maximum number of iotlb entries. (default: 2048)");
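
/*
 * Both parameters are read-only at runtime (perm 0444); a sketch of
 * raising them at load time, assuming vhost is built as a module:
 *
 *	modprobe vhost max_mem_regions=128 max_iotlb_entries=4096
 */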

enum {
	VHOST_MEMORY_F_LOG = 0x1,
};

#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])

#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
	vq->user_be = !virtio_legacy_is_little_endian();
}

static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq)
{
	vq->user_be = true;
}

static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq)
{
	vq->user_be = false;
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	struct vhost_vring_state s;

	if (vq->private_data)
		return -EBUSY;

	if (copy_from_user(&s, argp, sizeof(s)))
		return -EFAULT;

	if (s.num != VHOST_VRING_LITTLE_ENDIAN &&
	    s.num != VHOST_VRING_BIG_ENDIAN)
		return -EINVAL;

	if (s.num == VHOST_VRING_BIG_ENDIAN)
		vhost_enable_cross_endian_big(vq);
	else
		vhost_enable_cross_endian_little(vq);

	return 0;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	struct vhost_vring_state s = {
		.index = idx,
		.num = vq->user_be
	};

	if (copy_to_user(argp, &s, sizeof(s)))
		return -EFAULT;

	return 0;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	/* Note for legacy virtio: user_be is initialized at reset time
	 * according to the host endianness. If userspace does not set an
	 * explicit endianness, the default behavior is native endian, as
	 * expected by legacy virtio.
	 */
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
}
#else
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	return -ENOIOCTLCMD;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	return -ENOIOCTLCMD;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
		|| virtio_legacy_is_little_endian();
}
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
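
/*
 * Userspace picks a legacy ring's endianness with an ioctl before the
 * backend is started; a sketch (error handling omitted):
 *
 *	struct vhost_vring_state s = {
 *		.index = 0,
 *		.num = VHOST_VRING_BIG_ENDIAN,
 *	};
 *	ioctl(vhost_fd, VHOST_SET_VRING_ENDIAN, &s);
 */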

static void vhost_reset_is_le(struct vhost_virtqueue *vq)
{
	vhost_init_is_le(vq);
}

struct vhost_flush_struct {
	struct vhost_work work;
	struct completion wait_event;
};

static void vhost_flush_work(struct vhost_work *work)
{
	struct vhost_flush_struct *s;

	s = container_of(work, struct vhost_flush_struct, work);
	complete(&s->wait_event);
}

static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
	struct vhost_work *work = &poll->work;

	if (!(key_to_poll(key) & poll->mask))
		return 0;

	if (!poll->dev->use_worker)
		work->fn(work);
	else
		vhost_poll_queue(poll);

	return 0;
}

void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	clear_bit(VHOST_WORK_QUEUED, &work->flags);
	work->fn = fn;
}
EXPORT_SYMBOL_GPL(vhost_work_init);
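
/*
 * Typical backend pattern, a sketch (my_handle_kick stands in for a
 * driver's handler; it is not a function in this file):
 *
 *	static void my_handle_kick(struct vhost_work *work) { ... }
 *	...
 *	vhost_work_init(&work, my_handle_kick);
 *	vhost_vq_work_queue(vq, &work);
 */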

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     __poll_t mask, struct vhost_dev *dev,
		     struct vhost_virtqueue *vq)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;
	poll->wqh = NULL;
	poll->vq = vq;

	vhost_work_init(&poll->work, fn);
}
EXPORT_SYMBOL_GPL(vhost_poll_init);

/* Start polling a file. We add ourselves to file's wait queue. The caller must
 * keep a reference to a file until after vhost_poll_stop is called. */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	__poll_t mask;

	if (poll->wqh)
		return 0;

	mask = vfs_poll(file, &poll->table);
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
	if (mask & EPOLLERR) {
		vhost_poll_stop(poll);
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);

/* Stop polling a file. After this function returns, it becomes safe to drop the
 * file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
	if (poll->wqh) {
		remove_wait_queue(poll->wqh, &poll->wait);
		poll->wqh = NULL;
	}
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);
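
/*
 * Lifecycle sketch: start and stop are paired, and teardown flushes
 * after stopping so queued work finishes before the file is released
 * (handle_kick stands in for a driver's kick handler):
 *
 *	vhost_poll_init(&poll, handle_kick, EPOLLIN, dev, vq);
 *	vhost_poll_start(&poll, file);
 *	...
 *	vhost_poll_stop(&poll);
 *	vhost_dev_flush(dev);
 */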

static void vhost_worker_queue(struct vhost_worker *worker,
			       struct vhost_work *work)
{
	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
		/* We can only add the work to the list after we're
		 * sure it was not in the list.
		 * test_and_set_bit() implies a memory barrier.
		 */
		llist_add(&work->node, &worker->work_list);
		vhost_task_wake(worker->vtsk);
	}
}

bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
{
	struct vhost_worker *worker;
	bool queued = false;

	rcu_read_lock();
	worker = rcu_dereference(vq->worker);
	if (worker) {
		queued = true;
		vhost_worker_queue(worker, work);
	}
	rcu_read_unlock();

	return queued;
}
EXPORT_SYMBOL_GPL(vhost_vq_work_queue);

void vhost_vq_flush(struct vhost_virtqueue *vq)
{
	struct vhost_flush_struct flush;

	init_completion(&flush.wait_event);
	vhost_work_init(&flush.work, vhost_flush_work);

	if (vhost_vq_work_queue(vq, &flush.work))
		wait_for_completion(&flush.wait_event);
}
EXPORT_SYMBOL_GPL(vhost_vq_flush);

/**
 * __vhost_worker_flush - flush a worker
 * @worker: worker to flush
 *
 * The worker's mutex must be held.
 */
static void __vhost_worker_flush(struct vhost_worker *worker)
{
	struct vhost_flush_struct flush;

	if (!worker->attachment_cnt || worker->killed)
		return;

	init_completion(&flush.wait_event);
	vhost_work_init(&flush.work, vhost_flush_work);

	vhost_worker_queue(worker, &flush.work);
	/*
	 * Drop mutex in case our worker is killed and it needs to take the
	 * mutex to force cleanup.
	 */
	mutex_unlock(&worker->mutex);
	wait_for_completion(&flush.wait_event);
	mutex_lock(&worker->mutex);
}

static void vhost_worker_flush(struct vhost_worker *worker)
{
	mutex_lock(&worker->mutex);
	__vhost_worker_flush(worker);
	mutex_unlock(&worker->mutex);
}

void vhost_dev_flush(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	unsigned long i;

	xa_for_each(&dev->worker_xa, i, worker)
		vhost_worker_flush(worker);
}
EXPORT_SYMBOL_GPL(vhost_dev_flush);
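
/*
 * Note that this flushes every worker in the device's xarray: the
 * default worker created at VHOST_SET_OWNER time and any workers
 * userspace added via VHOST_NEW_WORKER.
 */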

/* A lockless hint for busy polling code to exit the loop */
bool vhost_vq_has_work(struct vhost_virtqueue *vq)
{
	struct vhost_worker *worker;
	bool has_work = false;

	rcu_read_lock();
	worker = rcu_dereference(vq->worker);
	if (worker && !llist_empty(&worker->work_list))
		has_work = true;
	rcu_read_unlock();

	return has_work;
}
EXPORT_SYMBOL_GPL(vhost_vq_has_work);

void vhost_poll_queue(struct vhost_poll *poll)
{
	vhost_vq_work_queue(poll->vq, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);

static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
{
	int j;

	for (j = 0; j < VHOST_NUM_ADDRS; j++)
		vq->meta_iotlb[j] = NULL;
}

static void vhost_vq_meta_reset(struct vhost_dev *d)
{
	int i;

	for (i = 0; i < d->nvqs; ++i)
		__vhost_vq_meta_reset(d->vqs[i]);
}

static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
{
	call_ctx->ctx = NULL;
	memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
}

bool vhost_vq_is_setup(struct vhost_virtqueue *vq)
{
	return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq);
}
EXPORT_SYMBOL_GPL(vhost_vq_is_setup);

static void vhost_vq_reset(struct vhost_dev *dev,
			   struct vhost_virtqueue *vq)
{
	vq->num = 1;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->last_avail_idx = 0;
	vq->avail_idx = 0;
	vq->last_used_idx = 0;
	vq->signalled_used = 0;
	vq->signalled_used_valid = false;
	vq->used_flags = 0;
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->private_data = NULL;
	vq->acked_features = 0;
	vq->acked_backend_features = 0;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
	vq->kick = NULL;
	vq->log_ctx = NULL;
	vhost_disable_cross_endian(vq);
	vhost_reset_is_le(vq);
	vq->busyloop_timeout = 0;
	vq->umem = NULL;
	vq->iotlb = NULL;
	rcu_assign_pointer(vq->worker, NULL);
	vhost_vring_call_reset(&vq->call_ctx);
	__vhost_vq_meta_reset(vq);
}

static bool vhost_run_work_list(void *data)
{
	struct vhost_worker *worker = data;
	struct vhost_work *work, *work_next;
	struct llist_node *node;

	node = llist_del_all(&worker->work_list);
	if (node) {
		__set_current_state(TASK_RUNNING);

		node = llist_reverse_order(node);
		/* make sure flag is seen after deletion */
		smp_wmb();
		llist_for_each_entry_safe(work, work_next, node, node) {
			clear_bit(VHOST_WORK_QUEUED, &work->flags);
			kcov_remote_start_common(worker->kcov_handle);
			work->fn(work);
			kcov_remote_stop();
			cond_resched();
		}
	}

	return !!node;
}

static void vhost_worker_killed(void *data)
{
	struct vhost_worker *worker = data;
	struct vhost_dev *dev = worker->dev;
	struct vhost_virtqueue *vq;
	int i, attach_cnt = 0;

	mutex_lock(&worker->mutex);
	worker->killed = true;

	for (i = 0; i < dev->nvqs; i++) {
		vq = dev->vqs[i];

		mutex_lock(&vq->mutex);
		if (worker ==
		    rcu_dereference_check(vq->worker,
					  lockdep_is_held(&vq->mutex))) {
			rcu_assign_pointer(vq->worker, NULL);
			attach_cnt++;
		}
		mutex_unlock(&vq->mutex);
	}

	worker->attachment_cnt -= attach_cnt;
	if (attach_cnt)
		synchronize_rcu();
	/*
	 * Finish vhost_worker_flush calls and any other works that snuck in
	 * before the synchronize_rcu.
	 */
	vhost_run_work_list(worker);
	mutex_unlock(&worker->mutex);
}

static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
	kfree(vq->indirect);
	vq->indirect = NULL;
	kfree(vq->log);
	vq->log = NULL;
	kfree(vq->heads);
	vq->heads = NULL;
}

/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->indirect = kmalloc_array(UIO_MAXIOV,
					     sizeof(*vq->indirect),
					     GFP_KERNEL);
		vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
					GFP_KERNEL);
		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
					  GFP_KERNEL);
		if (!vq->indirect || !vq->log || !vq->heads)
			goto err_nomem;
	}
	return 0;

err_nomem:
	for (; i >= 0; --i)
		vhost_vq_free_iovecs(dev->vqs[i]);
	return -ENOMEM;
}

static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i)
		vhost_vq_free_iovecs(dev->vqs[i]);
}

bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
			  int pkts, int total_len)
{
	struct vhost_dev *dev = vq->dev;

	if ((dev->byte_weight && total_len >= dev->byte_weight) ||
	    pkts >= dev->weight) {
		vhost_poll_queue(&vq->poll);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
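
/*
 * Request handlers call this from their service loop to bound the time
 * spent per invocation; when a limit is hit, the vq is requeued so other
 * vqs get a turn. A sketch (handle_one stands in for a driver's
 * per-request handler):
 *
 *	do {
 *		len = handle_one(vq);
 *	} while (!vhost_exceeds_weight(vq, ++pkts, total_len += len));
 */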

static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
				   unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->avail, ring, num), event);
}

static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->used, ring, num), event);
}

static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	return sizeof(*vq->desc) * num;
}
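
/*
 * Split-ring sizes in bytes per the virtio 1.x layout (the trailing two
 * bytes are used_event/avail_event when VIRTIO_RING_F_EVENT_IDX is
 * negotiated):
 *
 *	avail: 4 + 2 * num (+ 2)	num = 256 -> 518
 *	used:  4 + 8 * num (+ 2)	num = 256 -> 2054
 *	desc:  16 * num			num = 256 -> 4096
 */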

void vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue **vqs, int nvqs,
		    int iov_limit, int weight, int byte_weight,
		    bool use_worker,
		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
				       struct vhost_iotlb_msg *msg))
{
	struct vhost_virtqueue *vq;
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->umem = NULL;
	dev->iotlb = NULL;
	dev->mm = NULL;
	dev->iov_limit = iov_limit;
	dev->weight = weight;
	dev->byte_weight = byte_weight;
	dev->use_worker = use_worker;
	dev->msg_handler = msg_handler;
	init_waitqueue_head(&dev->wait);
	INIT_LIST_HEAD(&dev->read_list);
	INIT_LIST_HEAD(&dev->pending_list);
	spin_lock_init(&dev->iotlb_lock);
	xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->log = NULL;
		vq->indirect = NULL;
		vq->heads = NULL;
		vq->dev = dev;
		mutex_init(&vq->mutex);
		vhost_vq_reset(dev, vq);
		if (vq->handle_kick)
			vhost_poll_init(&vq->poll, vq->handle_kick,
					EPOLLIN, dev, vq);
	}
}
EXPORT_SYMBOL_GPL(vhost_dev_init);
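
/*
 * Drivers embed a vhost_dev and call this once at open time. A sketch
 * loosely modeled on vhost-net (the VHOST_NET_* constants live in
 * drivers/vhost/net.c and are shown only for illustration):
 *
 *	vhost_dev_init(&n->dev, vqs, VHOST_NET_VQ_MAX,
 *		       UIO_MAXIOV + VHOST_NET_BATCH,
 *		       VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT, true, NULL);
 */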

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

/* Caller should have device mutex */
bool vhost_dev_has_owner(struct vhost_dev *dev)
{
	return dev->mm;
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);

static void vhost_attach_mm(struct vhost_dev *dev)
{
	/* No owner, become one */
	if (dev->use_worker) {
		dev->mm = get_task_mm(current);
	} else {
		/* vDPA devices do not use a worker thread, so there is
		 * no need to hold the address space for mm. This helps
		 * to avoid a deadlock in the case of mmap(), which may
		 * hold a refcount on the file and depend on the release
		 * method to remove the vma.
		 */
		dev->mm = current->mm;
		mmgrab(dev->mm);
	}
}

static void vhost_detach_mm(struct vhost_dev *dev)
{
	if (!dev->mm)
		return;

	if (dev->use_worker)
		mmput(dev->mm);
	else
		mmdrop(dev->mm);

	dev->mm = NULL;
}

static void vhost_worker_destroy(struct vhost_dev *dev,
				 struct vhost_worker *worker)
{
	if (!worker)
		return;

	WARN_ON(!llist_empty(&worker->work_list));
	xa_erase(&dev->worker_xa, worker->id);
	vhost_task_stop(worker->vtsk);
	kfree(worker);
}

static void vhost_workers_free(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	unsigned long i;

	if (!dev->use_worker)
		return;

	for (i = 0; i < dev->nvqs; i++)
		rcu_assign_pointer(dev->vqs[i]->worker, NULL);
	/*
	 * Free the default worker we created and clean up workers userspace
	 * created but couldn't clean up (it forgot or crashed).
	 */
	xa_for_each(&dev->worker_xa, i, worker)
		vhost_worker_destroy(dev, worker);
	xa_destroy(&dev->worker_xa);
}

static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	struct vhost_task *vtsk;
	char name[TASK_COMM_LEN];
	int ret;
	u32 id;

	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
	if (!worker)
		return NULL;

	worker->dev = dev;
	snprintf(name, sizeof(name), "vhost-%d", current->pid);

	vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
				 worker, name);
	if (!vtsk)
		goto free_worker;

	mutex_init(&worker->mutex);
	init_llist_head(&worker->work_list);
	worker->kcov_handle = kcov_common_handle();
	worker->vtsk = vtsk;

	vhost_task_start(vtsk);

	ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
	if (ret < 0)
		goto stop_worker;
	worker->id = id;

	return worker;

stop_worker:
	vhost_task_stop(vtsk);
free_worker:
	kfree(worker);
	return NULL;
}

/* Caller must have device mutex */
static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				     struct vhost_worker *worker)
{
	struct vhost_worker *old_worker;

	mutex_lock(&worker->mutex);
	if (worker->killed) {
		mutex_unlock(&worker->mutex);
		return;
	}

	mutex_lock(&vq->mutex);

	old_worker = rcu_dereference_check(vq->worker,
					   lockdep_is_held(&vq->mutex));
	rcu_assign_pointer(vq->worker, worker);
	worker->attachment_cnt++;

	if (!old_worker) {
		mutex_unlock(&vq->mutex);
		mutex_unlock(&worker->mutex);
		return;
	}
	mutex_unlock(&vq->mutex);
	mutex_unlock(&worker->mutex);

	/*
	 * Take the worker mutex to make sure we see the work queued from
	 * device-wide flushes, which don't use RCU for execution.
	 */
	mutex_lock(&old_worker->mutex);
	if (old_worker->killed) {
		mutex_unlock(&old_worker->mutex);
		return;
	}

	/*
	 * We don't want to call synchronize_rcu for every vq during setup
	 * because it will slow down VM startup. If we haven't done
	 * VHOST_SET_VRING_KICK and haven't done the driver-specific
	 * SET_ENDPOINT/RUNNING setup, then we can skip the sync since
	 * there will not be any works queued for scsi and net.
	 */
	mutex_lock(&vq->mutex);
	if (!vhost_vq_get_backend(vq) && !vq->kick) {
		mutex_unlock(&vq->mutex);

		old_worker->attachment_cnt--;
		mutex_unlock(&old_worker->mutex);
		/*
		 * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
		 * Warn if it adds support for multiple workers but forgets to
		 * handle the early queueing case.
		 */
		WARN_ON(!old_worker->attachment_cnt &&
			!llist_empty(&old_worker->work_list));
		return;
	}
	mutex_unlock(&vq->mutex);

	/* Make sure new vq queue/flush/poll calls see the new worker */
	synchronize_rcu();
	/* Make sure whatever was queued gets run */
	__vhost_worker_flush(old_worker);
	old_worker->attachment_cnt--;
	mutex_unlock(&old_worker->mutex);
}

/* Caller must have device mutex */
static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				  struct vhost_vring_worker *info)
{
	unsigned long index = info->worker_id;
	struct vhost_dev *dev = vq->dev;
	struct vhost_worker *worker;

	if (!dev->use_worker)
		return -EINVAL;

	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	__vhost_vq_attach_worker(vq, worker);
	return 0;
}

/* Caller must have device mutex */
static int vhost_new_worker(struct vhost_dev *dev,
			    struct vhost_worker_state *info)
{
	struct vhost_worker *worker;

	worker = vhost_worker_create(dev);
	if (!worker)
		return -ENOMEM;

	info->worker_id = worker->id;
	return 0;
}

/* Caller must have device mutex */
static int vhost_free_worker(struct vhost_dev *dev,
			     struct vhost_worker_state *info)
{
	unsigned long index = info->worker_id;
	struct vhost_worker *worker;

	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	mutex_lock(&worker->mutex);
	if (worker->attachment_cnt || worker->killed) {
		mutex_unlock(&worker->mutex);
		return -EBUSY;
	}
	/*
	 * A flush might have raced and snuck in before attachment_cnt was
	 * set to zero. Make sure any queued flush works are drained before
	 * freeing.
	 */
	__vhost_worker_flush(worker);
	mutex_unlock(&worker->mutex);

	vhost_worker_destroy(dev, worker);
	return 0;
}

static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp,
				  struct vhost_virtqueue **vq, u32 *id)
{
	u32 __user *idxp = argp;
	u32 idx;
	long r;

	r = get_user(idx, idxp);
	if (r < 0)
		return r;

	if (idx >= dev->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, dev->nvqs);

	*vq = dev->vqs[idx];
	*id = idx;
	return 0;
}
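
/*
 * Note: array_index_nospec() clamps idx under CPU speculation so that a
 * mispredicted bounds check above cannot be used to speculatively index
 * past the end of dev->vqs[] (a Spectre-v1 mitigation).
 */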

/* Caller must have device mutex */
long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
			void __user *argp)
{
	struct vhost_vring_worker ring_worker;
	struct vhost_worker_state state;
	struct vhost_worker *worker;
	struct vhost_virtqueue *vq;
	long ret;
	u32 idx;

	if (!dev->use_worker)
		return -EINVAL;

	if (!vhost_dev_has_owner(dev))
		return -EINVAL;

	ret = vhost_dev_check_owner(dev);
	if (ret)
		return ret;

	switch (ioctl) {
	/* dev worker ioctls */
	case VHOST_NEW_WORKER:
		ret = vhost_new_worker(dev, &state);
		if (!ret && copy_to_user(argp, &state, sizeof(state)))
			ret = -EFAULT;
		return ret;
	case VHOST_FREE_WORKER:
		if (copy_from_user(&state, argp, sizeof(state)))
			return -EFAULT;
		return vhost_free_worker(dev, &state);
	/* vring worker ioctls */
	case VHOST_ATTACH_VRING_WORKER:
	case VHOST_GET_VRING_WORKER:
		break;
	default:
		return -ENOIOCTLCMD;
	}

	ret = vhost_get_vq_from_user(dev, argp, &vq, &idx);
	if (ret)
		return ret;

	switch (ioctl) {
	case VHOST_ATTACH_VRING_WORKER:
		if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) {
			ret = -EFAULT;
			break;
		}

		ret = vhost_vq_attach_worker(vq, &ring_worker);
		break;
	case VHOST_GET_VRING_WORKER:
		worker = rcu_dereference_check(vq->worker,
					       lockdep_is_held(&dev->mutex));
		if (!worker) {
			ret = -EINVAL;
			break;
		}

		ring_worker.index = idx;
		ring_worker.worker_id = worker->id;

		if (copy_to_user(argp, &ring_worker, sizeof(ring_worker)))
			ret = -EFAULT;
		break;
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vhost_worker_ioctl);
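
/*
 * Userspace flow for per-vq workers, a sketch with error handling
 * omitted (the ioctls and structs are from the uapi vhost headers):
 *
 *	struct vhost_worker_state s = {};
 *	ioctl(dev_fd, VHOST_NEW_WORKER, &s);
 *
 *	struct vhost_vring_worker w = {
 *		.index = 1,
 *		.worker_id = s.worker_id,
 *	};
 *	ioctl(dev_fd, VHOST_ATTACH_VRING_WORKER, &w);
 */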

/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	int err, i;

	/* Is there an owner already? */
	if (vhost_dev_has_owner(dev)) {
		err = -EBUSY;
		goto err_mm;
	}

	vhost_attach_mm(dev);

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_iovecs;

	if (dev->use_worker) {
		/*
		 * This should be done last: vsock can queue work before
		 * VHOST_SET_OWNER, so creating the worker last simplifies
		 * the failure path below, since we don't have to worry
		 * about vsock queueing while we free the worker.
		 */
		worker = vhost_worker_create(dev);
		if (!worker) {
			err = -ENOMEM;
			goto err_worker;
		}

		for (i = 0; i < dev->nvqs; i++)
			__vhost_vq_attach_worker(dev->vqs[i], worker);
	}

	return 0;

err_worker:
	vhost_dev_free_iovecs(dev);
err_iovecs:
	vhost_detach_mm(dev);
err_mm:
	return err;
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);

static struct vhost_iotlb *iotlb_alloc(void)
{
	return vhost_iotlb_alloc(max_iotlb_entries,
				 VHOST_IOTLB_FLAG_RETIRE);
}

struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
{
	return iotlb_alloc();
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);

/* Caller should have device mutex */
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
{
	int i;

	vhost_dev_cleanup(dev);

	dev->umem = umem;
	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
	 * VQs aren't running.
	 */
	for (i = 0; i < dev->nvqs; ++i)
		dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);

void vhost_dev_stop(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick)
			vhost_poll_stop(&dev->vqs[i]->poll);
	}

	vhost_dev_flush(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);

void vhost_clear_msg(struct vhost_dev *dev)
{
	struct vhost_msg_node *node, *n;

	spin_lock(&dev->iotlb_lock);

	list_for_each_entry_safe(node, n, &dev->read_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	list_for_each_entry_safe(node, n, &dev->pending_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	spin_unlock(&dev->iotlb_lock);
}
EXPORT_SYMBOL_GPL(vhost_clear_msg);

void vhost_dev_cleanup(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->error_ctx)
			eventfd_ctx_put(dev->vqs[i]->error_ctx);
		if (dev->vqs[i]->kick)
			fput(dev->vqs[i]->kick);
		if (dev->vqs[i]->call_ctx.ctx)
			eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
		vhost_vq_reset(dev, dev->vqs[i]);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	/* No one will access memory at this point */
	vhost_iotlb_free(dev->umem);
	dev->umem = NULL;
	vhost_iotlb_free(dev->iotlb);
	dev->iotlb = NULL;
	vhost_clear_msg(dev);
	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
	vhost_workers_free(dev);
	vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);

static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return false;

	return access_ok(log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}
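
/*
 * The dirty log is a bitmap with one bit per VHOST_PAGE_SIZE page, hence
 * the addr / VHOST_PAGE_SIZE / 8 byte offset above. For example, with 4K
 * pages, guest address 0x10000000 is page 0x10000 and lands in log byte
 * 0x2000.
 */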
10803a4d5c94SMichael S. Tsirkin 
1081f7ad318eSXie Yongji /* Make sure 64 bit math will not overflow. */
vhost_overflow(u64 uaddr,u64 size)1082ec33d031SMichael S. Tsirkin static bool vhost_overflow(u64 uaddr, u64 size)
1083ec33d031SMichael S. Tsirkin {
1084f7ad318eSXie Yongji 	if (uaddr > ULONG_MAX || size > ULONG_MAX)
1085f7ad318eSXie Yongji 		return true;
1086f7ad318eSXie Yongji 
1087f7ad318eSXie Yongji 	if (!size)
1088f7ad318eSXie Yongji 		return false;
1089f7ad318eSXie Yongji 
1090f7ad318eSXie Yongji 	return uaddr > ULONG_MAX - size + 1;
1091ec33d031SMichael S. Tsirkin }
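
/* For example, uaddr = ULONG_MAX - 3 with size = 4 is accepted (the
 * last byte sits exactly at ULONG_MAX), while size = 5 would wrap past
 * the top of the address space and is rejected.
 */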
1092ec33d031SMichael S. Tsirkin 
10933a4d5c94SMichael S. Tsirkin /* Caller should have vq mutex and device mutex. */
10940bbe3066SJason Wang static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem,
10953a4d5c94SMichael S. Tsirkin 				int log_all)
10963a4d5c94SMichael S. Tsirkin {
10970bbe3066SJason Wang 	struct vhost_iotlb_map *map;
1098179b284eSJeff Dike 
1099a9709d68SJason Wang 	if (!umem)
1100ddd3d408SStefan Hajnoczi 		return false;
1101179b284eSJeff Dike 
11020bbe3066SJason Wang 	list_for_each_entry(map, &umem->list, link) {
11030bbe3066SJason Wang 		unsigned long a = map->addr;
1104a9709d68SJason Wang 
11050bbe3066SJason Wang 		if (vhost_overflow(map->addr, map->size))
1106ddd3d408SStefan Hajnoczi 			return false;
1107ec33d031SMichael S. Tsirkin 
11090bbe3066SJason Wang 		if (!access_ok((void __user *)a, map->size))
1110ddd3d408SStefan Hajnoczi 			return false;
11113a4d5c94SMichael S. Tsirkin 		else if (log_all && !log_access_ok(log_base,
11120bbe3066SJason Wang 						   map->start,
11130bbe3066SJason Wang 						   map->size))
1114ddd3d408SStefan Hajnoczi 			return false;
11153a4d5c94SMichael S. Tsirkin 	}
1116ddd3d408SStefan Hajnoczi 	return true;
11173a4d5c94SMichael S. Tsirkin }
11183a4d5c94SMichael S. Tsirkin 
1119f8894913SJason Wang static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
1120f8894913SJason Wang 					       u64 addr, unsigned int size,
1121f8894913SJason Wang 					       int type)
1122f8894913SJason Wang {
11230bbe3066SJason Wang 	const struct vhost_iotlb_map *map = vq->meta_iotlb[type];
1124f8894913SJason Wang 
11250bbe3066SJason Wang 	if (!map)
1126f8894913SJason Wang 		return NULL;
1127f8894913SJason Wang 
11281b0be99fSMichael S. Tsirkin 	return (void __user *)(uintptr_t)(map->addr + addr - map->start);
1129f8894913SJason Wang }
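
/* The meta_iotlb[] cache holds at most one mapping per ring structure
 * (VHOST_ADDR_DESC/AVAIL/USED); a hit here lets the hot path skip the
 * full translation walk done by translate_desc().
 */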
1130f8894913SJason Wang 
11313a4d5c94SMichael S. Tsirkin /* Can we switch to this memory table? */
11323a4d5c94SMichael S. Tsirkin /* Caller should have device mutex but not vq mutex */
11330bbe3066SJason Wang static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem,
11343a4d5c94SMichael S. Tsirkin 			     int log_all)
11353a4d5c94SMichael S. Tsirkin {
11363a4d5c94SMichael S. Tsirkin 	int i;
1137d47effe1SKrishna Kumar 
11383a4d5c94SMichael S. Tsirkin 	for (i = 0; i < d->nvqs; ++i) {
1139ddd3d408SStefan Hajnoczi 		bool ok;
1140ea16c514SMichael S. Tsirkin 		bool log;
1141ea16c514SMichael S. Tsirkin 
11423ab2e420SAsias He 		mutex_lock(&d->vqs[i]->mutex);
1143ea16c514SMichael S. Tsirkin 		log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
11443a4d5c94SMichael S. Tsirkin 		/* If ring is inactive, will check when it's enabled. */
11453ab2e420SAsias He 		if (d->vqs[i]->private_data)
1146a9709d68SJason Wang 			ok = vq_memory_access_ok(d->vqs[i]->log_base,
1147a9709d68SJason Wang 						 umem, log);
11483a4d5c94SMichael S. Tsirkin 		else
1149ddd3d408SStefan Hajnoczi 			ok = true;
11503ab2e420SAsias He 		mutex_unlock(&d->vqs[i]->mutex);
11513a4d5c94SMichael S. Tsirkin 		if (!ok)
1152ddd3d408SStefan Hajnoczi 			return false;
11533a4d5c94SMichael S. Tsirkin 	}
1154ddd3d408SStefan Hajnoczi 	return true;
11553a4d5c94SMichael S. Tsirkin }
11563a4d5c94SMichael S. Tsirkin 
11576b1e6cc7SJason Wang static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
11586b1e6cc7SJason Wang 			  struct iovec iov[], int iov_size, int access);
1159bfe2bc51SJason Wang 
116072952cc0SMichael S. Tsirkin static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
1161bfe2bc51SJason Wang 			      const void *from, unsigned size)
1162bfe2bc51SJason Wang {
11636b1e6cc7SJason Wang 	int ret;
1164bfe2bc51SJason Wang 
11656b1e6cc7SJason Wang 	if (!vq->iotlb)
11666b1e6cc7SJason Wang 		return __copy_to_user(to, from, size);
11676b1e6cc7SJason Wang 	else {
11686b1e6cc7SJason Wang 		/* This function should be called after iotlb
11696b1e6cc7SJason Wang 		 * prefetch, which means we're sure that all vq
11706b1e6cc7SJason Wang 		 * memory can be accessed through the iotlb. So -EAGAIN should
11716b1e6cc7SJason Wang 		 * not happen in this case.
11726b1e6cc7SJason Wang 		 */
11736b1e6cc7SJason Wang 		struct iov_iter t;
1174f8894913SJason Wang 		void __user *uaddr = vhost_vq_meta_fetch(vq,
1175f8894913SJason Wang 				     (u64)(uintptr_t)to, size,
11767ced6c98SEric Auger 				     VHOST_ADDR_USED);
1177f8894913SJason Wang 
1178f8894913SJason Wang 		if (uaddr)
1179f8894913SJason Wang 			return __copy_to_user(uaddr, from, size);
1180f8894913SJason Wang 
11816b1e6cc7SJason Wang 		ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
11826b1e6cc7SJason Wang 				     ARRAY_SIZE(vq->iotlb_iov),
11836b1e6cc7SJason Wang 				     VHOST_ACCESS_WO);
11846b1e6cc7SJason Wang 		if (ret < 0)
11856b1e6cc7SJason Wang 			goto out;
1186de4eda9dSAl Viro 		iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size);
11876b1e6cc7SJason Wang 		ret = copy_to_iter(from, size, &t);
11886b1e6cc7SJason Wang 		if (ret == size)
11896b1e6cc7SJason Wang 			ret = 0;
11906b1e6cc7SJason Wang 	}
11916b1e6cc7SJason Wang out:
11926b1e6cc7SJason Wang 	return ret;
11936b1e6cc7SJason Wang }
1194bfe2bc51SJason Wang 
1195bfe2bc51SJason Wang static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
119672952cc0SMichael S. Tsirkin 				void __user *from, unsigned size)
1197bfe2bc51SJason Wang {
11986b1e6cc7SJason Wang 	int ret;
11996b1e6cc7SJason Wang 
12006b1e6cc7SJason Wang 	if (!vq->iotlb)
1201bfe2bc51SJason Wang 		return __copy_from_user(to, from, size);
12026b1e6cc7SJason Wang 	else {
12036b1e6cc7SJason Wang 		/* This function should be called after iotlb
12046b1e6cc7SJason Wang 		 * prefetch, which means we're sure that the vq
12056b1e6cc7SJason Wang 		 * memory can be accessed through the iotlb. So -EAGAIN should
12066b1e6cc7SJason Wang 		 * not happen in this case.
12076b1e6cc7SJason Wang 		 */
1208f8894913SJason Wang 		void __user *uaddr = vhost_vq_meta_fetch(vq,
1209f8894913SJason Wang 				     (u64)(uintptr_t)from, size,
1210f8894913SJason Wang 				     VHOST_ADDR_DESC);
12116b1e6cc7SJason Wang 		struct iov_iter f;
1212f8894913SJason Wang 
1213f8894913SJason Wang 		if (uaddr)
1214f8894913SJason Wang 			return __copy_from_user(to, uaddr, size);
1215f8894913SJason Wang 
12166b1e6cc7SJason Wang 		ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
12176b1e6cc7SJason Wang 				     ARRAY_SIZE(vq->iotlb_iov),
12186b1e6cc7SJason Wang 				     VHOST_ACCESS_RO);
12196b1e6cc7SJason Wang 		if (ret < 0) {
12206b1e6cc7SJason Wang 			vq_err(vq, "IOTLB translation failure: uaddr "
12216b1e6cc7SJason Wang 			       "%p size 0x%llx\n", from,
12226b1e6cc7SJason Wang 			       (unsigned long long) size);
12236b1e6cc7SJason Wang 			goto out;
12246b1e6cc7SJason Wang 		}
1225de4eda9dSAl Viro 		iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size);
12266b1e6cc7SJason Wang 		ret = copy_from_iter(to, size, &f);
12276b1e6cc7SJason Wang 		if (ret == size)
12286b1e6cc7SJason Wang 			ret = 0;
12296b1e6cc7SJason Wang 	}
12306b1e6cc7SJason Wang 
12316b1e6cc7SJason Wang out:
12326b1e6cc7SJason Wang 	return ret;
12336b1e6cc7SJason Wang }
12346b1e6cc7SJason Wang 
1235f8894913SJason Wang static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
1236f8894913SJason Wang 					  void __user *addr, unsigned int size,
1237f8894913SJason Wang 					  int type)
12386b1e6cc7SJason Wang {
12396b1e6cc7SJason Wang 	int ret;
12406b1e6cc7SJason Wang 
12416b1e6cc7SJason Wang 	ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
12426b1e6cc7SJason Wang 			     ARRAY_SIZE(vq->iotlb_iov),
12436b1e6cc7SJason Wang 			     VHOST_ACCESS_RO);
12446b1e6cc7SJason Wang 	if (ret < 0) {
12456b1e6cc7SJason Wang 		vq_err(vq, "IOTLB translation failure: uaddr "
12466b1e6cc7SJason Wang 			"%p size 0x%llx\n", addr,
12476b1e6cc7SJason Wang 			(unsigned long long) size);
12486b1e6cc7SJason Wang 		return NULL;
12496b1e6cc7SJason Wang 	}
12506b1e6cc7SJason Wang 
12516b1e6cc7SJason Wang 	if (ret != 1 || vq->iotlb_iov[0].iov_len != size) {
12526b1e6cc7SJason Wang 		vq_err(vq, "Non atomic userspace memory access: uaddr "
12536b1e6cc7SJason Wang 			"%p size 0x%llx\n", addr,
12546b1e6cc7SJason Wang 			(unsigned long long) size);
12556b1e6cc7SJason Wang 		return NULL;
12566b1e6cc7SJason Wang 	}
12576b1e6cc7SJason Wang 
12586b1e6cc7SJason Wang 	return vq->iotlb_iov[0].iov_base;
12596b1e6cc7SJason Wang }
12606b1e6cc7SJason Wang 
1261f8894913SJason Wang /* This function should be called after iotlb
1262f8894913SJason Wang  * prefetch, which means we're sure that the vq
1263f8894913SJason Wang  * memory can be accessed through the iotlb. So -EAGAIN should
1264f8894913SJason Wang  * not happen in this case.
1265f8894913SJason Wang  */
1266f8894913SJason Wang static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
12671b0be99fSMichael S. Tsirkin 					    void __user *addr, unsigned int size,
1268f8894913SJason Wang 					    int type)
1269f8894913SJason Wang {
1270f8894913SJason Wang 	void __user *uaddr = vhost_vq_meta_fetch(vq,
1271f8894913SJason Wang 			     (u64)(uintptr_t)addr, size, type);
1272f8894913SJason Wang 	if (uaddr)
1273f8894913SJason Wang 		return uaddr;
1274f8894913SJason Wang 
1275f8894913SJason Wang 	return __vhost_get_user_slow(vq, addr, size, type);
1276f8894913SJason Wang }
1277f8894913SJason Wang 
12786b1e6cc7SJason Wang #define vhost_put_user(vq, x, ptr)		\
12796b1e6cc7SJason Wang ({ \
1280002ef18eSGuennadi Liakhovetski 	int ret; \
12816b1e6cc7SJason Wang 	if (!vq->iotlb) { \
12826b1e6cc7SJason Wang 		ret = __put_user(x, ptr); \
12836b1e6cc7SJason Wang 	} else { \
12846b1e6cc7SJason Wang 		__typeof__(ptr) to = \
1285f8894913SJason Wang 			(__typeof__(ptr)) __vhost_get_user(vq, ptr,	\
1286f8894913SJason Wang 					  sizeof(*ptr), VHOST_ADDR_USED); \
12876b1e6cc7SJason Wang 		if (to != NULL) \
12886b1e6cc7SJason Wang 			ret = __put_user(x, to); \
12896b1e6cc7SJason Wang 		else \
12906b1e6cc7SJason Wang 			ret = -EFAULT;	\
12916b1e6cc7SJason Wang 	} \
12926b1e6cc7SJason Wang 	ret; \
12936b1e6cc7SJason Wang })
12946b1e6cc7SJason Wang 
12957b5d753eSJason Wang static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
12967b5d753eSJason Wang {
12977b5d753eSJason Wang 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
12987b5d753eSJason Wang 			      vhost_avail_event(vq));
12997b5d753eSJason Wang }
13007b5d753eSJason Wang 
13017b5d753eSJason Wang static inline int vhost_put_used(struct vhost_virtqueue *vq,
13027b5d753eSJason Wang 				 struct vring_used_elem *head, int idx,
13037b5d753eSJason Wang 				 int count)
13047b5d753eSJason Wang {
13057b5d753eSJason Wang 	return vhost_copy_to_user(vq, vq->used->ring + idx, head,
13067b5d753eSJason Wang 				  count * sizeof(*head));
13077b5d753eSJason Wang }
13087b5d753eSJason Wang 
13097b5d753eSJason Wang static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
13117b5d753eSJason Wang {
13127b5d753eSJason Wang 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
13137b5d753eSJason Wang 			      &vq->used->flags);
13147b5d753eSJason Wang }
13157b5d753eSJason Wang 
13167b5d753eSJason Wang static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
13187b5d753eSJason Wang {
13197b5d753eSJason Wang 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
13207b5d753eSJason Wang 			      &vq->used->idx);
13217b5d753eSJason Wang }
13227b5d753eSJason Wang 
1323f8894913SJason Wang #define vhost_get_user(vq, x, ptr, type)		\
13246b1e6cc7SJason Wang ({ \
13256b1e6cc7SJason Wang 	int ret; \
13266b1e6cc7SJason Wang 	if (!vq->iotlb) { \
13276b1e6cc7SJason Wang 		ret = __get_user(x, ptr); \
13286b1e6cc7SJason Wang 	} else { \
13296b1e6cc7SJason Wang 		__typeof__(ptr) from = \
1330f8894913SJason Wang 			(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
1331f8894913SJason Wang 							   sizeof(*ptr), \
1332f8894913SJason Wang 							   type); \
13336b1e6cc7SJason Wang 		if (from != NULL) \
13346b1e6cc7SJason Wang 			ret = __get_user(x, from); \
13356b1e6cc7SJason Wang 		else \
13366b1e6cc7SJason Wang 			ret = -EFAULT; \
13376b1e6cc7SJason Wang 	} \
13386b1e6cc7SJason Wang 	ret; \
13396b1e6cc7SJason Wang })
13406b1e6cc7SJason Wang 
1341f8894913SJason Wang #define vhost_get_avail(vq, x, ptr) \
1342f8894913SJason Wang 	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
1343f8894913SJason Wang 
1344f8894913SJason Wang #define vhost_get_used(vq, x, ptr) \
1345f8894913SJason Wang 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
1346f8894913SJason Wang 
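/* The nesting level passed to mutex_lock_nested() below is a lockdep
 * annotation only: it records that taking every vq mutex in ascending
 * index order is one consistent locking order.
 */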
134786a07da3SJason Wang static void vhost_dev_lock_vqs(struct vhost_dev *d)
134886a07da3SJason Wang {
134986a07da3SJason Wang 	int i = 0;
135086a07da3SJason Wang 	for (i = 0; i < d->nvqs; ++i)
135186a07da3SJason Wang 		mutex_lock_nested(&d->vqs[i]->mutex, i);
135286a07da3SJason Wang }
135386a07da3SJason Wang 
135486a07da3SJason Wang static void vhost_dev_unlock_vqs(struct vhost_dev *d)
135586a07da3SJason Wang {
135686a07da3SJason Wang 	int i = 0;
135786a07da3SJason Wang 	for (i = 0; i < d->nvqs; ++i)
135886a07da3SJason Wang 		mutex_unlock(&d->vqs[i]->mutex);
135986a07da3SJason Wang }
136086a07da3SJason Wang 
13617b5d753eSJason Wang static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
13627b5d753eSJason Wang 				      __virtio16 *idx)
13637b5d753eSJason Wang {
13647b5d753eSJason Wang 	return vhost_get_avail(vq, *idx, &vq->avail->idx);
13657b5d753eSJason Wang }
13667b5d753eSJason Wang 
13677b5d753eSJason Wang static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
13687b5d753eSJason Wang 				       __virtio16 *head, int idx)
13697b5d753eSJason Wang {
13707b5d753eSJason Wang 	return vhost_get_avail(vq, *head,
13717b5d753eSJason Wang 			       &vq->avail->ring[idx & (vq->num - 1)]);
13727b5d753eSJason Wang }
13737b5d753eSJason Wang 
13747b5d753eSJason Wang static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
13757b5d753eSJason Wang 					__virtio16 *flags)
13767b5d753eSJason Wang {
13777b5d753eSJason Wang 	return vhost_get_avail(vq, *flags, &vq->avail->flags);
13787b5d753eSJason Wang }
13797b5d753eSJason Wang 
13807b5d753eSJason Wang static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
13817b5d753eSJason Wang 				       __virtio16 *event)
13827b5d753eSJason Wang {
13837b5d753eSJason Wang 	return vhost_get_avail(vq, *event, vhost_used_event(vq));
13847b5d753eSJason Wang }
13857b5d753eSJason Wang 
13867b5d753eSJason Wang static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
13877b5d753eSJason Wang 				     __virtio16 *idx)
13887b5d753eSJason Wang {
13897b5d753eSJason Wang 	return vhost_get_used(vq, *idx, &vq->used->idx);
13907b5d753eSJason Wang }
13917b5d753eSJason Wang 
13927b5d753eSJason Wang static inline int vhost_get_desc(struct vhost_virtqueue *vq,
13937b5d753eSJason Wang 				 struct vring_desc *desc, int idx)
13947b5d753eSJason Wang {
13957b5d753eSJason Wang 	return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
13967b5d753eSJason Wang }
13977b5d753eSJason Wang 
13986b1e6cc7SJason Wang static void vhost_iotlb_notify_vq(struct vhost_dev *d,
13996b1e6cc7SJason Wang 				  struct vhost_iotlb_msg *msg)
14006b1e6cc7SJason Wang {
14016b1e6cc7SJason Wang 	struct vhost_msg_node *node, *n;
14026b1e6cc7SJason Wang 
14036b1e6cc7SJason Wang 	spin_lock(&d->iotlb_lock);
14046b1e6cc7SJason Wang 
14056b1e6cc7SJason Wang 	list_for_each_entry_safe(node, n, &d->pending_list, node) {
14066b1e6cc7SJason Wang 		struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
14076b1e6cc7SJason Wang 		if (msg->iova <= vq_msg->iova &&
14082d66f997SJason Wang 		    msg->iova + msg->size - 1 >= vq_msg->iova &&
14096b1e6cc7SJason Wang 		    vq_msg->type == VHOST_IOTLB_MISS) {
14106b1e6cc7SJason Wang 			vhost_poll_queue(&node->vq->poll);
14116b1e6cc7SJason Wang 			list_del(&node->node);
14126b1e6cc7SJason Wang 			kfree(node);
14136b1e6cc7SJason Wang 		}
14146b1e6cc7SJason Wang 	}
14156b1e6cc7SJason Wang 
14166b1e6cc7SJason Wang 	spin_unlock(&d->iotlb_lock);
14176b1e6cc7SJason Wang }
14186b1e6cc7SJason Wang 
1419ddd3d408SStefan Hajnoczi static bool umem_access_ok(u64 uaddr, u64 size, int access)
14206b1e6cc7SJason Wang {
14216b1e6cc7SJason Wang 	unsigned long a = uaddr;
14226b1e6cc7SJason Wang 
1423ec33d031SMichael S. Tsirkin 	/* Make sure 64 bit math will not overflow. */
1424ec33d031SMichael S. Tsirkin 	if (vhost_overflow(uaddr, size))
1425ddd3d408SStefan Hajnoczi 		return false;
1426ec33d031SMichael S. Tsirkin 
14276b1e6cc7SJason Wang 	if ((access & VHOST_ACCESS_RO) &&
142896d4f267SLinus Torvalds 	    !access_ok((void __user *)a, size))
1429ddd3d408SStefan Hajnoczi 		return false;
14306b1e6cc7SJason Wang 	if ((access & VHOST_ACCESS_WO) &&
143196d4f267SLinus Torvalds 	    !access_ok((void __user *)a, size))
1432ddd3d408SStefan Hajnoczi 		return false;
1433ddd3d408SStefan Hajnoczi 	return true;
14346b1e6cc7SJason Wang }
14356b1e6cc7SJason Wang 
143691233ad7SGautam Dawar static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
14376b1e6cc7SJason Wang 				   struct vhost_iotlb_msg *msg)
14386b1e6cc7SJason Wang {
14396b1e6cc7SJason Wang 	int ret = 0;
14406b1e6cc7SJason Wang 
144191233ad7SGautam Dawar 	if (asid != 0)
144291233ad7SGautam Dawar 		return -EINVAL;
144391233ad7SGautam Dawar 
14441b15ad68SJason Wang 	mutex_lock(&dev->mutex);
144586a07da3SJason Wang 	vhost_dev_lock_vqs(dev);
14466b1e6cc7SJason Wang 	switch (msg->type) {
14476b1e6cc7SJason Wang 	case VHOST_IOTLB_UPDATE:
14486b1e6cc7SJason Wang 		if (!dev->iotlb) {
14496b1e6cc7SJason Wang 			ret = -EFAULT;
14506b1e6cc7SJason Wang 			break;
14516b1e6cc7SJason Wang 		}
1452ddd3d408SStefan Hajnoczi 		if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
14536b1e6cc7SJason Wang 			ret = -EFAULT;
14546b1e6cc7SJason Wang 			break;
14556b1e6cc7SJason Wang 		}
1456f8894913SJason Wang 		vhost_vq_meta_reset(dev);
14570bbe3066SJason Wang 		if (vhost_iotlb_add_range(dev->iotlb, msg->iova,
14586b1e6cc7SJason Wang 					  msg->iova + msg->size - 1,
14596b1e6cc7SJason Wang 					  msg->uaddr, msg->perm)) {
14606b1e6cc7SJason Wang 			ret = -ENOMEM;
14616b1e6cc7SJason Wang 			break;
14626b1e6cc7SJason Wang 		}
14636b1e6cc7SJason Wang 		vhost_iotlb_notify_vq(dev, msg);
14646b1e6cc7SJason Wang 		break;
14656b1e6cc7SJason Wang 	case VHOST_IOTLB_INVALIDATE:
14666f3180afSJason Wang 		if (!dev->iotlb) {
14676f3180afSJason Wang 			ret = -EFAULT;
14686f3180afSJason Wang 			break;
14696f3180afSJason Wang 		}
1470f8894913SJason Wang 		vhost_vq_meta_reset(dev);
14710bbe3066SJason Wang 		vhost_iotlb_del_range(dev->iotlb, msg->iova,
14726b1e6cc7SJason Wang 				      msg->iova + msg->size - 1);
14736b1e6cc7SJason Wang 		break;
14746b1e6cc7SJason Wang 	default:
14756b1e6cc7SJason Wang 		ret = -EINVAL;
14766b1e6cc7SJason Wang 		break;
14776b1e6cc7SJason Wang 	}
14786b1e6cc7SJason Wang 
147986a07da3SJason Wang 	vhost_dev_unlock_vqs(dev);
14801b15ad68SJason Wang 	mutex_unlock(&dev->mutex);
14811b15ad68SJason Wang 
14826b1e6cc7SJason Wang 	return ret;
14836b1e6cc7SJason Wang }
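
/* A minimal userspace sketch of driving this path (vhost_fd, iova, size
 * and hva are placeholders, not defined in this file): the VMM answers
 * an IOTLB miss by writing an update message to the vhost device fd,
 * which reaches this function through vhost_chr_write_iter() below.
 * The asid field is left at 0, the only value vhost_process_iotlb_msg()
 * accepts.
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)hva,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *	if (write(vhost_fd, &msg, sizeof(msg)) != sizeof(msg))
 *		handle_error();
 */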
14846b1e6cc7SJason Wang ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
14856b1e6cc7SJason Wang 			     struct iov_iter *from)
14866b1e6cc7SJason Wang {
1487429711aeSJason Wang 	struct vhost_iotlb_msg msg;
1488429711aeSJason Wang 	size_t offset;
1489429711aeSJason Wang 	int type, ret;
149091233ad7SGautam Dawar 	u32 asid = 0;
14916b1e6cc7SJason Wang 
1492429711aeSJason Wang 	ret = copy_from_iter(&type, sizeof(type), from);
149374ad7419SPavel Tikhomirov 	if (ret != sizeof(type)) {
149474ad7419SPavel Tikhomirov 		ret = -EINVAL;
14956b1e6cc7SJason Wang 		goto done;
149674ad7419SPavel Tikhomirov 	}
14976b1e6cc7SJason Wang 
1498429711aeSJason Wang 	switch (type) {
14996b1e6cc7SJason Wang 	case VHOST_IOTLB_MSG:
1500429711aeSJason Wang 		/* There may be a hole after the type field for the V1
1501429711aeSJason Wang 		 * message format, so skip it here.
1502429711aeSJason Wang 		 */
1503429711aeSJason Wang 		offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
1504429711aeSJason Wang 		break;
1505429711aeSJason Wang 	case VHOST_IOTLB_MSG_V2:
150691233ad7SGautam Dawar 		if (vhost_backend_has_feature(dev->vqs[0],
150791233ad7SGautam Dawar 					      VHOST_BACKEND_F_IOTLB_ASID)) {
150891233ad7SGautam Dawar 			ret = copy_from_iter(&asid, sizeof(asid), from);
150991233ad7SGautam Dawar 			if (ret != sizeof(asid)) {
151091233ad7SGautam Dawar 				ret = -EINVAL;
151191233ad7SGautam Dawar 				goto done;
151291233ad7SGautam Dawar 			}
1513aaca8373SGautam Dawar 			offset = 0;
151491233ad7SGautam Dawar 		} else
1515429711aeSJason Wang 			offset = sizeof(__u32);
15166b1e6cc7SJason Wang 		break;
15176b1e6cc7SJason Wang 	default:
15186b1e6cc7SJason Wang 		ret = -EINVAL;
1519429711aeSJason Wang 		goto done;
15206b1e6cc7SJason Wang 	}
15216b1e6cc7SJason Wang 
1522429711aeSJason Wang 	iov_iter_advance(from, offset);
1523429711aeSJason Wang 	ret = copy_from_iter(&msg, sizeof(msg), from);
152474ad7419SPavel Tikhomirov 	if (ret != sizeof(msg)) {
152574ad7419SPavel Tikhomirov 		ret = -EINVAL;
1526429711aeSJason Wang 		goto done;
152774ad7419SPavel Tikhomirov 	}
1528792a4f2eSJason Wang 
1529ca50ec37SEric Auger 	if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) {
1530e2ae38cfSAnirudh Rayabharam 		ret = -EINVAL;
1531e2ae38cfSAnirudh Rayabharam 		goto done;
1532e2ae38cfSAnirudh Rayabharam 	}
1533e2ae38cfSAnirudh Rayabharam 
1534792a4f2eSJason Wang 	if (dev->msg_handler)
153591233ad7SGautam Dawar 		ret = dev->msg_handler(dev, asid, &msg);
1536792a4f2eSJason Wang 	else
153791233ad7SGautam Dawar 		ret = vhost_process_iotlb_msg(dev, asid, &msg);
1538792a4f2eSJason Wang 	if (ret) {
1539429711aeSJason Wang 		ret = -EFAULT;
1540429711aeSJason Wang 		goto done;
1541429711aeSJason Wang 	}
1542429711aeSJason Wang 
1543429711aeSJason Wang 	ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
1544429711aeSJason Wang 	      sizeof(struct vhost_msg_v2);
15456b1e6cc7SJason Wang done:
15466b1e6cc7SJason Wang 	return ret;
15476b1e6cc7SJason Wang }
15486b1e6cc7SJason Wang EXPORT_SYMBOL(vhost_chr_write_iter);
15496b1e6cc7SJason Wang 
1550afc9a42bSAl Viro __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
15516b1e6cc7SJason Wang 			    poll_table *wait)
15526b1e6cc7SJason Wang {
1553afc9a42bSAl Viro 	__poll_t mask = 0;
15546b1e6cc7SJason Wang 
15556b1e6cc7SJason Wang 	poll_wait(file, &dev->wait, wait);
15566b1e6cc7SJason Wang 
15576b1e6cc7SJason Wang 	if (!list_empty(&dev->read_list))
1558a9a08845SLinus Torvalds 		mask |= EPOLLIN | EPOLLRDNORM;
15596b1e6cc7SJason Wang 
15606b1e6cc7SJason Wang 	return mask;
15616b1e6cc7SJason Wang }
15626b1e6cc7SJason Wang EXPORT_SYMBOL(vhost_chr_poll);
15636b1e6cc7SJason Wang 
15646b1e6cc7SJason Wang ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
15656b1e6cc7SJason Wang 			    int noblock)
15666b1e6cc7SJason Wang {
15676b1e6cc7SJason Wang 	DEFINE_WAIT(wait);
15686b1e6cc7SJason Wang 	struct vhost_msg_node *node;
15696b1e6cc7SJason Wang 	ssize_t ret = 0;
15706b1e6cc7SJason Wang 	unsigned size = sizeof(struct vhost_msg);
15716b1e6cc7SJason Wang 
15726b1e6cc7SJason Wang 	if (iov_iter_count(to) < size)
15736b1e6cc7SJason Wang 		return 0;
15746b1e6cc7SJason Wang 
15756b1e6cc7SJason Wang 	while (1) {
15766b1e6cc7SJason Wang 		if (!noblock)
15776b1e6cc7SJason Wang 			prepare_to_wait(&dev->wait, &wait,
15786b1e6cc7SJason Wang 					TASK_INTERRUPTIBLE);
15796b1e6cc7SJason Wang 
15806b1e6cc7SJason Wang 		node = vhost_dequeue_msg(dev, &dev->read_list);
15816b1e6cc7SJason Wang 		if (node)
15826b1e6cc7SJason Wang 			break;
15836b1e6cc7SJason Wang 		if (noblock) {
15846b1e6cc7SJason Wang 			ret = -EAGAIN;
15856b1e6cc7SJason Wang 			break;
15866b1e6cc7SJason Wang 		}
15876b1e6cc7SJason Wang 		if (signal_pending(current)) {
15886b1e6cc7SJason Wang 			ret = -ERESTARTSYS;
15896b1e6cc7SJason Wang 			break;
15906b1e6cc7SJason Wang 		}
15916b1e6cc7SJason Wang 		if (!dev->iotlb) {
15926b1e6cc7SJason Wang 			ret = -EBADFD;
15936b1e6cc7SJason Wang 			break;
15946b1e6cc7SJason Wang 		}
15956b1e6cc7SJason Wang 
15966b1e6cc7SJason Wang 		schedule();
15976b1e6cc7SJason Wang 	}
15986b1e6cc7SJason Wang 
15996b1e6cc7SJason Wang 	if (!noblock)
16006b1e6cc7SJason Wang 		finish_wait(&dev->wait, &wait);
16016b1e6cc7SJason Wang 
16026b1e6cc7SJason Wang 	if (node) {
1603429711aeSJason Wang 		struct vhost_iotlb_msg *msg;
1604429711aeSJason Wang 		void *start = &node->msg;
16056b1e6cc7SJason Wang 
1606429711aeSJason Wang 		switch (node->msg.type) {
1607429711aeSJason Wang 		case VHOST_IOTLB_MSG:
1608429711aeSJason Wang 			size = sizeof(node->msg);
1609429711aeSJason Wang 			msg = &node->msg.iotlb;
1610429711aeSJason Wang 			break;
1611429711aeSJason Wang 		case VHOST_IOTLB_MSG_V2:
1612429711aeSJason Wang 			size = sizeof(node->msg_v2);
1613429711aeSJason Wang 			msg = &node->msg_v2.iotlb;
1614429711aeSJason Wang 			break;
1615429711aeSJason Wang 		default:
1616429711aeSJason Wang 			BUG();
1617429711aeSJason Wang 			break;
1618429711aeSJason Wang 		}
1619429711aeSJason Wang 
1620429711aeSJason Wang 		ret = copy_to_iter(start, size, to);
1621429711aeSJason Wang 		if (ret != size || msg->type != VHOST_IOTLB_MISS) {
16226b1e6cc7SJason Wang 			kfree(node);
16236b1e6cc7SJason Wang 			return ret;
16246b1e6cc7SJason Wang 		}
16256b1e6cc7SJason Wang 		vhost_enqueue_msg(dev, &dev->pending_list, node);
16266b1e6cc7SJason Wang 	}
16276b1e6cc7SJason Wang 
16286b1e6cc7SJason Wang 	return ret;
16296b1e6cc7SJason Wang }
16306b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_chr_read_iter);
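
/* The consuming side, as a rough sketch (vhost_fd and service_miss()
 * are placeholders): the VMM polls the device fd (see vhost_chr_poll()
 * above), reads one message per miss and answers it with the
 * VHOST_IOTLB_UPDATE write shown before vhost_chr_write_iter().
 *
 *	struct vhost_msg_v2 miss;
 *	if (read(vhost_fd, &miss, sizeof(miss)) == sizeof(miss) &&
 *	    miss.iotlb.type == VHOST_IOTLB_MISS)
 *		service_miss(miss.iotlb.iova, miss.iotlb.perm);
 */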
16316b1e6cc7SJason Wang 
16326b1e6cc7SJason Wang static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
16336b1e6cc7SJason Wang {
16346b1e6cc7SJason Wang 	struct vhost_dev *dev = vq->dev;
16356b1e6cc7SJason Wang 	struct vhost_msg_node *node;
16366b1e6cc7SJason Wang 	struct vhost_iotlb_msg *msg;
1637429711aeSJason Wang 	bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
16386b1e6cc7SJason Wang 
1639429711aeSJason Wang 	node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
16406b1e6cc7SJason Wang 	if (!node)
16416b1e6cc7SJason Wang 		return -ENOMEM;
16426b1e6cc7SJason Wang 
1643429711aeSJason Wang 	if (v2) {
1644429711aeSJason Wang 		node->msg_v2.type = VHOST_IOTLB_MSG_V2;
1645429711aeSJason Wang 		msg = &node->msg_v2.iotlb;
1646429711aeSJason Wang 	} else {
16476b1e6cc7SJason Wang 		msg = &node->msg.iotlb;
1648429711aeSJason Wang 	}
1649429711aeSJason Wang 
16506b1e6cc7SJason Wang 	msg->type = VHOST_IOTLB_MISS;
16516b1e6cc7SJason Wang 	msg->iova = iova;
16526b1e6cc7SJason Wang 	msg->perm = access;
16536b1e6cc7SJason Wang 
16546b1e6cc7SJason Wang 	vhost_enqueue_msg(dev, &dev->read_list, node);
16556b1e6cc7SJason Wang 
16566b1e6cc7SJason Wang 	return 0;
1657bfe2bc51SJason Wang }
1658bfe2bc51SJason Wang 
1659ddd3d408SStefan Hajnoczi static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
1660a865e420SMichael S. Tsirkin 			 vring_desc_t __user *desc,
1661a865e420SMichael S. Tsirkin 			 vring_avail_t __user *avail,
1662a865e420SMichael S. Tsirkin 			 vring_used_t __user *used)
16636b1e6cc7SJason Wang 
16643a4d5c94SMichael S. Tsirkin {
16650210a8dbSGreg Kurz 	/* If an IOTLB device is present, the vring addresses are
16660210a8dbSGreg Kurz 	 * GIOVAs. Access validation occurs at prefetch time. */
16670210a8dbSGreg Kurz 	if (vq->iotlb)
16680210a8dbSGreg Kurz 		return true;
16690210a8dbSGreg Kurz 
16704942e825SJason Wang 	return access_ok(desc, vhost_get_desc_size(vq, num)) &&
16714942e825SJason Wang 	       access_ok(avail, vhost_get_avail_size(vq, num)) &&
16724942e825SJason Wang 	       access_ok(used, vhost_get_used_size(vq, num));
16733a4d5c94SMichael S. Tsirkin }
16743a4d5c94SMichael S. Tsirkin 
1675f8894913SJason Wang static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
16760bbe3066SJason Wang 				 const struct vhost_iotlb_map *map,
1677f8894913SJason Wang 				 int type)
1678f8894913SJason Wang {
1679f8894913SJason Wang 	int access = (type == VHOST_ADDR_USED) ?
1680f8894913SJason Wang 		     VHOST_ACCESS_WO : VHOST_ACCESS_RO;
1681f8894913SJason Wang 
16820bbe3066SJason Wang 	if (likely(map->perm & access))
16830bbe3066SJason Wang 		vq->meta_iotlb[type] = map;
1684f8894913SJason Wang }
1685f8894913SJason Wang 
1686ddd3d408SStefan Hajnoczi static bool iotlb_access_ok(struct vhost_virtqueue *vq,
1687f8894913SJason Wang 			    int access, u64 addr, u64 len, int type)
16886b1e6cc7SJason Wang {
16890bbe3066SJason Wang 	const struct vhost_iotlb_map *map;
16900bbe3066SJason Wang 	struct vhost_iotlb *umem = vq->iotlb;
1691ca2c5b33SMichael S. Tsirkin 	u64 s = 0, size, orig_addr = addr, last = addr + len - 1;
1692f8894913SJason Wang 
1693f8894913SJason Wang 	if (vhost_vq_meta_fetch(vq, addr, len, type))
1694f8894913SJason Wang 		return true;
16956b1e6cc7SJason Wang 
16966b1e6cc7SJason Wang 	while (len > s) {
16970bbe3066SJason Wang 		map = vhost_iotlb_itree_first(umem, addr, last);
16980bbe3066SJason Wang 		if (map == NULL || map->start > addr) {
16996b1e6cc7SJason Wang 			vhost_iotlb_miss(vq, addr, access);
17006b1e6cc7SJason Wang 			return false;
17010bbe3066SJason Wang 		} else if (!(map->perm & access)) {
17026b1e6cc7SJason Wang 			/* Report the possible access violation by
17036b1e6cc7SJason Wang 			 * requesting another translation from userspace.
17046b1e6cc7SJason Wang 			 */
17056b1e6cc7SJason Wang 			return false;
17066b1e6cc7SJason Wang 		}
17076b1e6cc7SJason Wang 
17080bbe3066SJason Wang 		size = map->size - addr + map->start;
1709f8894913SJason Wang 
1710f8894913SJason Wang 		if (orig_addr == addr && size >= len)
17110bbe3066SJason Wang 			vhost_vq_meta_update(vq, map, type);
1712f8894913SJason Wang 
17136b1e6cc7SJason Wang 		s += size;
17146b1e6cc7SJason Wang 		addr += size;
17156b1e6cc7SJason Wang 	}
17166b1e6cc7SJason Wang 
17176b1e6cc7SJason Wang 	return true;
17186b1e6cc7SJason Wang }
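
/* E.g. an 8KiB range backed by two adjacent 4KiB mappings passes after
 * two loop iterations; only a range satisfied entirely by its first
 * mapping (orig_addr == addr && size >= len) gets cached through
 * vhost_vq_meta_update().
 */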
17196b1e6cc7SJason Wang 
17209b5e830bSJason Wang int vq_meta_prefetch(struct vhost_virtqueue *vq)
17216b1e6cc7SJason Wang {
17226b1e6cc7SJason Wang 	unsigned int num = vq->num;
17236b1e6cc7SJason Wang 
17243d2c7d37SMichael S. Tsirkin 	if (!vq->iotlb)
17256b1e6cc7SJason Wang 		return 1;
17266b1e6cc7SJason Wang 
17270bbe3066SJason Wang 	return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc,
17284942e825SJason Wang 			       vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
17290bbe3066SJason Wang 	       iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail,
17304942e825SJason Wang 			       vhost_get_avail_size(vq, num),
1731f8894913SJason Wang 			       VHOST_ADDR_AVAIL) &&
17320bbe3066SJason Wang 	       iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used,
17334942e825SJason Wang 			       vhost_get_used_size(vq, num), VHOST_ADDR_USED);
17346b1e6cc7SJason Wang }
17359b5e830bSJason Wang EXPORT_SYMBOL_GPL(vq_meta_prefetch);
17366b1e6cc7SJason Wang 
17373a4d5c94SMichael S. Tsirkin /* Can we log writes? */
17383a4d5c94SMichael S. Tsirkin /* Caller should have device mutex but not vq mutex */
1739ddd3d408SStefan Hajnoczi bool vhost_log_access_ok(struct vhost_dev *dev)
17403a4d5c94SMichael S. Tsirkin {
1741a9709d68SJason Wang 	return memory_access_ok(dev, dev->umem, 1);
17423a4d5c94SMichael S. Tsirkin }
17436ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_log_access_ok);
17443a4d5c94SMichael S. Tsirkin 
1745ab512251SGreg Kurz static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
1746ab512251SGreg Kurz 				  void __user *log_base,
1747ab512251SGreg Kurz 				  bool log_used,
1748ab512251SGreg Kurz 				  u64 log_addr)
1749ab512251SGreg Kurz {
1750ab512251SGreg Kurz 	/* If an IOTLB device is present, log_addr is a GIOVA that
1751ab512251SGreg Kurz 	 * will never be logged by log_used(). */
1752ab512251SGreg Kurz 	if (vq->iotlb)
1753ab512251SGreg Kurz 		return true;
1754ab512251SGreg Kurz 
1755ab512251SGreg Kurz 	return !log_used || log_access_ok(log_base, log_addr,
1756ab512251SGreg Kurz 					  vhost_get_used_size(vq, vq->num));
1757ab512251SGreg Kurz }
1758ab512251SGreg Kurz 
17593a4d5c94SMichael S. Tsirkin /* Verify access for write logging. */
17603a4d5c94SMichael S. Tsirkin /* Caller should have vq mutex and device mutex */
1761ddd3d408SStefan Hajnoczi static bool vq_log_access_ok(struct vhost_virtqueue *vq,
17628ea8cf89SMichael S. Tsirkin 			     void __user *log_base)
17633a4d5c94SMichael S. Tsirkin {
1764a9709d68SJason Wang 	return vq_memory_access_ok(log_base, vq->umem,
1765ea16c514SMichael S. Tsirkin 				   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
1766ab512251SGreg Kurz 		vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
17673a4d5c94SMichael S. Tsirkin }
17683a4d5c94SMichael S. Tsirkin 
17693a4d5c94SMichael S. Tsirkin /* Can we start vq? */
17703a4d5c94SMichael S. Tsirkin /* Caller should have vq mutex and device mutex */
1771ddd3d408SStefan Hajnoczi bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
17723a4d5c94SMichael S. Tsirkin {
1773d14d2b78SStefan Hajnoczi 	if (!vq_log_access_ok(vq, vq->log_base))
1774ddd3d408SStefan Hajnoczi 		return false;
1775d65026c6SJason Wang 
1776d65026c6SJason Wang 	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
17773a4d5c94SMichael S. Tsirkin }
17786ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
17793a4d5c94SMichael S. Tsirkin 
17803a4d5c94SMichael S. Tsirkin static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
17813a4d5c94SMichael S. Tsirkin {
1782a9709d68SJason Wang 	struct vhost_memory mem, *newmem;
1783a9709d68SJason Wang 	struct vhost_memory_region *region;
17840bbe3066SJason Wang 	struct vhost_iotlb *newumem, *oldumem;
17853a4d5c94SMichael S. Tsirkin 	unsigned long size = offsetof(struct vhost_memory, regions);
178698f9ca0aSMichael S. Tsirkin 	int i;
1787d47effe1SKrishna Kumar 
17887ad9c9d2STakuya Yoshikawa 	if (copy_from_user(&mem, m, size))
17897ad9c9d2STakuya Yoshikawa 		return -EFAULT;
17903a4d5c94SMichael S. Tsirkin 	if (mem.padding)
17913a4d5c94SMichael S. Tsirkin 		return -EOPNOTSUPP;
1792c9ce42f7SIgor Mammedov 	if (mem.nregions > max_mem_regions)
17933a4d5c94SMichael S. Tsirkin 		return -E2BIG;
1794b2303d7bSMatthew Wilcox 	newmem = kvzalloc(struct_size(newmem, regions, mem.nregions),
1795b2303d7bSMatthew Wilcox 			GFP_KERNEL);
17963a4d5c94SMichael S. Tsirkin 	if (!newmem)
17973a4d5c94SMichael S. Tsirkin 		return -ENOMEM;
17983a4d5c94SMichael S. Tsirkin 
17993a4d5c94SMichael S. Tsirkin 	memcpy(newmem, &mem, size);
18007ad9c9d2STakuya Yoshikawa 	if (copy_from_user(newmem->regions, m->regions,
1801bf11d71aSGustavo A. R. Silva 			   flex_array_size(newmem, regions, mem.nregions))) {
1802bcfeacabSIgor Mammedov 		kvfree(newmem);
18037ad9c9d2STakuya Yoshikawa 		return -EFAULT;
18043a4d5c94SMichael S. Tsirkin 	}
18053a4d5c94SMichael S. Tsirkin 
18060bbe3066SJason Wang 	newumem = iotlb_alloc();
1807a9709d68SJason Wang 	if (!newumem) {
18084de7255fSIgor Mammedov 		kvfree(newmem);
1809a9709d68SJason Wang 		return -ENOMEM;
1810a02c3789STakuya Yoshikawa 	}
1811a9709d68SJason Wang 
1812a9709d68SJason Wang 	for (region = newmem->regions;
1813a9709d68SJason Wang 	     region < newmem->regions + mem.nregions;
1814a9709d68SJason Wang 	     region++) {
18150bbe3066SJason Wang 		if (vhost_iotlb_add_range(newumem,
18166b1e6cc7SJason Wang 					  region->guest_phys_addr,
18176b1e6cc7SJason Wang 					  region->guest_phys_addr +
18186b1e6cc7SJason Wang 					  region->memory_size - 1,
18196b1e6cc7SJason Wang 					  region->userspace_addr,
18200bbe3066SJason Wang 					  VHOST_MAP_RW))
1821a9709d68SJason Wang 			goto err;
1822a9709d68SJason Wang 	}
1823a9709d68SJason Wang 
1824a9709d68SJason Wang 	if (!memory_access_ok(d, newumem, 0))
1825a9709d68SJason Wang 		goto err;
1826a9709d68SJason Wang 
1827a9709d68SJason Wang 	oldumem = d->umem;
1828a9709d68SJason Wang 	d->umem = newumem;
182998f9ca0aSMichael S. Tsirkin 
183047283befSMichael S. Tsirkin 	/* All memory accesses are done under some VQ mutex. */
183198f9ca0aSMichael S. Tsirkin 	for (i = 0; i < d->nvqs; ++i) {
183298f9ca0aSMichael S. Tsirkin 		mutex_lock(&d->vqs[i]->mutex);
1833a9709d68SJason Wang 		d->vqs[i]->umem = newumem;
183498f9ca0aSMichael S. Tsirkin 		mutex_unlock(&d->vqs[i]->mutex);
183598f9ca0aSMichael S. Tsirkin 	}
1836a9709d68SJason Wang 
1837a9709d68SJason Wang 	kvfree(newmem);
18380bbe3066SJason Wang 	vhost_iotlb_free(oldumem);
18393a4d5c94SMichael S. Tsirkin 	return 0;
1840a9709d68SJason Wang 
1841a9709d68SJason Wang err:
18420bbe3066SJason Wang 	vhost_iotlb_free(newumem);
1843a9709d68SJason Wang 	kvfree(newmem);
1844a9709d68SJason Wang 	return -EFAULT;
18453a4d5c94SMichael S. Tsirkin }
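
/* Userspace view of the above, as a minimal sketch assuming a single
 * RAM region identity-mapped at GPA 0 (vhost_fd, ram and ram_size are
 * placeholders):
 *
 *	struct {
 *		struct vhost_memory m;
 *		struct vhost_memory_region r[1];
 *	} mem = {
 *		.m = { .nregions = 1 },
 *		.r[0] = {
 *			.guest_phys_addr = 0,
 *			.memory_size     = ram_size,
 *			.userspace_addr  = (__u64)(uintptr_t)ram,
 *		},
 *	};
 *	ioctl(vhost_fd, VHOST_SET_MEM_TABLE, &mem.m);
 */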
18463a4d5c94SMichael S. Tsirkin 
1847feebcaeaSJason Wang static long vhost_vring_set_num(struct vhost_dev *d,
1848feebcaeaSJason Wang 				struct vhost_virtqueue *vq,
1849feebcaeaSJason Wang 				void __user *argp)
1850feebcaeaSJason Wang {
1851feebcaeaSJason Wang 	struct vhost_vring_state s;
1852feebcaeaSJason Wang 
1853feebcaeaSJason Wang 	/* Resizing ring with an active backend?
1854feebcaeaSJason Wang 	 * You don't want to do that. */
1855feebcaeaSJason Wang 	if (vq->private_data)
1856feebcaeaSJason Wang 		return -EBUSY;
1857feebcaeaSJason Wang 
1858feebcaeaSJason Wang 	if (copy_from_user(&s, argp, sizeof s))
1859feebcaeaSJason Wang 		return -EFAULT;
1860feebcaeaSJason Wang 
1861feebcaeaSJason Wang 	if (!s.num || s.num > 0xffff || (s.num & (s.num - 1)))
1862feebcaeaSJason Wang 		return -EINVAL;
1863feebcaeaSJason Wang 	vq->num = s.num;
1864feebcaeaSJason Wang 
1865feebcaeaSJason Wang 	return 0;
1866feebcaeaSJason Wang }
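
/* The (s.num & (s.num - 1)) test accepts only powers of two, matching
 * the (idx & (vq->num - 1)) masking used elsewhere in this file:
 * e.g. 256 & 255 == 0 passes, while 320 & 319 == 256 is rejected.
 */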
1867feebcaeaSJason Wang 
1868feebcaeaSJason Wang static long vhost_vring_set_addr(struct vhost_dev *d,
1869feebcaeaSJason Wang 				 struct vhost_virtqueue *vq,
1870feebcaeaSJason Wang 				 void __user *argp)
1871feebcaeaSJason Wang {
1872feebcaeaSJason Wang 	struct vhost_vring_addr a;
1873feebcaeaSJason Wang 
1874feebcaeaSJason Wang 	if (copy_from_user(&a, argp, sizeof a))
1875feebcaeaSJason Wang 		return -EFAULT;
1876feebcaeaSJason Wang 	if (a.flags & ~(0x1 << VHOST_VRING_F_LOG))
1877feebcaeaSJason Wang 		return -EOPNOTSUPP;
1878feebcaeaSJason Wang 
1879feebcaeaSJason Wang 	/* For 32-bit, verify that the top 32 bits of the user
1880feebcaeaSJason Wang 	   data are set to zero. */
1881feebcaeaSJason Wang 	if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
1882feebcaeaSJason Wang 	    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
1883feebcaeaSJason Wang 	    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr)
1884feebcaeaSJason Wang 		return -EFAULT;
1885feebcaeaSJason Wang 
1886feebcaeaSJason Wang 	/* Make sure it's safe to cast pointers to vring types. */
1887feebcaeaSJason Wang 	BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
1888feebcaeaSJason Wang 	BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
1889feebcaeaSJason Wang 	if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
1890feebcaeaSJason Wang 	    (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
1891feebcaeaSJason Wang 	    (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1)))
1892feebcaeaSJason Wang 		return -EINVAL;
1893feebcaeaSJason Wang 
1894feebcaeaSJason Wang 	/* We only verify access here if backend is configured.
1895feebcaeaSJason Wang 	 * If it is not, we don't, as the size might not have been set up.
1896feebcaeaSJason Wang 	 * We will verify when backend is configured. */
1897feebcaeaSJason Wang 	if (vq->private_data) {
1898feebcaeaSJason Wang 		if (!vq_access_ok(vq, vq->num,
1899feebcaeaSJason Wang 			(void __user *)(unsigned long)a.desc_user_addr,
1900feebcaeaSJason Wang 			(void __user *)(unsigned long)a.avail_user_addr,
1901feebcaeaSJason Wang 			(void __user *)(unsigned long)a.used_user_addr))
1902feebcaeaSJason Wang 			return -EINVAL;
1903feebcaeaSJason Wang 
1904feebcaeaSJason Wang 		/* Also validate log access for used ring if enabled. */
1905ab512251SGreg Kurz 		if (!vq_log_used_access_ok(vq, vq->log_base,
1906ab512251SGreg Kurz 				a.flags & (0x1 << VHOST_VRING_F_LOG),
1907ab512251SGreg Kurz 				a.log_guest_addr))
1908feebcaeaSJason Wang 			return -EINVAL;
1909feebcaeaSJason Wang 	}
1910feebcaeaSJason Wang 
1911feebcaeaSJason Wang 	vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
1912feebcaeaSJason Wang 	vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
1913feebcaeaSJason Wang 	vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
1914feebcaeaSJason Wang 	vq->log_addr = a.log_guest_addr;
1915feebcaeaSJason Wang 	vq->used = (void __user *)(unsigned long)a.used_user_addr;
1916feebcaeaSJason Wang 
1917feebcaeaSJason Wang 	return 0;
1918feebcaeaSJason Wang }
1919feebcaeaSJason Wang 
1920feebcaeaSJason Wang static long vhost_vring_set_num_addr(struct vhost_dev *d,
1921feebcaeaSJason Wang 				     struct vhost_virtqueue *vq,
1922feebcaeaSJason Wang 				     unsigned int ioctl,
1923feebcaeaSJason Wang 				     void __user *argp)
1924feebcaeaSJason Wang {
1925feebcaeaSJason Wang 	long r;
1926feebcaeaSJason Wang 
1927feebcaeaSJason Wang 	mutex_lock(&vq->mutex);
1928feebcaeaSJason Wang 
1929feebcaeaSJason Wang 	switch (ioctl) {
1930feebcaeaSJason Wang 	case VHOST_SET_VRING_NUM:
1931feebcaeaSJason Wang 		r = vhost_vring_set_num(d, vq, argp);
1932feebcaeaSJason Wang 		break;
1933feebcaeaSJason Wang 	case VHOST_SET_VRING_ADDR:
1934feebcaeaSJason Wang 		r = vhost_vring_set_addr(d, vq, argp);
1935feebcaeaSJason Wang 		break;
1936feebcaeaSJason Wang 	default:
1937feebcaeaSJason Wang 		BUG();
1938feebcaeaSJason Wang 	}
1939feebcaeaSJason Wang 
1940feebcaeaSJason Wang 	mutex_unlock(&vq->mutex);
1941feebcaeaSJason Wang 
1942feebcaeaSJason Wang 	return r;
1943feebcaeaSJason Wang }
194426b36604SSonny Rao long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
19453a4d5c94SMichael S. Tsirkin {
1946cecb46f1SAl Viro 	struct file *eventfp, *filep = NULL;
1947cecb46f1SAl Viro 	bool pollstart = false, pollstop = false;
19483a4d5c94SMichael S. Tsirkin 	struct eventfd_ctx *ctx = NULL;
19493a4d5c94SMichael S. Tsirkin 	struct vhost_virtqueue *vq;
19503a4d5c94SMichael S. Tsirkin 	struct vhost_vring_state s;
19513a4d5c94SMichael S. Tsirkin 	struct vhost_vring_file f;
19523a4d5c94SMichael S. Tsirkin 	u32 idx;
19533a4d5c94SMichael S. Tsirkin 	long r;
19543a4d5c94SMichael S. Tsirkin 
1955cef25866SMike Christie 	r = vhost_get_vq_from_user(d, argp, &vq, &idx);
19563a4d5c94SMichael S. Tsirkin 	if (r < 0)
19573a4d5c94SMichael S. Tsirkin 		return r;
19583a4d5c94SMichael S. Tsirkin 
1959feebcaeaSJason Wang 	if (ioctl == VHOST_SET_VRING_NUM ||
1960feebcaeaSJason Wang 	    ioctl == VHOST_SET_VRING_ADDR) {
1961feebcaeaSJason Wang 		return vhost_vring_set_num_addr(d, vq, ioctl, argp);
1962feebcaeaSJason Wang 	}
1963feebcaeaSJason Wang 
19643a4d5c94SMichael S. Tsirkin 	mutex_lock(&vq->mutex);
19653a4d5c94SMichael S. Tsirkin 
19663a4d5c94SMichael S. Tsirkin 	switch (ioctl) {
19673a4d5c94SMichael S. Tsirkin 	case VHOST_SET_VRING_BASE:
19683a4d5c94SMichael S. Tsirkin 		/* Moving base with an active backend?
19693a4d5c94SMichael S. Tsirkin 		 * You don't want to do that. */
19703a4d5c94SMichael S. Tsirkin 		if (vq->private_data) {
19713a4d5c94SMichael S. Tsirkin 			r = -EBUSY;
19723a4d5c94SMichael S. Tsirkin 			break;
19733a4d5c94SMichael S. Tsirkin 		}
19747ad9c9d2STakuya Yoshikawa 		if (copy_from_user(&s, argp, sizeof s)) {
19757ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
19763a4d5c94SMichael S. Tsirkin 			break;
19777ad9c9d2STakuya Yoshikawa 		}
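		/* For packed rings, the 32-bit payload carries both ring
		 * indices: last_avail in the low 16 bits and last_used in
		 * the high 16 bits, each with its wrap counter in bit 15.
		 */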
197855d8122fSShannon Nelson 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
197955d8122fSShannon Nelson 			vq->last_avail_idx = s.num & 0xffff;
198055d8122fSShannon Nelson 			vq->last_used_idx = (s.num >> 16) & 0xffff;
198155d8122fSShannon Nelson 		} else {
19823a4d5c94SMichael S. Tsirkin 			if (s.num > 0xffff) {
19833a4d5c94SMichael S. Tsirkin 				r = -EINVAL;
19843a4d5c94SMichael S. Tsirkin 				break;
19853a4d5c94SMichael S. Tsirkin 			}
19868d65843cSJason Wang 			vq->last_avail_idx = s.num;
198755d8122fSShannon Nelson 		}
19883a4d5c94SMichael S. Tsirkin 		/* Forget the cached index value. */
19893a4d5c94SMichael S. Tsirkin 		vq->avail_idx = vq->last_avail_idx;
19903a4d5c94SMichael S. Tsirkin 		break;
19913a4d5c94SMichael S. Tsirkin 	case VHOST_GET_VRING_BASE:
19923a4d5c94SMichael S. Tsirkin 		s.index = idx;
199355d8122fSShannon Nelson 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
199455d8122fSShannon Nelson 			s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
199555d8122fSShannon Nelson 		else
19963a4d5c94SMichael S. Tsirkin 			s.num = vq->last_avail_idx;
19977ad9c9d2STakuya Yoshikawa 		if (copy_to_user(argp, &s, sizeof s))
19987ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
19993a4d5c94SMichael S. Tsirkin 		break;
20003a4d5c94SMichael S. Tsirkin 	case VHOST_SET_VRING_KICK:
20017ad9c9d2STakuya Yoshikawa 		if (copy_from_user(&f, argp, sizeof f)) {
20027ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
20033a4d5c94SMichael S. Tsirkin 			break;
20047ad9c9d2STakuya Yoshikawa 		}
2005e0136c16SZhu Lingshan 		eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
2006535297a6SMichael S. Tsirkin 		if (IS_ERR(eventfp)) {
2007535297a6SMichael S. Tsirkin 			r = PTR_ERR(eventfp);
2008535297a6SMichael S. Tsirkin 			break;
2009535297a6SMichael S. Tsirkin 		}
20103a4d5c94SMichael S. Tsirkin 		if (eventfp != vq->kick) {
2011cecb46f1SAl Viro 			pollstop = (filep = vq->kick) != NULL;
2012cecb46f1SAl Viro 			pollstart = (vq->kick = eventfp) != NULL;
20133a4d5c94SMichael S. Tsirkin 		} else
20143a4d5c94SMichael S. Tsirkin 			filep = eventfp;
20153a4d5c94SMichael S. Tsirkin 		break;
20163a4d5c94SMichael S. Tsirkin 	case VHOST_SET_VRING_CALL:
20177ad9c9d2STakuya Yoshikawa 		if (copy_from_user(&f, argp, sizeof f)) {
20187ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
20193a4d5c94SMichael S. Tsirkin 			break;
20207ad9c9d2STakuya Yoshikawa 		}
2021e0136c16SZhu Lingshan 		ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
2022e050c7d9SEric Biggers 		if (IS_ERR(ctx)) {
2023e050c7d9SEric Biggers 			r = PTR_ERR(ctx);
2024535297a6SMichael S. Tsirkin 			break;
2025535297a6SMichael S. Tsirkin 		}
2026265a0ad8SZhu Lingshan 
2027265a0ad8SZhu Lingshan 		swap(ctx, vq->call_ctx.ctx);
20283a4d5c94SMichael S. Tsirkin 		break;
20293a4d5c94SMichael S. Tsirkin 	case VHOST_SET_VRING_ERR:
20307ad9c9d2STakuya Yoshikawa 		if (copy_from_user(&f, argp, sizeof f)) {
20317ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
20323a4d5c94SMichael S. Tsirkin 			break;
20337ad9c9d2STakuya Yoshikawa 		}
2034e0136c16SZhu Lingshan 		ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
203509f332a5SEric Biggers 		if (IS_ERR(ctx)) {
203609f332a5SEric Biggers 			r = PTR_ERR(ctx);
2037535297a6SMichael S. Tsirkin 			break;
2038535297a6SMichael S. Tsirkin 		}
203909f332a5SEric Biggers 		swap(ctx, vq->error_ctx);
20403a4d5c94SMichael S. Tsirkin 		break;
20412751c988SGreg Kurz 	case VHOST_SET_VRING_ENDIAN:
20422751c988SGreg Kurz 		r = vhost_set_vring_endian(vq, argp);
20432751c988SGreg Kurz 		break;
20442751c988SGreg Kurz 	case VHOST_GET_VRING_ENDIAN:
20452751c988SGreg Kurz 		r = vhost_get_vring_endian(vq, idx, argp);
20462751c988SGreg Kurz 		break;
204703088137SJason Wang 	case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
204803088137SJason Wang 		if (copy_from_user(&s, argp, sizeof(s))) {
204903088137SJason Wang 			r = -EFAULT;
205003088137SJason Wang 			break;
205103088137SJason Wang 		}
205203088137SJason Wang 		vq->busyloop_timeout = s.num;
205303088137SJason Wang 		break;
205403088137SJason Wang 	case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
205503088137SJason Wang 		s.index = idx;
205603088137SJason Wang 		s.num = vq->busyloop_timeout;
205703088137SJason Wang 		if (copy_to_user(argp, &s, sizeof(s)))
205803088137SJason Wang 			r = -EFAULT;
205903088137SJason Wang 		break;
20603a4d5c94SMichael S. Tsirkin 	default:
20613a4d5c94SMichael S. Tsirkin 		r = -ENOIOCTLCMD;
20623a4d5c94SMichael S. Tsirkin 	}
20633a4d5c94SMichael S. Tsirkin 
20643a4d5c94SMichael S. Tsirkin 	if (pollstop && vq->handle_kick)
20653a4d5c94SMichael S. Tsirkin 		vhost_poll_stop(&vq->poll);
20663a4d5c94SMichael S. Tsirkin 
2067e050c7d9SEric Biggers 	if (!IS_ERR_OR_NULL(ctx))
20683a4d5c94SMichael S. Tsirkin 		eventfd_ctx_put(ctx);
20693a4d5c94SMichael S. Tsirkin 	if (filep)
20703a4d5c94SMichael S. Tsirkin 		fput(filep);
20713a4d5c94SMichael S. Tsirkin 
20723a4d5c94SMichael S. Tsirkin 	if (pollstart && vq->handle_kick)
20732b8b328bSJason Wang 		r = vhost_poll_start(&vq->poll, vq->kick);
20743a4d5c94SMichael S. Tsirkin 
20753a4d5c94SMichael S. Tsirkin 	mutex_unlock(&vq->mutex);
20763a4d5c94SMichael S. Tsirkin 
20773a4d5c94SMichael S. Tsirkin 	if (pollstop && vq->handle_kick)
2078b2ffa407SMike Christie 		vhost_dev_flush(vq->poll.dev);
20793a4d5c94SMichael S. Tsirkin 	return r;
20803a4d5c94SMichael S. Tsirkin }
20816ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
20823a4d5c94SMichael S. Tsirkin 
2083759aba1eSLiming Wu int vhost_init_device_iotlb(struct vhost_dev *d)
20846b1e6cc7SJason Wang {
20850bbe3066SJason Wang 	struct vhost_iotlb *niotlb, *oiotlb;
20866b1e6cc7SJason Wang 	int i;
20876b1e6cc7SJason Wang 
20880bbe3066SJason Wang 	niotlb = iotlb_alloc();
20896b1e6cc7SJason Wang 	if (!niotlb)
20906b1e6cc7SJason Wang 		return -ENOMEM;
20916b1e6cc7SJason Wang 
20926b1e6cc7SJason Wang 	oiotlb = d->iotlb;
20936b1e6cc7SJason Wang 	d->iotlb = niotlb;
20946b1e6cc7SJason Wang 
20956b1e6cc7SJason Wang 	for (i = 0; i < d->nvqs; ++i) {
2096b13f9c63SJason Wang 		struct vhost_virtqueue *vq = d->vqs[i];
2097b13f9c63SJason Wang 
2098b13f9c63SJason Wang 		mutex_lock(&vq->mutex);
2099b13f9c63SJason Wang 		vq->iotlb = niotlb;
2100b13f9c63SJason Wang 		__vhost_vq_meta_reset(vq);
2101b13f9c63SJason Wang 		mutex_unlock(&vq->mutex);
21026b1e6cc7SJason Wang 	}
21036b1e6cc7SJason Wang 
21040bbe3066SJason Wang 	vhost_iotlb_free(oiotlb);
21056b1e6cc7SJason Wang 
21066b1e6cc7SJason Wang 	return 0;
21076b1e6cc7SJason Wang }
21086b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
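
/* From this point on, ring addresses supplied by userspace are treated
 * as IOVAs: vq accesses are translated through the iotlb and may raise
 * VHOST_IOTLB_MISS messages that userspace must service before the
 * ring can make progress.
 */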
21096b1e6cc7SJason Wang 
21103a4d5c94SMichael S. Tsirkin /* Caller must have device mutex */
2111935cdee7SMichael S. Tsirkin long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
21123a4d5c94SMichael S. Tsirkin {
2113d25cc43cSEric Biggers 	struct eventfd_ctx *ctx;
21143a4d5c94SMichael S. Tsirkin 	u64 p;
21153a4d5c94SMichael S. Tsirkin 	long r;
21163a4d5c94SMichael S. Tsirkin 	int i, fd;
21173a4d5c94SMichael S. Tsirkin 
21183a4d5c94SMichael S. Tsirkin 	/* If you are not the owner, you can become one */
21193a4d5c94SMichael S. Tsirkin 	if (ioctl == VHOST_SET_OWNER) {
21203a4d5c94SMichael S. Tsirkin 		r = vhost_dev_set_owner(d);
21213a4d5c94SMichael S. Tsirkin 		goto done;
21223a4d5c94SMichael S. Tsirkin 	}
21233a4d5c94SMichael S. Tsirkin 
21243a4d5c94SMichael S. Tsirkin 	/* You must be the owner to do anything else */
21253a4d5c94SMichael S. Tsirkin 	r = vhost_dev_check_owner(d);
21263a4d5c94SMichael S. Tsirkin 	if (r)
21273a4d5c94SMichael S. Tsirkin 		goto done;
21283a4d5c94SMichael S. Tsirkin 
21293a4d5c94SMichael S. Tsirkin 	switch (ioctl) {
21303a4d5c94SMichael S. Tsirkin 	case VHOST_SET_MEM_TABLE:
21313a4d5c94SMichael S. Tsirkin 		r = vhost_set_memory(d, argp);
21323a4d5c94SMichael S. Tsirkin 		break;
21333a4d5c94SMichael S. Tsirkin 	case VHOST_SET_LOG_BASE:
21347ad9c9d2STakuya Yoshikawa 		if (copy_from_user(&p, argp, sizeof p)) {
21357ad9c9d2STakuya Yoshikawa 			r = -EFAULT;
21363a4d5c94SMichael S. Tsirkin 			break;
21377ad9c9d2STakuya Yoshikawa 		}
21383a4d5c94SMichael S. Tsirkin 		if ((u64)(unsigned long)p != p) {
21393a4d5c94SMichael S. Tsirkin 			r = -EFAULT;
21403a4d5c94SMichael S. Tsirkin 			break;
21413a4d5c94SMichael S. Tsirkin 		}
21423a4d5c94SMichael S. Tsirkin 		for (i = 0; i < d->nvqs; ++i) {
21433a4d5c94SMichael S. Tsirkin 			struct vhost_virtqueue *vq;
21443a4d5c94SMichael S. Tsirkin 			void __user *base = (void __user *)(unsigned long)p;
21453ab2e420SAsias He 			vq = d->vqs[i];
21463a4d5c94SMichael S. Tsirkin 			mutex_lock(&vq->mutex);
21473a4d5c94SMichael S. Tsirkin 			/* If ring is inactive, will check when it's enabled. */
2148ea16c514SMichael S. Tsirkin 			if (vq->private_data && !vq_log_access_ok(vq, base))
21493a4d5c94SMichael S. Tsirkin 				r = -EFAULT;
21503a4d5c94SMichael S. Tsirkin 			else
21513a4d5c94SMichael S. Tsirkin 				vq->log_base = base;
21523a4d5c94SMichael S. Tsirkin 			mutex_unlock(&vq->mutex);
21533a4d5c94SMichael S. Tsirkin 		}
21543a4d5c94SMichael S. Tsirkin 		break;
21553a4d5c94SMichael S. Tsirkin 	case VHOST_SET_LOG_FD:
21563a4d5c94SMichael S. Tsirkin 		r = get_user(fd, (int __user *)argp);
21573a4d5c94SMichael S. Tsirkin 		if (r < 0)
21583a4d5c94SMichael S. Tsirkin 			break;
2159e0136c16SZhu Lingshan 		ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
2160d25cc43cSEric Biggers 		if (IS_ERR(ctx)) {
2161d25cc43cSEric Biggers 			r = PTR_ERR(ctx);
21623a4d5c94SMichael S. Tsirkin 			break;
21633a4d5c94SMichael S. Tsirkin 		}
2164d25cc43cSEric Biggers 		swap(ctx, d->log_ctx);
21653a4d5c94SMichael S. Tsirkin 		for (i = 0; i < d->nvqs; ++i) {
21663ab2e420SAsias He 			mutex_lock(&d->vqs[i]->mutex);
21673ab2e420SAsias He 			d->vqs[i]->log_ctx = d->log_ctx;
21683ab2e420SAsias He 			mutex_unlock(&d->vqs[i]->mutex);
21693a4d5c94SMichael S. Tsirkin 		}
21703a4d5c94SMichael S. Tsirkin 		if (ctx)
21713a4d5c94SMichael S. Tsirkin 			eventfd_ctx_put(ctx);
21723a4d5c94SMichael S. Tsirkin 		break;
21733a4d5c94SMichael S. Tsirkin 	default:
2174935cdee7SMichael S. Tsirkin 		r = -ENOIOCTLCMD;
21753a4d5c94SMichael S. Tsirkin 		break;
21763a4d5c94SMichael S. Tsirkin 	}
21773a4d5c94SMichael S. Tsirkin done:
21783a4d5c94SMichael S. Tsirkin 	return r;
21793a4d5c94SMichael S. Tsirkin }
21806ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
21813a4d5c94SMichael S. Tsirkin 
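/* Example: a minimal userspace sketch (not part of this file) of the
 * generic ioctl flow dispatched above. It assumes a vhost-net device
 * node; "bitmap" stands for a hypothetical buffer holding the dirty
 * log, and error handling is elided.
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/vhost.h>
 *
 *	int vhost_fd = open("/dev/vhost-net", O_RDWR);
 *	ioctl(vhost_fd, VHOST_SET_OWNER);		// vhost_dev_set_owner()
 *	__u64 log = (__u64)(unsigned long)bitmap;
 *	ioctl(vhost_fd, VHOST_SET_LOG_BASE, &log);	// checked per-vq above
 */
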
21823a4d5c94SMichael S. Tsirkin /* TODO: This is really inefficient.  We need something like get_user()
21833a4d5c94SMichael S. Tsirkin  * (instruction directly accesses the data, with an exception table entry
2184ff61f079SJonathan Corbet  * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst.
21853a4d5c94SMichael S. Tsirkin  */
set_bit_to_user(int nr,void __user * addr)21863a4d5c94SMichael S. Tsirkin static int set_bit_to_user(int nr, void __user *addr)
21873a4d5c94SMichael S. Tsirkin {
21883a4d5c94SMichael S. Tsirkin 	unsigned long log = (unsigned long)addr;
21893a4d5c94SMichael S. Tsirkin 	struct page *page;
21903a4d5c94SMichael S. Tsirkin 	void *base;
21913a4d5c94SMichael S. Tsirkin 	int bit = nr + (log % PAGE_SIZE) * 8;
21923a4d5c94SMichael S. Tsirkin 	int r;
2193d47effe1SKrishna Kumar 
2194690623e1SJohn Hubbard 	r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page);
2195d6db3f5cSMichael S. Tsirkin 	if (r < 0)
21963a4d5c94SMichael S. Tsirkin 		return r;
2197d6db3f5cSMichael S. Tsirkin 	BUG_ON(r != 1);
2198c6daa7ffSCong Wang 	base = kmap_atomic(page);
21993a4d5c94SMichael S. Tsirkin 	set_bit(bit, base);
2200c6daa7ffSCong Wang 	kunmap_atomic(base);
2201690623e1SJohn Hubbard 	unpin_user_pages_dirty_lock(&page, 1, true);
22023a4d5c94SMichael S. Tsirkin 	return 0;
22033a4d5c94SMichael S. Tsirkin }
22043a4d5c94SMichael S. Tsirkin 
log_write(void __user * log_base,u64 write_address,u64 write_length)22053a4d5c94SMichael S. Tsirkin static int log_write(void __user *log_base,
22063a4d5c94SMichael S. Tsirkin 		     u64 write_address, u64 write_length)
22073a4d5c94SMichael S. Tsirkin {
220828831ee6SMichael S. Tsirkin 	u64 write_page = write_address / VHOST_PAGE_SIZE;
22093a4d5c94SMichael S. Tsirkin 	int r;
2210d47effe1SKrishna Kumar 
22113a4d5c94SMichael S. Tsirkin 	if (!write_length)
22123a4d5c94SMichael S. Tsirkin 		return 0;
22133bf9be40SMichael S. Tsirkin 	write_length += write_address % VHOST_PAGE_SIZE;
22143a4d5c94SMichael S. Tsirkin 	for (;;) {
22153a4d5c94SMichael S. Tsirkin 		u64 base = (u64)(unsigned long)log_base;
221628831ee6SMichael S. Tsirkin 		u64 log = base + write_page / 8;
221728831ee6SMichael S. Tsirkin 		int bit = write_page % 8;
22183a4d5c94SMichael S. Tsirkin 		if ((u64)(unsigned long)log != log)
22193a4d5c94SMichael S. Tsirkin 			return -EFAULT;
22203a4d5c94SMichael S. Tsirkin 		r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
22213a4d5c94SMichael S. Tsirkin 		if (r < 0)
22223a4d5c94SMichael S. Tsirkin 			return r;
22233a4d5c94SMichael S. Tsirkin 		if (write_length <= VHOST_PAGE_SIZE)
22243a4d5c94SMichael S. Tsirkin 			break;
22253a4d5c94SMichael S. Tsirkin 		write_length -= VHOST_PAGE_SIZE;
222628831ee6SMichael S. Tsirkin 		write_page += 1;
22273a4d5c94SMichael S. Tsirkin 	}
22283a4d5c94SMichael S. Tsirkin 	return r;
22293a4d5c94SMichael S. Tsirkin }
22303a4d5c94SMichael S. Tsirkin 
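/* Worked example of the log layout handled above, assuming
 * VHOST_PAGE_SIZE == 0x1000: for write_address 0x12345, write_page is
 * 0x12 (18), so the dirty bit lives at byte log_base + 18 / 8 ==
 * log_base + 2, bit 18 % 8 == 2. A write of length 0x2000 starting
 * there additionally dirties pages 0x13 and 0x14, one loop iteration
 * each.
 */
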
log_write_hva(struct vhost_virtqueue * vq,u64 hva,u64 len)2231cc5e7107SJason Wang static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
2232cc5e7107SJason Wang {
22330bbe3066SJason Wang 	struct vhost_iotlb *umem = vq->umem;
22340bbe3066SJason Wang 	struct vhost_iotlb_map *u;
2235cc5e7107SJason Wang 	u64 start, end, l, min;
2236cc5e7107SJason Wang 	int r;
2237cc5e7107SJason Wang 	bool hit = false;
2238cc5e7107SJason Wang 
2239cc5e7107SJason Wang 	while (len) {
2240cc5e7107SJason Wang 		min = len;
2241cc5e7107SJason Wang 		/* More than one GPA can be mapped into a single HVA, so
2242cc5e7107SJason Wang 		 * iterate all possible mappings here to be safe.
2243cc5e7107SJason Wang 		 */
22440bbe3066SJason Wang 		list_for_each_entry(u, &umem->list, link) {
22450bbe3066SJason Wang 			if (u->addr > hva - 1 + len ||
22460bbe3066SJason Wang 			    u->addr - 1 + u->size < hva)
2247cc5e7107SJason Wang 				continue;
22480bbe3066SJason Wang 			start = max(u->addr, hva);
22490bbe3066SJason Wang 			end = min(u->addr - 1 + u->size, hva - 1 + len);
2250cc5e7107SJason Wang 			l = end - start + 1;
2251cc5e7107SJason Wang 			r = log_write(vq->log_base,
22520bbe3066SJason Wang 				      u->start + start - u->addr,
2253cc5e7107SJason Wang 				      l);
2254cc5e7107SJason Wang 			if (r < 0)
2255cc5e7107SJason Wang 				return r;
2256cc5e7107SJason Wang 			hit = true;
2257cc5e7107SJason Wang 			min = min(l, min);
2258cc5e7107SJason Wang 		}
2259cc5e7107SJason Wang 
2260cc5e7107SJason Wang 		if (!hit)
2261cc5e7107SJason Wang 			return -EFAULT;
2262cc5e7107SJason Wang 
2263cc5e7107SJason Wang 		len -= min;
2264cc5e7107SJason Wang 		hva += min;
2265cc5e7107SJason Wang 	}
2266cc5e7107SJason Wang 
2267cc5e7107SJason Wang 	return 0;
2268cc5e7107SJason Wang }
2269cc5e7107SJason Wang 
log_used(struct vhost_virtqueue * vq,u64 used_offset,u64 len)2270cc5e7107SJason Wang static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
2271cc5e7107SJason Wang {
22725e5e8736SLi Wang 	struct iovec *iov = vq->log_iov;
2273cc5e7107SJason Wang 	int i, ret;
2274cc5e7107SJason Wang 
2275cc5e7107SJason Wang 	if (!vq->iotlb)
2276cc5e7107SJason Wang 		return log_write(vq->log_base, vq->log_addr + used_offset, len);
2277cc5e7107SJason Wang 
2278cc5e7107SJason Wang 	ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
2279cc5e7107SJason Wang 			     len, iov, 64, VHOST_ACCESS_WO);
2280816db766SJason Wang 	if (ret < 0)
2281cc5e7107SJason Wang 		return ret;
2282cc5e7107SJason Wang 
2283cc5e7107SJason Wang 	for (i = 0; i < ret; i++) {
2284cc5e7107SJason Wang 		ret = log_write_hva(vq,	(uintptr_t)iov[i].iov_base,
2285cc5e7107SJason Wang 				    iov[i].iov_len);
2286cc5e7107SJason Wang 		if (ret)
2287cc5e7107SJason Wang 			return ret;
2288cc5e7107SJason Wang 	}
2289cc5e7107SJason Wang 
2290cc5e7107SJason Wang 	return 0;
2291cc5e7107SJason Wang }
2292cc5e7107SJason Wang 
vhost_log_write(struct vhost_virtqueue * vq,struct vhost_log * log,unsigned int log_num,u64 len,struct iovec * iov,int count)22933a4d5c94SMichael S. Tsirkin int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
2294cc5e7107SJason Wang 		    unsigned int log_num, u64 len, struct iovec *iov, int count)
22953a4d5c94SMichael S. Tsirkin {
22963a4d5c94SMichael S. Tsirkin 	int i, r;
22973a4d5c94SMichael S. Tsirkin 
22983a4d5c94SMichael S. Tsirkin 	/* Make sure data written is seen before log. */
22995659338cSMichael S. Tsirkin 	smp_wmb();
2300cc5e7107SJason Wang 
2301cc5e7107SJason Wang 	if (vq->iotlb) {
2302cc5e7107SJason Wang 		for (i = 0; i < count; i++) {
2303cc5e7107SJason Wang 			r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
2304cc5e7107SJason Wang 					  iov[i].iov_len);
2305cc5e7107SJason Wang 			if (r < 0)
2306cc5e7107SJason Wang 				return r;
2307cc5e7107SJason Wang 		}
2308cc5e7107SJason Wang 		return 0;
2309cc5e7107SJason Wang 	}
2310cc5e7107SJason Wang 
23113a4d5c94SMichael S. Tsirkin 	for (i = 0; i < log_num; ++i) {
23123a4d5c94SMichael S. Tsirkin 		u64 l = min(log[i].len, len);
23133a4d5c94SMichael S. Tsirkin 		r = log_write(vq->log_base, log[i].addr, l);
23143a4d5c94SMichael S. Tsirkin 		if (r < 0)
23153a4d5c94SMichael S. Tsirkin 			return r;
23163a4d5c94SMichael S. Tsirkin 		len -= l;
23175786aee8SMichael S. Tsirkin 		if (!len) {
23183a4d5c94SMichael S. Tsirkin 			if (vq->log_ctx)
23193a4d5c94SMichael S. Tsirkin 				eventfd_signal(vq->log_ctx, 1);
23205786aee8SMichael S. Tsirkin 			return 0;
23215786aee8SMichael S. Tsirkin 		}
23225786aee8SMichael S. Tsirkin 	}
23233a4d5c94SMichael S. Tsirkin 	/* Length written exceeds what we have stored. This is a bug. */
23243a4d5c94SMichael S. Tsirkin 	BUG();
23253a4d5c94SMichael S. Tsirkin 	return 0;
23263a4d5c94SMichael S. Tsirkin }
23276ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_log_write);
23283a4d5c94SMichael S. Tsirkin 
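/* Example caller (a sketch modeled on net-style backends; vq_log,
 * log_num and rx_len are illustrative names): after writing rx_len
 * bytes of payload into the guest buffers, mirror the write into the
 * dirty log.
 *
 *	if (unlikely(vq_log))
 *		vhost_log_write(vq, vq_log, log_num, rx_len, vq->iov, in);
 */
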
vhost_update_used_flags(struct vhost_virtqueue * vq)23292723feaaSJason Wang static int vhost_update_used_flags(struct vhost_virtqueue *vq)
23302723feaaSJason Wang {
23312723feaaSJason Wang 	void __user *used;
23327b5d753eSJason Wang 	if (vhost_put_used_flags(vq))
23332723feaaSJason Wang 		return -EFAULT;
23342723feaaSJason Wang 	if (unlikely(vq->log_used)) {
23352723feaaSJason Wang 		/* Make sure the flag is seen before log. */
23362723feaaSJason Wang 		smp_wmb();
23372723feaaSJason Wang 		/* Log used flag write. */
23382723feaaSJason Wang 		used = &vq->used->flags;
2339cc5e7107SJason Wang 		log_used(vq, (used - (void __user *)vq->used),
23402723feaaSJason Wang 			 sizeof vq->used->flags);
23412723feaaSJason Wang 		if (vq->log_ctx)
23422723feaaSJason Wang 			eventfd_signal(vq->log_ctx, 1);
23432723feaaSJason Wang 	}
23442723feaaSJason Wang 	return 0;
23452723feaaSJason Wang }
23462723feaaSJason Wang 
vhost_update_avail_event(struct vhost_virtqueue * vq)23474c809363SStefano Garzarella static int vhost_update_avail_event(struct vhost_virtqueue *vq)
23482723feaaSJason Wang {
23497b5d753eSJason Wang 	if (vhost_put_avail_event(vq))
23502723feaaSJason Wang 		return -EFAULT;
23512723feaaSJason Wang 	if (unlikely(vq->log_used)) {
23522723feaaSJason Wang 		void __user *used;
23532723feaaSJason Wang 		/* Make sure the event is seen before log. */
23542723feaaSJason Wang 		smp_wmb();
23552723feaaSJason Wang 		/* Log avail event write */
23562723feaaSJason Wang 		used = vhost_avail_event(vq);
2357cc5e7107SJason Wang 		log_used(vq, (used - (void __user *)vq->used),
23582723feaaSJason Wang 			 sizeof *vhost_avail_event(vq));
23592723feaaSJason Wang 		if (vq->log_ctx)
23602723feaaSJason Wang 			eventfd_signal(vq->log_ctx, 1);
23612723feaaSJason Wang 	}
23622723feaaSJason Wang 	return 0;
23632723feaaSJason Wang }
23642723feaaSJason Wang 
vhost_vq_init_access(struct vhost_virtqueue * vq)236580f7d030SGreg Kurz int vhost_vq_init_access(struct vhost_virtqueue *vq)
23662723feaaSJason Wang {
23673b1bbe89SMichael S. Tsirkin 	__virtio16 last_used_idx;
23682723feaaSJason Wang 	int r;
2369e1f33be9SGreg Kurz 	bool is_le = vq->is_le;
2370e1f33be9SGreg Kurz 
2371cda8bba0SHalil Pasic 	if (!vq->private_data)
23722723feaaSJason Wang 		return 0;
23732751c988SGreg Kurz 
23742751c988SGreg Kurz 	vhost_init_is_le(vq);
23752723feaaSJason Wang 
23762723feaaSJason Wang 	r = vhost_update_used_flags(vq);
23772723feaaSJason Wang 	if (r)
2378e1f33be9SGreg Kurz 		goto err;
23792723feaaSJason Wang 	vq->signalled_used_valid = false;
23806b1e6cc7SJason Wang 	if (!vq->iotlb &&
238196d4f267SLinus Torvalds 	    !access_ok(&vq->used->idx, sizeof vq->used->idx)) {
2382e1f33be9SGreg Kurz 		r = -EFAULT;
2383e1f33be9SGreg Kurz 		goto err;
2384e1f33be9SGreg Kurz 	}
23857b5d753eSJason Wang 	r = vhost_get_used_idx(vq, &last_used_idx);
23866b1e6cc7SJason Wang 	if (r) {
23876b1e6cc7SJason Wang 		vq_err(vq, "Can't access used idx at %p\n",
23886b1e6cc7SJason Wang 		       &vq->used->idx);
2389e1f33be9SGreg Kurz 		goto err;
23906b1e6cc7SJason Wang 	}
23913b1bbe89SMichael S. Tsirkin 	vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
239264f7f051SMichael S. Tsirkin 	return 0;
23936b1e6cc7SJason Wang 
2394e1f33be9SGreg Kurz err:
2395e1f33be9SGreg Kurz 	vq->is_le = is_le;
2396e1f33be9SGreg Kurz 	return r;
23972723feaaSJason Wang }
239880f7d030SGreg Kurz EXPORT_SYMBOL_GPL(vhost_vq_init_access);
23992723feaaSJason Wang 
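/* Example: how a backend typically brings a ring up (sketch; "priv" is
 * the backend's private pointer). The backend pointer must be set
 * first, since vhost_vq_init_access() is a nop without one.
 *
 *	mutex_lock(&vq->mutex);
 *	vq->private_data = priv;
 *	r = vhost_vq_init_access(vq);
 *	if (r)
 *		vq->private_data = NULL;	// roll back on failure
 *	mutex_unlock(&vq->mutex);
 */
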
translate_desc(struct vhost_virtqueue * vq,u64 addr,u32 len,struct iovec iov[],int iov_size,int access)240047283befSMichael S. Tsirkin static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
24016b1e6cc7SJason Wang 			  struct iovec iov[], int iov_size, int access)
24023a4d5c94SMichael S. Tsirkin {
24030bbe3066SJason Wang 	const struct vhost_iotlb_map *map;
24046b1e6cc7SJason Wang 	struct vhost_dev *dev = vq->dev;
24050bbe3066SJason Wang 	struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
24063a4d5c94SMichael S. Tsirkin 	struct iovec *_iov;
240798047313SStefano Garzarella 	u64 s = 0, last = addr + len - 1;
24083a4d5c94SMichael S. Tsirkin 	int ret = 0;
24093a4d5c94SMichael S. Tsirkin 
24103a4d5c94SMichael S. Tsirkin 	while ((u64)len > s) {
24113a4d5c94SMichael S. Tsirkin 		u64 size;
24127b3384fcSMichael S. Tsirkin 		if (unlikely(ret >= iov_size)) {
24133a4d5c94SMichael S. Tsirkin 			ret = -ENOBUFS;
24143a4d5c94SMichael S. Tsirkin 			break;
24153a4d5c94SMichael S. Tsirkin 		}
24166b1e6cc7SJason Wang 
241798047313SStefano Garzarella 		map = vhost_iotlb_itree_first(umem, addr, last);
24180bbe3066SJason Wang 		if (map == NULL || map->start > addr) {
24196b1e6cc7SJason Wang 			if (umem != dev->iotlb) {
24203a4d5c94SMichael S. Tsirkin 				ret = -EFAULT;
24213a4d5c94SMichael S. Tsirkin 				break;
24223a4d5c94SMichael S. Tsirkin 			}
24236b1e6cc7SJason Wang 			ret = -EAGAIN;
24246b1e6cc7SJason Wang 			break;
24250bbe3066SJason Wang 		} else if (!(map->perm & access)) {
24266b1e6cc7SJason Wang 			ret = -EPERM;
24276b1e6cc7SJason Wang 			break;
24286b1e6cc7SJason Wang 		}
24296b1e6cc7SJason Wang 
24303a4d5c94SMichael S. Tsirkin 		_iov = iov + ret;
24310bbe3066SJason Wang 		size = map->size - addr + map->start;
2432bd97120fSMichael S. Tsirkin 		_iov->iov_len = min((u64)len - s, size);
24330d4a3f2aSMichael S. Tsirkin 		_iov->iov_base = (void __user *)(unsigned long)
24340bbe3066SJason Wang 				 (map->addr + addr - map->start);
24353a4d5c94SMichael S. Tsirkin 		s += size;
24363a4d5c94SMichael S. Tsirkin 		addr += size;
24373a4d5c94SMichael S. Tsirkin 		++ret;
24383a4d5c94SMichael S. Tsirkin 	}
24393a4d5c94SMichael S. Tsirkin 
24406b1e6cc7SJason Wang 	if (ret == -EAGAIN)
24416b1e6cc7SJason Wang 		vhost_iotlb_miss(vq, addr, access);
24423a4d5c94SMichael S. Tsirkin 	return ret;
24433a4d5c94SMichael S. Tsirkin }
24443a4d5c94SMichael S. Tsirkin 
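/* Worked example of the translation above (hypothetical layout): with
 * one map covering guest addresses [0x1000, 0x2fff] at host VA
 * 0x7f0000001000 and another covering [0x3000, 0x3fff] at
 * 0x7f0000200000, translating addr 0x2800, len 0x1000 yields two
 * iovecs: { .iov_base = 0x7f0000002800, .iov_len = 0x800 } followed by
 * { .iov_base = 0x7f0000200000, .iov_len = 0x800 }.
 */
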
24453a4d5c94SMichael S. Tsirkin /* Each buffer in the virtqueues is actually a chain of descriptors.  This
24463a4d5c94SMichael S. Tsirkin  * function returns the next descriptor in the chain,
24473a4d5c94SMichael S. Tsirkin  * or -1U if we're at the end. */
next_desc(struct vhost_virtqueue * vq,struct vring_desc * desc)24483b1bbe89SMichael S. Tsirkin static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc)
24493a4d5c94SMichael S. Tsirkin {
24503a4d5c94SMichael S. Tsirkin 	unsigned int next;
24513a4d5c94SMichael S. Tsirkin 
24523a4d5c94SMichael S. Tsirkin 	/* If this descriptor says it doesn't chain, we're done. */
24533b1bbe89SMichael S. Tsirkin 	if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT)))
24543a4d5c94SMichael S. Tsirkin 		return -1U;
24553a4d5c94SMichael S. Tsirkin 
24563a4d5c94SMichael S. Tsirkin 	/* Check they're not leading us off the end of the descriptors. */
24573a5db0b1SPaul E. McKenney 	next = vhost16_to_cpu(vq, READ_ONCE(desc->next));
24583a4d5c94SMichael S. Tsirkin 	return next;
24593a4d5c94SMichael S. Tsirkin }
24603a4d5c94SMichael S. Tsirkin 
get_indirect(struct vhost_virtqueue * vq,struct iovec iov[],unsigned int iov_size,unsigned int * out_num,unsigned int * in_num,struct vhost_log * log,unsigned int * log_num,struct vring_desc * indirect)246147283befSMichael S. Tsirkin static int get_indirect(struct vhost_virtqueue *vq,
24623a4d5c94SMichael S. Tsirkin 			struct iovec iov[], unsigned int iov_size,
24633a4d5c94SMichael S. Tsirkin 			unsigned int *out_num, unsigned int *in_num,
24643a4d5c94SMichael S. Tsirkin 			struct vhost_log *log, unsigned int *log_num,
24653a4d5c94SMichael S. Tsirkin 			struct vring_desc *indirect)
24663a4d5c94SMichael S. Tsirkin {
24673a4d5c94SMichael S. Tsirkin 	struct vring_desc desc;
24683a4d5c94SMichael S. Tsirkin 	unsigned int i = 0, count, found = 0;
24693b1bbe89SMichael S. Tsirkin 	u32 len = vhost32_to_cpu(vq, indirect->len);
2470aad9a1ceSAl Viro 	struct iov_iter from;
24716b1e6cc7SJason Wang 	int ret, access;
24723a4d5c94SMichael S. Tsirkin 
24733a4d5c94SMichael S. Tsirkin 	/* Sanity check */
24743b1bbe89SMichael S. Tsirkin 	if (unlikely(len % sizeof desc)) {
24753a4d5c94SMichael S. Tsirkin 		vq_err(vq, "Invalid length in indirect descriptor: "
24763a4d5c94SMichael S. Tsirkin 		       "len 0x%llx not multiple of 0x%zx\n",
24773b1bbe89SMichael S. Tsirkin 		       (unsigned long long)len,
24783a4d5c94SMichael S. Tsirkin 		       sizeof desc);
24793a4d5c94SMichael S. Tsirkin 		return -EINVAL;
24803a4d5c94SMichael S. Tsirkin 	}
24813a4d5c94SMichael S. Tsirkin 
24823b1bbe89SMichael S. Tsirkin 	ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect,
24836b1e6cc7SJason Wang 			     UIO_MAXIOV, VHOST_ACCESS_RO);
24847b3384fcSMichael S. Tsirkin 	if (unlikely(ret < 0)) {
24856b1e6cc7SJason Wang 		if (ret != -EAGAIN)
24863a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Translation failure %d in indirect.\n", ret);
24873a4d5c94SMichael S. Tsirkin 		return ret;
24883a4d5c94SMichael S. Tsirkin 	}
2489de4eda9dSAl Viro 	iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len);
24903b1bbe89SMichael S. Tsirkin 	count = len / sizeof desc;
24913a4d5c94SMichael S. Tsirkin 	/* Buffers are chained via a 16-bit next field, so
24923a4d5c94SMichael S. Tsirkin 	 * we can have at most 2^16 of these. */
24937b3384fcSMichael S. Tsirkin 	if (unlikely(count > USHRT_MAX + 1)) {
24943a4d5c94SMichael S. Tsirkin 		vq_err(vq, "Indirect buffer length too big: %d\n",
24953a4d5c94SMichael S. Tsirkin 		       indirect->len);
24963a4d5c94SMichael S. Tsirkin 		return -E2BIG;
24973a4d5c94SMichael S. Tsirkin 	}
24983a4d5c94SMichael S. Tsirkin 
24993a4d5c94SMichael S. Tsirkin 	do {
25003a4d5c94SMichael S. Tsirkin 		unsigned iov_count = *in_num + *out_num;
25017b3384fcSMichael S. Tsirkin 		if (unlikely(++found > count)) {
25023a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Loop detected: last one at %u "
25033a4d5c94SMichael S. Tsirkin 			       "indirect size %u\n",
25043a4d5c94SMichael S. Tsirkin 			       i, count);
25053a4d5c94SMichael S. Tsirkin 			return -EINVAL;
25063a4d5c94SMichael S. Tsirkin 		}
2507cbbd26b8SAl Viro 		if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
25083a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
25093b1bbe89SMichael S. Tsirkin 			       i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
25103a4d5c94SMichael S. Tsirkin 			return -EINVAL;
25113a4d5c94SMichael S. Tsirkin 		}
25123b1bbe89SMichael S. Tsirkin 		if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) {
25133a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
25143b1bbe89SMichael S. Tsirkin 			       i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
25153a4d5c94SMichael S. Tsirkin 			return -EINVAL;
25163a4d5c94SMichael S. Tsirkin 		}
25173a4d5c94SMichael S. Tsirkin 
25186b1e6cc7SJason Wang 		if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
25196b1e6cc7SJason Wang 			access = VHOST_ACCESS_WO;
25206b1e6cc7SJason Wang 		else
25216b1e6cc7SJason Wang 			access = VHOST_ACCESS_RO;
25226b1e6cc7SJason Wang 
25233b1bbe89SMichael S. Tsirkin 		ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
25243b1bbe89SMichael S. Tsirkin 				     vhost32_to_cpu(vq, desc.len), iov + iov_count,
25256b1e6cc7SJason Wang 				     iov_size - iov_count, access);
25267b3384fcSMichael S. Tsirkin 		if (unlikely(ret < 0)) {
25276b1e6cc7SJason Wang 			if (ret != -EAGAIN)
25283a4d5c94SMichael S. Tsirkin 				vq_err(vq, "Translation failure %d indirect idx %d\n",
25293a4d5c94SMichael S. Tsirkin 					ret, i);
25303a4d5c94SMichael S. Tsirkin 			return ret;
25313a4d5c94SMichael S. Tsirkin 		}
25323a4d5c94SMichael S. Tsirkin 		/* If this is an input descriptor, increment that count. */
25336b1e6cc7SJason Wang 		if (access == VHOST_ACCESS_WO) {
25343a4d5c94SMichael S. Tsirkin 			*in_num += ret;
2535060423bfSyongduan 			if (unlikely(log && ret)) {
25363b1bbe89SMichael S. Tsirkin 				log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
25373b1bbe89SMichael S. Tsirkin 				log[*log_num].len = vhost32_to_cpu(vq, desc.len);
25383a4d5c94SMichael S. Tsirkin 				++*log_num;
25393a4d5c94SMichael S. Tsirkin 			}
25403a4d5c94SMichael S. Tsirkin 		} else {
25413a4d5c94SMichael S. Tsirkin 			/* If it's an output descriptor, they're all supposed
25423a4d5c94SMichael S. Tsirkin 			 * to come before any input descriptors. */
25437b3384fcSMichael S. Tsirkin 			if (unlikely(*in_num)) {
25443a4d5c94SMichael S. Tsirkin 				vq_err(vq, "Indirect descriptor "
25453a4d5c94SMichael S. Tsirkin 				       "has out after in: idx %d\n", i);
25463a4d5c94SMichael S. Tsirkin 				return -EINVAL;
25473a4d5c94SMichael S. Tsirkin 			}
25483a4d5c94SMichael S. Tsirkin 			*out_num += ret;
25493a4d5c94SMichael S. Tsirkin 		}
25503b1bbe89SMichael S. Tsirkin 	} while ((i = next_desc(vq, &desc)) != -1);
25513a4d5c94SMichael S. Tsirkin 	return 0;
25523a4d5c94SMichael S. Tsirkin }
25533a4d5c94SMichael S. Tsirkin 
25543a4d5c94SMichael S. Tsirkin /* This looks in the virtqueue for the first available buffer, and converts
25553a4d5c94SMichael S. Tsirkin  * it to an iovec for convenient access.  Since descriptors consist of some
25563a4d5c94SMichael S. Tsirkin  * number of output then some number of input descriptors, it's actually two
25573a4d5c94SMichael S. Tsirkin  * iovecs, but we pack them into one and note how many of each there were.
25583a4d5c94SMichael S. Tsirkin  *
2559d5675bd2SMichael S. Tsirkin  * This function returns the descriptor number found, or vq->num (which is
2560d5675bd2SMichael S. Tsirkin  * never a valid descriptor number) if none was found.  A negative code is
2561d5675bd2SMichael S. Tsirkin  * returned on error. */
vhost_get_vq_desc(struct vhost_virtqueue * vq,struct iovec iov[],unsigned int iov_size,unsigned int * out_num,unsigned int * in_num,struct vhost_log * log,unsigned int * log_num)256247283befSMichael S. Tsirkin int vhost_get_vq_desc(struct vhost_virtqueue *vq,
25633a4d5c94SMichael S. Tsirkin 		      struct iovec iov[], unsigned int iov_size,
25643a4d5c94SMichael S. Tsirkin 		      unsigned int *out_num, unsigned int *in_num,
25653a4d5c94SMichael S. Tsirkin 		      struct vhost_log *log, unsigned int *log_num)
25663a4d5c94SMichael S. Tsirkin {
25673a4d5c94SMichael S. Tsirkin 	struct vring_desc desc;
25683a4d5c94SMichael S. Tsirkin 	unsigned int i, head, found = 0;
25693a4d5c94SMichael S. Tsirkin 	u16 last_avail_idx;
25703b1bbe89SMichael S. Tsirkin 	__virtio16 avail_idx;
25713b1bbe89SMichael S. Tsirkin 	__virtio16 ring_head;
25726b1e6cc7SJason Wang 	int ret, access;
25733a4d5c94SMichael S. Tsirkin 
25743a4d5c94SMichael S. Tsirkin 	/* Check it isn't doing very strange things with descriptor numbers. */
25753a4d5c94SMichael S. Tsirkin 	last_avail_idx = vq->last_avail_idx;
2576e3b56cddSJason Wang 
2577e3b56cddSJason Wang 	if (vq->avail_idx == vq->last_avail_idx) {
25787b5d753eSJason Wang 		if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
25793a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Failed to access avail idx at %p\n",
25803a4d5c94SMichael S. Tsirkin 				&vq->avail->idx);
2581d5675bd2SMichael S. Tsirkin 			return -EFAULT;
25823a4d5c94SMichael S. Tsirkin 		}
25833b1bbe89SMichael S. Tsirkin 		vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
25843a4d5c94SMichael S. Tsirkin 
25857b3384fcSMichael S. Tsirkin 		if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
25863a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Guest moved used index from %u to %u",
25873a4d5c94SMichael S. Tsirkin 				last_avail_idx, vq->avail_idx);
2588d5675bd2SMichael S. Tsirkin 			return -EFAULT;
25893a4d5c94SMichael S. Tsirkin 		}
25903a4d5c94SMichael S. Tsirkin 
2591e3b56cddSJason Wang 		/* If there's nothing new since we last looked, return
2592e3b56cddSJason Wang 		 * invalid.
2593e3b56cddSJason Wang 		 */
25943a4d5c94SMichael S. Tsirkin 		if (vq->avail_idx == last_avail_idx)
25953a4d5c94SMichael S. Tsirkin 			return vq->num;
25963a4d5c94SMichael S. Tsirkin 
2597e3b56cddSJason Wang 		/* Only get avail ring entries after they have been
2598e3b56cddSJason Wang 		 * exposed by guest.
2599e3b56cddSJason Wang 		 */
26005659338cSMichael S. Tsirkin 		smp_rmb();
2601e3b56cddSJason Wang 	}
26023a4d5c94SMichael S. Tsirkin 
26033a4d5c94SMichael S. Tsirkin 	/* Grab the next descriptor number they're advertising, and increment
26043a4d5c94SMichael S. Tsirkin 	 * the index we've seen. */
26057b5d753eSJason Wang 	if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
26063a4d5c94SMichael S. Tsirkin 		vq_err(vq, "Failed to read head: idx %d address %p\n",
26073a4d5c94SMichael S. Tsirkin 		       last_avail_idx,
26083a4d5c94SMichael S. Tsirkin 		       &vq->avail->ring[last_avail_idx % vq->num]);
2609d5675bd2SMichael S. Tsirkin 		return -EFAULT;
26103a4d5c94SMichael S. Tsirkin 	}
26113a4d5c94SMichael S. Tsirkin 
26123b1bbe89SMichael S. Tsirkin 	head = vhost16_to_cpu(vq, ring_head);
26133b1bbe89SMichael S. Tsirkin 
26143a4d5c94SMichael S. Tsirkin 	/* If their number is silly, that's an error. */
26157b3384fcSMichael S. Tsirkin 	if (unlikely(head >= vq->num)) {
26163a4d5c94SMichael S. Tsirkin 		vq_err(vq, "Guest says index %u > %u is available",
26173a4d5c94SMichael S. Tsirkin 		       head, vq->num);
2618d5675bd2SMichael S. Tsirkin 		return -EINVAL;
26193a4d5c94SMichael S. Tsirkin 	}
26203a4d5c94SMichael S. Tsirkin 
26213a4d5c94SMichael S. Tsirkin 	/* When we start there are neither input nor output descriptors. */
26223a4d5c94SMichael S. Tsirkin 	*out_num = *in_num = 0;
26233a4d5c94SMichael S. Tsirkin 	if (unlikely(log))
26243a4d5c94SMichael S. Tsirkin 		*log_num = 0;
26253a4d5c94SMichael S. Tsirkin 
26263a4d5c94SMichael S. Tsirkin 	i = head;
26273a4d5c94SMichael S. Tsirkin 	do {
26283a4d5c94SMichael S. Tsirkin 		unsigned iov_count = *in_num + *out_num;
26297b3384fcSMichael S. Tsirkin 		if (unlikely(i >= vq->num)) {
26303a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Desc index is %u > %u, head = %u",
26313a4d5c94SMichael S. Tsirkin 			       i, vq->num, head);
2632d5675bd2SMichael S. Tsirkin 			return -EINVAL;
26333a4d5c94SMichael S. Tsirkin 		}
26347b3384fcSMichael S. Tsirkin 		if (unlikely(++found > vq->num)) {
26353a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Loop detected: last one at %u "
26363a4d5c94SMichael S. Tsirkin 			       "vq size %u head %u\n",
26373a4d5c94SMichael S. Tsirkin 			       i, vq->num, head);
2638d5675bd2SMichael S. Tsirkin 			return -EINVAL;
26393a4d5c94SMichael S. Tsirkin 		}
26407b5d753eSJason Wang 		ret = vhost_get_desc(vq, &desc, i);
26417b3384fcSMichael S. Tsirkin 		if (unlikely(ret)) {
26423a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
26433a4d5c94SMichael S. Tsirkin 			       i, vq->desc + i);
2644d5675bd2SMichael S. Tsirkin 			return -EFAULT;
26453a4d5c94SMichael S. Tsirkin 		}
26463b1bbe89SMichael S. Tsirkin 		if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) {
264747283befSMichael S. Tsirkin 			ret = get_indirect(vq, iov, iov_size,
26483a4d5c94SMichael S. Tsirkin 					   out_num, in_num,
26493a4d5c94SMichael S. Tsirkin 					   log, log_num, &desc);
26507b3384fcSMichael S. Tsirkin 			if (unlikely(ret < 0)) {
26516b1e6cc7SJason Wang 				if (ret != -EAGAIN)
26523a4d5c94SMichael S. Tsirkin 					vq_err(vq, "Failure detected "
26533a4d5c94SMichael S. Tsirkin 						"in indirect descriptor at idx %d\n", i);
2654d5675bd2SMichael S. Tsirkin 				return ret;
26553a4d5c94SMichael S. Tsirkin 			}
26563a4d5c94SMichael S. Tsirkin 			continue;
26573a4d5c94SMichael S. Tsirkin 		}
26583a4d5c94SMichael S. Tsirkin 
26596b1e6cc7SJason Wang 		if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
26606b1e6cc7SJason Wang 			access = VHOST_ACCESS_WO;
26616b1e6cc7SJason Wang 		else
26626b1e6cc7SJason Wang 			access = VHOST_ACCESS_RO;
26633b1bbe89SMichael S. Tsirkin 		ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
26643b1bbe89SMichael S. Tsirkin 				     vhost32_to_cpu(vq, desc.len), iov + iov_count,
26656b1e6cc7SJason Wang 				     iov_size - iov_count, access);
26667b3384fcSMichael S. Tsirkin 		if (unlikely(ret < 0)) {
26676b1e6cc7SJason Wang 			if (ret != -EAGAIN)
26683a4d5c94SMichael S. Tsirkin 				vq_err(vq, "Translation failure %d descriptor idx %d\n",
26693a4d5c94SMichael S. Tsirkin 					ret, i);
2670d5675bd2SMichael S. Tsirkin 			return ret;
26713a4d5c94SMichael S. Tsirkin 		}
26726b1e6cc7SJason Wang 		if (access == VHOST_ACCESS_WO) {
26733a4d5c94SMichael S. Tsirkin 			/* If this is an input descriptor,
26743a4d5c94SMichael S. Tsirkin 			 * increment that count. */
26753a4d5c94SMichael S. Tsirkin 			*in_num += ret;
2676060423bfSyongduan 			if (unlikely(log && ret)) {
26773b1bbe89SMichael S. Tsirkin 				log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
26783b1bbe89SMichael S. Tsirkin 				log[*log_num].len = vhost32_to_cpu(vq, desc.len);
26793a4d5c94SMichael S. Tsirkin 				++*log_num;
26803a4d5c94SMichael S. Tsirkin 			}
26813a4d5c94SMichael S. Tsirkin 		} else {
26823a4d5c94SMichael S. Tsirkin 			/* If it's an output descriptor, they're all supposed
26833a4d5c94SMichael S. Tsirkin 			 * to come before any input descriptors. */
26847b3384fcSMichael S. Tsirkin 			if (unlikely(*in_num)) {
26853a4d5c94SMichael S. Tsirkin 				vq_err(vq, "Descriptor has out after in: "
26863a4d5c94SMichael S. Tsirkin 				       "idx %d\n", i);
2687d5675bd2SMichael S. Tsirkin 				return -EINVAL;
26883a4d5c94SMichael S. Tsirkin 			}
26893a4d5c94SMichael S. Tsirkin 			*out_num += ret;
26903a4d5c94SMichael S. Tsirkin 		}
26913b1bbe89SMichael S. Tsirkin 	} while ((i = next_desc(vq, &desc)) != -1);
26923a4d5c94SMichael S. Tsirkin 
26933a4d5c94SMichael S. Tsirkin 	/* On success, increment avail index. */
26943a4d5c94SMichael S. Tsirkin 	vq->last_avail_idx++;
26958ea8cf89SMichael S. Tsirkin 
26968ea8cf89SMichael S. Tsirkin 	/* Assume notifications from the guest are disabled at this point;
26978ea8cf89SMichael S. Tsirkin 	 * if they aren't, we would need to update the avail_event index. */
26988ea8cf89SMichael S. Tsirkin 	BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
26993a4d5c94SMichael S. Tsirkin 	return head;
27003a4d5c94SMichael S. Tsirkin }
27016ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
27023a4d5c94SMichael S. Tsirkin 
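/* Example consumer loop (a sketch of the pattern existing backends
 * use; handle_one() stands in for backend-specific processing):
 *
 *	vhost_disable_notify(dev, vq);
 *	for (;;) {
 *		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
 *					 &out, &in, NULL, NULL);
 *		if (head < 0)
 *			break;			// translation error
 *		if (head == vq->num) {		// ring is drained
 *			if (unlikely(vhost_enable_notify(dev, vq))) {
 *				vhost_disable_notify(dev, vq);
 *				continue;	// raced with the guest
 *			}
 *			break;
 *		}
 *		len = handle_one(vq->iov, out, in);
 *		vhost_add_used_and_signal(dev, vq, head, len);
 *	}
 */
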
27033a4d5c94SMichael S. Tsirkin /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
vhost_discard_vq_desc(struct vhost_virtqueue * vq,int n)27048dd014adSDavid Stevens void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
27053a4d5c94SMichael S. Tsirkin {
27068dd014adSDavid Stevens 	vq->last_avail_idx -= n;
27073a4d5c94SMichael S. Tsirkin }
27086ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
27093a4d5c94SMichael S. Tsirkin 
27103a4d5c94SMichael S. Tsirkin /* After we've used one of their buffers, we tell them about it.  We'll then
27113a4d5c94SMichael S. Tsirkin  * want to notify the guest, using eventfd. */
vhost_add_used(struct vhost_virtqueue * vq,unsigned int head,int len)27123a4d5c94SMichael S. Tsirkin int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
27133a4d5c94SMichael S. Tsirkin {
27143b1bbe89SMichael S. Tsirkin 	struct vring_used_elem heads = {
27153b1bbe89SMichael S. Tsirkin 		cpu_to_vhost32(vq, head),
27163b1bbe89SMichael S. Tsirkin 		cpu_to_vhost32(vq, len)
27173b1bbe89SMichael S. Tsirkin 	};
27183a4d5c94SMichael S. Tsirkin 
2719c49e4e57SJason Wang 	return vhost_add_used_n(vq, &heads, 1);
27203a4d5c94SMichael S. Tsirkin }
27216ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used);
27223a4d5c94SMichael S. Tsirkin 
__vhost_add_used_n(struct vhost_virtqueue * vq,struct vring_used_elem * heads,unsigned count)27238dd014adSDavid Stevens static int __vhost_add_used_n(struct vhost_virtqueue *vq,
27248dd014adSDavid Stevens 			    struct vring_used_elem *heads,
27258dd014adSDavid Stevens 			    unsigned count)
27268dd014adSDavid Stevens {
2727a865e420SMichael S. Tsirkin 	vring_used_elem_t __user *used;
27288ea8cf89SMichael S. Tsirkin 	u16 old, new;
27298dd014adSDavid Stevens 	int start;
27308dd014adSDavid Stevens 
27315fba13b5SMichael S. Tsirkin 	start = vq->last_used_idx & (vq->num - 1);
27328dd014adSDavid Stevens 	used = vq->used->ring + start;
27337b5d753eSJason Wang 	if (vhost_put_used(vq, heads, start, count)) {
27348dd014adSDavid Stevens 		vq_err(vq, "Failed to write used");
27358dd014adSDavid Stevens 		return -EFAULT;
27368dd014adSDavid Stevens 	}
27378dd014adSDavid Stevens 	if (unlikely(vq->log_used)) {
27388dd014adSDavid Stevens 		/* Make sure data is seen before log. */
27398dd014adSDavid Stevens 		smp_wmb();
27408dd014adSDavid Stevens 		/* Log used ring entry write. */
2741cc5e7107SJason Wang 		log_used(vq, ((void __user *)used - (void __user *)vq->used),
27428dd014adSDavid Stevens 			 count * sizeof *used);
27438dd014adSDavid Stevens 	}
27448ea8cf89SMichael S. Tsirkin 	old = vq->last_used_idx;
27458ea8cf89SMichael S. Tsirkin 	new = (vq->last_used_idx += count);
27468ea8cf89SMichael S. Tsirkin 	/* If the driver never bothers to signal in a very long while,
27478ea8cf89SMichael S. Tsirkin 	 * the used index might wrap around. If that happens, invalidate
27488ea8cf89SMichael S. Tsirkin 	 * the signalled_used index we stored. TODO: make sure the driver
27498ea8cf89SMichael S. Tsirkin 	 * signals at least once in 2^16 and remove this. */
27508ea8cf89SMichael S. Tsirkin 	if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
27518ea8cf89SMichael S. Tsirkin 		vq->signalled_used_valid = false;
27528dd014adSDavid Stevens 	return 0;
27538dd014adSDavid Stevens }
27548dd014adSDavid Stevens 
27558dd014adSDavid Stevens /* After we've used one of their buffers, we tell them about it.  We'll then
27568dd014adSDavid Stevens  * want to notify the guest, using eventfd. */
vhost_add_used_n(struct vhost_virtqueue * vq,struct vring_used_elem * heads,unsigned count)27578dd014adSDavid Stevens int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
27588dd014adSDavid Stevens 		     unsigned count)
27598dd014adSDavid Stevens {
27608dd014adSDavid Stevens 	int start, n, r;
27618dd014adSDavid Stevens 
27625fba13b5SMichael S. Tsirkin 	start = vq->last_used_idx & (vq->num - 1);
27638dd014adSDavid Stevens 	n = vq->num - start;
27648dd014adSDavid Stevens 	if (n < count) {
27658dd014adSDavid Stevens 		r = __vhost_add_used_n(vq, heads, n);
27668dd014adSDavid Stevens 		if (r < 0)
27678dd014adSDavid Stevens 			return r;
27688dd014adSDavid Stevens 		heads += n;
27698dd014adSDavid Stevens 		count -= n;
27708dd014adSDavid Stevens 	}
27718dd014adSDavid Stevens 	r = __vhost_add_used_n(vq, heads, count);
27728dd014adSDavid Stevens 
27738dd014adSDavid Stevens 	/* Make sure buffer is written before we update index. */
27748dd014adSDavid Stevens 	smp_wmb();
27757b5d753eSJason Wang 	if (vhost_put_used_idx(vq)) {
27768dd014adSDavid Stevens 		vq_err(vq, "Failed to increment used idx");
27778dd014adSDavid Stevens 		return -EFAULT;
27788dd014adSDavid Stevens 	}
27798dd014adSDavid Stevens 	if (unlikely(vq->log_used)) {
2780841df922SJason Wang 		/* Make sure used idx is seen before log. */
2781841df922SJason Wang 		smp_wmb();
27828dd014adSDavid Stevens 		/* Log used index update. */
2783cc5e7107SJason Wang 		log_used(vq, offsetof(struct vring_used, idx),
27848dd014adSDavid Stevens 			 sizeof vq->used->idx);
27858dd014adSDavid Stevens 		if (vq->log_ctx)
27868dd014adSDavid Stevens 			eventfd_signal(vq->log_ctx, 1);
27878dd014adSDavid Stevens 	}
27888dd014adSDavid Stevens 	return r;
27898dd014adSDavid Stevens }
27906ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_n);
27918dd014adSDavid Stevens 
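/* Worked example of the split above: adding 10 heads with
 * vq->num == 256 and last_used_idx == 0xfffa gives start == 250 and
 * n == 6, so the first __vhost_add_used_n() fills slots 250..255 and
 * the second wraps around to fill slots 0..3.
 */
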
vhost_notify(struct vhost_dev * dev,struct vhost_virtqueue * vq)27928ea8cf89SMichael S. Tsirkin static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
27933a4d5c94SMichael S. Tsirkin {
27943b1bbe89SMichael S. Tsirkin 	__u16 old, new;
27953b1bbe89SMichael S. Tsirkin 	__virtio16 event;
27968ea8cf89SMichael S. Tsirkin 	bool v;
27978d65843cSJason Wang 	/* Flush out used index updates. This is paired
27988d65843cSJason Wang 	 * with the barrier that the Guest executes when enabling
27998d65843cSJason Wang 	 * interrupts. */
28008d65843cSJason Wang 	smp_mb();
28010d499356SMichael S. Tsirkin 
2802ea16c514SMichael S. Tsirkin 	if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
28038ea8cf89SMichael S. Tsirkin 	    unlikely(vq->avail_idx == vq->last_avail_idx))
28048ea8cf89SMichael S. Tsirkin 		return true;
28058ea8cf89SMichael S. Tsirkin 
2806ea16c514SMichael S. Tsirkin 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
28073b1bbe89SMichael S. Tsirkin 		__virtio16 flags;
28087b5d753eSJason Wang 		if (vhost_get_avail_flags(vq, &flags)) {
28093a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Failed to get flags");
28108ea8cf89SMichael S. Tsirkin 			return true;
28118ea8cf89SMichael S. Tsirkin 		}
28123b1bbe89SMichael S. Tsirkin 		return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT));
28138ea8cf89SMichael S. Tsirkin 	}
28148ea8cf89SMichael S. Tsirkin 	old = vq->signalled_used;
28158ea8cf89SMichael S. Tsirkin 	v = vq->signalled_used_valid;
28168ea8cf89SMichael S. Tsirkin 	new = vq->signalled_used = vq->last_used_idx;
28178ea8cf89SMichael S. Tsirkin 	vq->signalled_used_valid = true;
28188ea8cf89SMichael S. Tsirkin 
28198ea8cf89SMichael S. Tsirkin 	if (unlikely(!v))
28208ea8cf89SMichael S. Tsirkin 		return true;
28218ea8cf89SMichael S. Tsirkin 
28227b5d753eSJason Wang 	if (vhost_get_used_event(vq, &event)) {
28238ea8cf89SMichael S. Tsirkin 		vq_err(vq, "Failed to get used event idx");
28248ea8cf89SMichael S. Tsirkin 		return true;
28258ea8cf89SMichael S. Tsirkin 	}
28268d65843cSJason Wang 	return vring_need_event(vhost16_to_cpu(vq, event), new, old);
28273a4d5c94SMichael S. Tsirkin }
28283a4d5c94SMichael S. Tsirkin 
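/* Worked example of the event-idx check above: with old == 10,
 * new == 12 and a guest-published used_event of 11,
 * vring_need_event(11, 12, 10) computes (u16)(12 - 11 - 1) < (u16)(12 - 10),
 * i.e. 0 < 2, so we signal. Had used_event been 9, 2 < 2 would be
 * false and the interrupt would be suppressed.
 */
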
28298ea8cf89SMichael S. Tsirkin /* This actually signals the guest, using eventfd. */
vhost_signal(struct vhost_dev * dev,struct vhost_virtqueue * vq)28308ea8cf89SMichael S. Tsirkin void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
28318ea8cf89SMichael S. Tsirkin {
28323a4d5c94SMichael S. Tsirkin 	/* Signal the Guest to tell them we used something up. */
2833265a0ad8SZhu Lingshan 	if (vq->call_ctx.ctx && vhost_notify(dev, vq))
2834265a0ad8SZhu Lingshan 		eventfd_signal(vq->call_ctx.ctx, 1);
28353a4d5c94SMichael S. Tsirkin }
28366ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_signal);
28373a4d5c94SMichael S. Tsirkin 
28383a4d5c94SMichael S. Tsirkin /* And here's the combo meal deal.  Supersize me! */
vhost_add_used_and_signal(struct vhost_dev * dev,struct vhost_virtqueue * vq,unsigned int head,int len)28393a4d5c94SMichael S. Tsirkin void vhost_add_used_and_signal(struct vhost_dev *dev,
28403a4d5c94SMichael S. Tsirkin 			       struct vhost_virtqueue *vq,
28413a4d5c94SMichael S. Tsirkin 			       unsigned int head, int len)
28423a4d5c94SMichael S. Tsirkin {
28433a4d5c94SMichael S. Tsirkin 	vhost_add_used(vq, head, len);
28443a4d5c94SMichael S. Tsirkin 	vhost_signal(dev, vq);
28453a4d5c94SMichael S. Tsirkin }
28466ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
28473a4d5c94SMichael S. Tsirkin 
28488dd014adSDavid Stevens /* multi-buffer version of vhost_add_used_and_signal */
vhost_add_used_and_signal_n(struct vhost_dev * dev,struct vhost_virtqueue * vq,struct vring_used_elem * heads,unsigned count)28498dd014adSDavid Stevens void vhost_add_used_and_signal_n(struct vhost_dev *dev,
28508dd014adSDavid Stevens 				 struct vhost_virtqueue *vq,
28518dd014adSDavid Stevens 				 struct vring_used_elem *heads, unsigned count)
28528dd014adSDavid Stevens {
28538dd014adSDavid Stevens 	vhost_add_used_n(vq, heads, count);
28548dd014adSDavid Stevens 	vhost_signal(dev, vq);
28558dd014adSDavid Stevens }
28566ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
28578dd014adSDavid Stevens 
2858d4a60603SJason Wang /* return true if we're sure that the available ring is empty */
vhost_vq_avail_empty(struct vhost_dev * dev,struct vhost_virtqueue * vq)2859d4a60603SJason Wang bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2860d4a60603SJason Wang {
2861d4a60603SJason Wang 	__virtio16 avail_idx;
2862d4a60603SJason Wang 	int r;
2863d4a60603SJason Wang 
2864275bf960SJason Wang 	if (vq->avail_idx != vq->last_avail_idx)
2865d4a60603SJason Wang 		return false;
2866d4a60603SJason Wang 
28677b5d753eSJason Wang 	r = vhost_get_avail_idx(vq, &avail_idx);
2868275bf960SJason Wang 	if (unlikely(r))
2869275bf960SJason Wang 		return false;
2870275bf960SJason Wang 
28718a05b663SGavin Shan 	vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
28728a05b663SGavin Shan 	if (vq->avail_idx != vq->last_avail_idx) {
28738a05b663SGavin Shan 		/* Since we have updated avail_idx, the following
28748a05b663SGavin Shan 		 * call to vhost_get_vq_desc() will read available
28758a05b663SGavin Shan 		 * ring entries. Make sure that read happens after
28768a05b663SGavin Shan 		 * the avail_idx read.
28778a05b663SGavin Shan 		 */
28788a05b663SGavin Shan 		smp_rmb();
28798a05b663SGavin Shan 		return false;
28808a05b663SGavin Shan 	}
28818a05b663SGavin Shan 
28828a05b663SGavin Shan 	return true;
2883d4a60603SJason Wang }
2884d4a60603SJason Wang EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
2885d4a60603SJason Wang 
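/* Example use (sketch of a busy-poll loop in the style of the net
 * backend; vhost_can_busy_poll() is that backend's own helper and is
 * shown here for illustration only):
 *
 *	while (vhost_can_busy_poll(endtime) &&
 *	       vhost_vq_avail_empty(dev, vq))
 *		cpu_relax();
 */
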
28863a4d5c94SMichael S. Tsirkin /* OK, now we need to know about added descriptors. */
vhost_enable_notify(struct vhost_dev * dev,struct vhost_virtqueue * vq)28878ea8cf89SMichael S. Tsirkin bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
28883a4d5c94SMichael S. Tsirkin {
28893b1bbe89SMichael S. Tsirkin 	__virtio16 avail_idx;
28903a4d5c94SMichael S. Tsirkin 	int r;
2891d47effe1SKrishna Kumar 
28923a4d5c94SMichael S. Tsirkin 	if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
28933a4d5c94SMichael S. Tsirkin 		return false;
28943a4d5c94SMichael S. Tsirkin 	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
2895ea16c514SMichael S. Tsirkin 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
28962723feaaSJason Wang 		r = vhost_update_used_flags(vq);
28973a4d5c94SMichael S. Tsirkin 		if (r) {
28983a4d5c94SMichael S. Tsirkin 			vq_err(vq, "Failed to enable notification at %p: %d\n",
28993a4d5c94SMichael S. Tsirkin 			       &vq->used->flags, r);
29003a4d5c94SMichael S. Tsirkin 			return false;
29013a4d5c94SMichael S. Tsirkin 		}
29028ea8cf89SMichael S. Tsirkin 	} else {
29034c809363SStefano Garzarella 		r = vhost_update_avail_event(vq);
29048ea8cf89SMichael S. Tsirkin 		if (r) {
29058ea8cf89SMichael S. Tsirkin 			vq_err(vq, "Failed to update avail event index at %p: %d\n",
29068ea8cf89SMichael S. Tsirkin 			       vhost_avail_event(vq), r);
29078ea8cf89SMichael S. Tsirkin 			return false;
29088ea8cf89SMichael S. Tsirkin 		}
29098ea8cf89SMichael S. Tsirkin 	}
29103a4d5c94SMichael S. Tsirkin 	/* They could have slipped one in as we were doing that: make
29113a4d5c94SMichael S. Tsirkin 	 * sure it's written, then check again. */
29125659338cSMichael S. Tsirkin 	smp_mb();
29137b5d753eSJason Wang 	r = vhost_get_avail_idx(vq, &avail_idx);
29143a4d5c94SMichael S. Tsirkin 	if (r) {
29153a4d5c94SMichael S. Tsirkin 		vq_err(vq, "Failed to check avail idx at %p: %d\n",
29163a4d5c94SMichael S. Tsirkin 		       &vq->avail->idx, r);
29173a4d5c94SMichael S. Tsirkin 		return false;
29183a4d5c94SMichael S. Tsirkin 	}
29193a4d5c94SMichael S. Tsirkin 
2920d619651aSGavin Shan 	vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
2921d619651aSGavin Shan 	if (vq->avail_idx != vq->last_avail_idx) {
2922d619651aSGavin Shan 		/* Since we have updated avail_idx, the following
2923d619651aSGavin Shan 		 * call to vhost_get_vq_desc() will read available
2924d619651aSGavin Shan 		 * ring entries. Make sure that read happens after
2925d619651aSGavin Shan 		 * the avail_idx read.
2926d619651aSGavin Shan 		 */
2927d619651aSGavin Shan 		smp_rmb();
2928d619651aSGavin Shan 		return true;
2929d619651aSGavin Shan 	}
2930d619651aSGavin Shan 
2931d619651aSGavin Shan 	return false;
29323a4d5c94SMichael S. Tsirkin }
29336ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_enable_notify);
29343a4d5c94SMichael S. Tsirkin 
29353a4d5c94SMichael S. Tsirkin /* We don't need to be notified again. */
vhost_disable_notify(struct vhost_dev * dev,struct vhost_virtqueue * vq)29368ea8cf89SMichael S. Tsirkin void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
29373a4d5c94SMichael S. Tsirkin {
29383a4d5c94SMichael S. Tsirkin 	int r;
2939d47effe1SKrishna Kumar 
29403a4d5c94SMichael S. Tsirkin 	if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
29413a4d5c94SMichael S. Tsirkin 		return;
29423a4d5c94SMichael S. Tsirkin 	vq->used_flags |= VRING_USED_F_NO_NOTIFY;
2943ea16c514SMichael S. Tsirkin 	if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
29442723feaaSJason Wang 		r = vhost_update_used_flags(vq);
29453a4d5c94SMichael S. Tsirkin 		if (r)
2946ae6961deSYunsheng Lin 			vq_err(vq, "Failed to disable notification at %p: %d\n",
29473a4d5c94SMichael S. Tsirkin 			       &vq->used->flags, r);
29483a4d5c94SMichael S. Tsirkin 	}
29498ea8cf89SMichael S. Tsirkin }
29506ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_disable_notify);
29516ac1afbfSAsias He 
29526b1e6cc7SJason Wang /* Create a new message. */
vhost_new_msg(struct vhost_virtqueue * vq,int type)29536b1e6cc7SJason Wang struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
29546b1e6cc7SJason Wang {
29554d8df0f5SPrathu Baronia 	/* Make sure all padding within the structure is initialized. */
29564d8df0f5SPrathu Baronia 	struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
29576b1e6cc7SJason Wang 	if (!node)
29586b1e6cc7SJason Wang 		return NULL;
2959670ae9caSMichael S. Tsirkin 
29606b1e6cc7SJason Wang 	node->vq = vq;
29616b1e6cc7SJason Wang 	node->msg.type = type;
29626b1e6cc7SJason Wang 	return node;
29636b1e6cc7SJason Wang }
29646b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_new_msg);
29656b1e6cc7SJason Wang 
vhost_enqueue_msg(struct vhost_dev * dev,struct list_head * head,struct vhost_msg_node * node)29666b1e6cc7SJason Wang void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head,
29676b1e6cc7SJason Wang 		       struct vhost_msg_node *node)
29686b1e6cc7SJason Wang {
29696b1e6cc7SJason Wang 	spin_lock(&dev->iotlb_lock);
29706b1e6cc7SJason Wang 	list_add_tail(&node->node, head);
29716b1e6cc7SJason Wang 	spin_unlock(&dev->iotlb_lock);
29726b1e6cc7SJason Wang 
2973a9a08845SLinus Torvalds 	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
29746b1e6cc7SJason Wang }
29756b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_enqueue_msg);
29766b1e6cc7SJason Wang 
vhost_dequeue_msg(struct vhost_dev * dev,struct list_head * head)29776b1e6cc7SJason Wang struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
29786b1e6cc7SJason Wang 					 struct list_head *head)
29796b1e6cc7SJason Wang {
29806b1e6cc7SJason Wang 	struct vhost_msg_node *node = NULL;
29816b1e6cc7SJason Wang 
29826b1e6cc7SJason Wang 	spin_lock(&dev->iotlb_lock);
29836b1e6cc7SJason Wang 	if (!list_empty(head)) {
29846b1e6cc7SJason Wang 		node = list_first_entry(head, struct vhost_msg_node,
29856b1e6cc7SJason Wang 					node);
29866b1e6cc7SJason Wang 		list_del(&node->node);
29876b1e6cc7SJason Wang 	}
29886b1e6cc7SJason Wang 	spin_unlock(&dev->iotlb_lock);
29896b1e6cc7SJason Wang 
29906b1e6cc7SJason Wang 	return node;
29916b1e6cc7SJason Wang }
29926b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
29936b1e6cc7SJason Wang 
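/* Example: the userspace half of this message queue (sketch;
 * gpa_to_hva() is a hypothetical lookup and error handling is elided).
 * Misses queued via vhost_enqueue_msg() are read from the device fd
 * and answered with an update written back to it:
 *
 *	struct vhost_msg msg;
 *
 *	read(vhost_fd, &msg, sizeof(msg));
 *	if (msg.iotlb.type == VHOST_IOTLB_MISS) {
 *		msg.type = VHOST_IOTLB_MSG;
 *		msg.iotlb.type = VHOST_IOTLB_UPDATE;
 *		msg.iotlb.uaddr = gpa_to_hva(msg.iotlb.iova);
 *		msg.iotlb.size = 0x1000;	// assumed granule
 *		msg.iotlb.perm = VHOST_ACCESS_RW;
 *		write(vhost_fd, &msg, sizeof(msg));
 *	}
 */
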
vhost_set_backend_features(struct vhost_dev * dev,u64 features)2994460f7ce1SJason Wang void vhost_set_backend_features(struct vhost_dev *dev, u64 features)
2995460f7ce1SJason Wang {
2996460f7ce1SJason Wang 	struct vhost_virtqueue *vq;
2997460f7ce1SJason Wang 	int i;
2998460f7ce1SJason Wang 
2999460f7ce1SJason Wang 	mutex_lock(&dev->mutex);
3000460f7ce1SJason Wang 	for (i = 0; i < dev->nvqs; ++i) {
3001460f7ce1SJason Wang 		vq = dev->vqs[i];
3002460f7ce1SJason Wang 		mutex_lock(&vq->mutex);
3003460f7ce1SJason Wang 		vq->acked_backend_features = features;
3004460f7ce1SJason Wang 		mutex_unlock(&vq->mutex);
3005460f7ce1SJason Wang 	}
3006460f7ce1SJason Wang 	mutex_unlock(&dev->mutex);
3007460f7ce1SJason Wang }
3008460f7ce1SJason Wang EXPORT_SYMBOL_GPL(vhost_set_backend_features);
30096b1e6cc7SJason Wang 
vhost_init(void)30106ac1afbfSAsias He static int __init vhost_init(void)
30116ac1afbfSAsias He {
30126ac1afbfSAsias He 	return 0;
30136ac1afbfSAsias He }
30146ac1afbfSAsias He 
vhost_exit(void)30156ac1afbfSAsias He static void __exit vhost_exit(void)
30166ac1afbfSAsias He {
30176ac1afbfSAsias He }
30186ac1afbfSAsias He 
30196ac1afbfSAsias He module_init(vhost_init);
30206ac1afbfSAsias He module_exit(vhost_exit);
30216ac1afbfSAsias He 
30226ac1afbfSAsias He MODULE_VERSION("0.0.1");
30236ac1afbfSAsias He MODULE_LICENSE("GPL v2");
30246ac1afbfSAsias He MODULE_AUTHOR("Michael S. Tsirkin");
30256ac1afbfSAsias He MODULE_DESCRIPTION("Host kernel accelerator for virtio");
3026