// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2009 Red Hat, Inc.
 * Copyright (C) 2006 Rusty Russell IBM Corporation
 *
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Inspiration, some code, and most witty comments come from
 * Documentation/virtual/lguest/lguest.c, by Rusty Russell
 *
 * Generic code for virtio server in host kernel.
 */

#include <linux/eventfd.h>
#include <linux/vhost.h>
#include <linux/uio.h>
#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/vhost_task.h>
#include <linux/interval_tree_generic.h>
#include <linux/nospec.h>
#include <linux/kcov.h>

#include "vhost.h"

static ushort max_mem_regions = 64;
module_param(max_mem_regions, ushort, 0444);
MODULE_PARM_DESC(max_mem_regions,
	"Maximum number of memory regions in memory map. (default: 64)");
static int max_iotlb_entries = 2048;
module_param(max_iotlb_entries, int, 0444);
MODULE_PARM_DESC(max_iotlb_entries,
	"Maximum number of iotlb entries. (default: 2048)");

enum {
	VHOST_MEMORY_F_LOG = 0x1,
};

#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])

#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
	vq->user_be = !virtio_legacy_is_little_endian();
}

static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq)
{
	vq->user_be = true;
}

static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq)
{
	vq->user_be = false;
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	struct vhost_vring_state s;

	if (vq->private_data)
		return -EBUSY;

	if (copy_from_user(&s, argp, sizeof(s)))
		return -EFAULT;

	if (s.num != VHOST_VRING_LITTLE_ENDIAN &&
	    s.num != VHOST_VRING_BIG_ENDIAN)
		return -EINVAL;

	if (s.num == VHOST_VRING_BIG_ENDIAN)
		vhost_enable_cross_endian_big(vq);
	else
		vhost_enable_cross_endian_little(vq);

	return 0;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	struct vhost_vring_state s = {
		.index = idx,
		.num = vq->user_be
	};

	if (copy_to_user(argp, &s, sizeof(s)))
		return -EFAULT;

	return 0;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	/* Note for legacy virtio: user_be is initialized at reset time
	 * according to the host endianness. If userspace does not set an
	 * explicit endianness, the default behavior is native endian, as
	 * expected by legacy virtio.
	 */
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
}
#else
static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
{
}

static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
{
	return -ENOIOCTLCMD;
}

static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
				   int __user *argp)
{
	return -ENOIOCTLCMD;
}

static void vhost_init_is_le(struct vhost_virtqueue *vq)
{
	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1)
		|| virtio_legacy_is_little_endian();
}
#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */

static void vhost_reset_is_le(struct vhost_virtqueue *vq)
{
	vhost_init_is_le(vq);
}

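/*
 * Flushing works by queueing a vhost_flush_struct's work item on a worker
 * and sleeping on its completion: the worker runs work items in queueing
 * order, so once vhost_flush_work() fires, every work item queued before
 * it has finished.
 */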
struct vhost_flush_struct {
	struct vhost_work work;
	struct completion wait_event;
};

static void vhost_flush_work(struct vhost_work *work)
{
	struct vhost_flush_struct *s;

	s = container_of(work, struct vhost_flush_struct, work);
	complete(&s->wait_event);
}

static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
			    poll_table *pt)
{
	struct vhost_poll *poll;

	poll = container_of(pt, struct vhost_poll, table);
	poll->wqh = wqh;
	add_wait_queue(wqh, &poll->wait);
}

static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
			     void *key)
{
	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
	struct vhost_work *work = &poll->work;

	if (!(key_to_poll(key) & poll->mask))
		return 0;

	if (!poll->dev->use_worker)
		work->fn(work);
	else
		vhost_poll_queue(poll);

	return 0;
}

void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
{
	clear_bit(VHOST_WORK_QUEUED, &work->flags);
	work->fn = fn;
}
EXPORT_SYMBOL_GPL(vhost_work_init);

/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
		     __poll_t mask, struct vhost_dev *dev,
		     struct vhost_virtqueue *vq)
{
	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
	init_poll_funcptr(&poll->table, vhost_poll_func);
	poll->mask = mask;
	poll->dev = dev;
	poll->wqh = NULL;
	poll->vq = vq;

	vhost_work_init(&poll->work, fn);
}
EXPORT_SYMBOL_GPL(vhost_poll_init);

/* Start polling a file. We add ourselves to the file's wait queue. The caller
 * must keep a reference to the file until after vhost_poll_stop is called. */
int vhost_poll_start(struct vhost_poll *poll, struct file *file)
{
	__poll_t mask;

	if (poll->wqh)
		return 0;

	mask = vfs_poll(file, &poll->table);
	if (mask)
		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
	if (mask & EPOLLERR) {
		vhost_poll_stop(poll);
		return -EINVAL;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(vhost_poll_start);

/* Stop polling a file. After this function returns, it becomes safe to drop
 * the file reference. You must also flush afterwards. */
void vhost_poll_stop(struct vhost_poll *poll)
{
	if (poll->wqh) {
		remove_wait_queue(poll->wqh, &poll->wait);
		poll->wqh = NULL;
	}
}
EXPORT_SYMBOL_GPL(vhost_poll_stop);

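/**
 * vhost_worker_queue - queue work on a worker
 * @worker: worker to queue the work on
 * @work: work to queue
 *
 * VHOST_WORK_QUEUED guards against double-queueing: if @work is already
 * pending on a work list, this is a no-op.
 */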
static void vhost_worker_queue(struct vhost_worker *worker,
			       struct vhost_work *work)
{
	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
		/* We can only add the work to the list after we're
		 * sure it was not in the list.
		 * test_and_set_bit() implies a memory barrier.
		 */
		llist_add(&work->node, &worker->work_list);
		vhost_task_wake(worker->vtsk);
	}
}

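/**
 * vhost_vq_work_queue - queue work on the vq's worker
 * @vq: vq whose worker will run the work
 * @work: work to queue
 *
 * Returns %true if the work was queued, %false if the vq has no worker
 * attached (it was never attached, or the worker was killed).
 */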
bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
{
	struct vhost_worker *worker;
	bool queued = false;

	rcu_read_lock();
	worker = rcu_dereference(vq->worker);
	if (worker) {
		queued = true;
		vhost_worker_queue(worker, work);
	}
	rcu_read_unlock();

	return queued;
}
EXPORT_SYMBOL_GPL(vhost_vq_work_queue);

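/**
 * vhost_vq_flush - flush the vq's worker
 * @vq: vq to flush
 *
 * Waits until all work queued on the vq's worker before this call has
 * completed. Returns immediately if no worker is attached.
 */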
void vhost_vq_flush(struct vhost_virtqueue *vq)
{
	struct vhost_flush_struct flush;

	init_completion(&flush.wait_event);
	vhost_work_init(&flush.work, vhost_flush_work);

	if (vhost_vq_work_queue(vq, &flush.work))
		wait_for_completion(&flush.wait_event);
}
EXPORT_SYMBOL_GPL(vhost_vq_flush);

/**
 * __vhost_worker_flush - flush a worker
 * @worker: worker to flush
 *
 * The worker's mutex must be held.
 */
static void __vhost_worker_flush(struct vhost_worker *worker)
{
	struct vhost_flush_struct flush;

	if (!worker->attachment_cnt || worker->killed)
		return;

	init_completion(&flush.wait_event);
	vhost_work_init(&flush.work, vhost_flush_work);

	vhost_worker_queue(worker, &flush.work);
	/*
	 * Drop mutex in case our worker is killed and it needs to take the
	 * mutex to force cleanup.
	 */
	mutex_unlock(&worker->mutex);
	wait_for_completion(&flush.wait_event);
	mutex_lock(&worker->mutex);
}

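/* Locked wrapper around __vhost_worker_flush(). */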
static void vhost_worker_flush(struct vhost_worker *worker)
{
	mutex_lock(&worker->mutex);
	__vhost_worker_flush(worker);
	mutex_unlock(&worker->mutex);
}

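/**
 * vhost_dev_flush - flush all of the device's workers
 * @dev: device to flush
 *
 * Flushes the default worker and every worker userspace created with
 * VHOST_NEW_WORKER, since they are all registered in dev->worker_xa.
 */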
void vhost_dev_flush(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	unsigned long i;

	xa_for_each(&dev->worker_xa, i, worker)
		vhost_worker_flush(worker);
}
EXPORT_SYMBOL_GPL(vhost_dev_flush);

/* A lockless hint for busy polling code to exit the loop */
bool vhost_vq_has_work(struct vhost_virtqueue *vq)
{
	struct vhost_worker *worker;
	bool has_work = false;

	rcu_read_lock();
	worker = rcu_dereference(vq->worker);
	if (worker && !llist_empty(&worker->work_list))
		has_work = true;
	rcu_read_unlock();

	return has_work;
}
EXPORT_SYMBOL_GPL(vhost_vq_has_work);

void vhost_poll_queue(struct vhost_poll *poll)
{
	vhost_vq_work_queue(poll->vq, &poll->work);
}
EXPORT_SYMBOL_GPL(vhost_poll_queue);

static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
{
	int j;

	for (j = 0; j < VHOST_NUM_ADDRS; j++)
		vq->meta_iotlb[j] = NULL;
}

static void vhost_vq_meta_reset(struct vhost_dev *d)
{
	int i;

	for (i = 0; i < d->nvqs; ++i)
		__vhost_vq_meta_reset(d->vqs[i]);
}

static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
{
	call_ctx->ctx = NULL;
	memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
}

bool vhost_vq_is_setup(struct vhost_virtqueue *vq)
{
	return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq);
}
EXPORT_SYMBOL_GPL(vhost_vq_is_setup);

static void vhost_vq_reset(struct vhost_dev *dev,
			   struct vhost_virtqueue *vq)
{
	vq->num = 1;
	vq->desc = NULL;
	vq->avail = NULL;
	vq->used = NULL;
	vq->last_avail_idx = 0;
	vq->avail_idx = 0;
	vq->last_used_idx = 0;
	vq->signalled_used = 0;
	vq->signalled_used_valid = false;
	vq->used_flags = 0;
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->private_data = NULL;
	vq->acked_features = 0;
	vq->acked_backend_features = 0;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
	vq->kick = NULL;
	vq->log_ctx = NULL;
	vhost_disable_cross_endian(vq);
	vhost_reset_is_le(vq);
	vq->busyloop_timeout = 0;
	vq->umem = NULL;
	vq->iotlb = NULL;
	rcu_assign_pointer(vq->worker, NULL);
	vhost_vring_call_reset(&vq->call_ctx);
	__vhost_vq_meta_reset(vq);
}

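/*
 * Worker thread body, run by the vhost_task: drain the work list once and
 * execute each work item in queueing order. Returns true if any work was
 * run, which the vhost_task loop uses to decide whether the thread may
 * sleep before the next iteration.
 */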
static bool vhost_run_work_list(void *data)
{
	struct vhost_worker *worker = data;
	struct vhost_work *work, *work_next;
	struct llist_node *node;

	node = llist_del_all(&worker->work_list);
	if (node) {
		__set_current_state(TASK_RUNNING);

		node = llist_reverse_order(node);
		/* make sure flag is seen after deletion */
		smp_wmb();
		llist_for_each_entry_safe(work, work_next, node, node) {
			clear_bit(VHOST_WORK_QUEUED, &work->flags);
			kcov_remote_start_common(worker->kcov_handle);
			work->fn(work);
			kcov_remote_stop();
			cond_resched();
		}
	}

	return !!node;
}

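/*
 * Called when the worker's task is being killed (e.g. by a signal): detach
 * the worker from any vqs still pointing at it and run whatever work is
 * left, so queued flushers are not left waiting forever.
 */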
static void vhost_worker_killed(void *data)
{
	struct vhost_worker *worker = data;
	struct vhost_dev *dev = worker->dev;
	struct vhost_virtqueue *vq;
	int i, attach_cnt = 0;

	mutex_lock(&worker->mutex);
	worker->killed = true;

	for (i = 0; i < dev->nvqs; i++) {
		vq = dev->vqs[i];

		mutex_lock(&vq->mutex);
		if (worker ==
		    rcu_dereference_check(vq->worker,
					  lockdep_is_held(&vq->mutex))) {
			rcu_assign_pointer(vq->worker, NULL);
			attach_cnt++;
		}
		mutex_unlock(&vq->mutex);
	}

	worker->attachment_cnt -= attach_cnt;
	if (attach_cnt)
		synchronize_rcu();
	/*
	 * Finish vhost_worker_flush calls and any other works that snuck in
	 * before the synchronize_rcu.
	 */
	vhost_run_work_list(worker);
	mutex_unlock(&worker->mutex);
}

static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
{
	kfree(vq->indirect);
	vq->indirect = NULL;
	kfree(vq->log);
	vq->log = NULL;
	kfree(vq->heads);
	vq->heads = NULL;
}

/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
	struct vhost_virtqueue *vq;
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->indirect = kmalloc_array(UIO_MAXIOV,
					     sizeof(*vq->indirect),
					     GFP_KERNEL);
		vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log),
					GFP_KERNEL);
		vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads),
					  GFP_KERNEL);
		if (!vq->indirect || !vq->log || !vq->heads)
			goto err_nomem;
	}
	return 0;

err_nomem:
	for (; i >= 0; --i)
		vhost_vq_free_iovecs(dev->vqs[i]);
	return -ENOMEM;
}

static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i)
		vhost_vq_free_iovecs(dev->vqs[i]);
}

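/**
 * vhost_exceeds_weight - check if the handler has used up its quota
 * @vq: vq being processed
 * @pkts: number of requests processed so far
 * @total_len: number of bytes processed so far
 *
 * If either limit set in vhost_dev_init() has been reached, requeue the
 * vq's poll work so processing resumes later and return %true, telling the
 * caller to stop. This bounds how long one vq can monopolize the worker.
 */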
bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
			  int pkts, int total_len)
{
	struct vhost_dev *dev = vq->dev;

	if ((dev->byte_weight && total_len >= dev->byte_weight) ||
	    pkts >= dev->weight) {
		vhost_poll_queue(&vq->poll);
		return true;
	}

	return false;
}
EXPORT_SYMBOL_GPL(vhost_exceeds_weight);

static size_t vhost_get_avail_size(struct vhost_virtqueue *vq,
				   unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->avail, ring, num), event);
}

static size_t vhost_get_used_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	size_t event __maybe_unused =
	       vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

	return size_add(struct_size(vq->used, ring, num), event);
}

static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
				  unsigned int num)
{
	return sizeof(*vq->desc) * num;
}

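/**
 * vhost_dev_init - initialize a vhost device
 * @dev: device to initialize
 * @vqs: array of virtqueues, owned by the caller
 * @nvqs: number of entries in @vqs
 * @iov_limit: size of the per-vq log and heads arrays
 * @weight: request-count limit checked by vhost_exceeds_weight()
 * @byte_weight: byte-count limit checked by vhost_exceeds_weight()
 * @use_worker: whether work runs on a worker thread (false for vDPA)
 * @msg_handler: optional handler for iotlb messages
 */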
void vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue **vqs, int nvqs,
		    int iov_limit, int weight, int byte_weight,
		    bool use_worker,
		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
				       struct vhost_iotlb_msg *msg))
{
	struct vhost_virtqueue *vq;
	int i;

	dev->vqs = vqs;
	dev->nvqs = nvqs;
	mutex_init(&dev->mutex);
	dev->log_ctx = NULL;
	dev->umem = NULL;
	dev->iotlb = NULL;
	dev->mm = NULL;
	dev->iov_limit = iov_limit;
	dev->weight = weight;
	dev->byte_weight = byte_weight;
	dev->use_worker = use_worker;
	dev->msg_handler = msg_handler;
	init_waitqueue_head(&dev->wait);
	INIT_LIST_HEAD(&dev->read_list);
	INIT_LIST_HEAD(&dev->pending_list);
	spin_lock_init(&dev->iotlb_lock);
	xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC);

	for (i = 0; i < dev->nvqs; ++i) {
		vq = dev->vqs[i];
		vq->log = NULL;
		vq->indirect = NULL;
		vq->heads = NULL;
		vq->dev = dev;
		mutex_init(&vq->mutex);
		vhost_vq_reset(dev, vq);
		if (vq->handle_kick)
			vhost_poll_init(&vq->poll, vq->handle_kick,
					EPOLLIN, dev, vq);
	}
}
EXPORT_SYMBOL_GPL(vhost_dev_init);

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
	/* Are you the owner? If not, I don't think you mean to do that */
	return dev->mm == current->mm ? 0 : -EPERM;
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

/* Caller should have device mutex */
bool vhost_dev_has_owner(struct vhost_dev *dev)
{
	return dev->mm;
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);

static void vhost_attach_mm(struct vhost_dev *dev)
{
	/* No owner, become one */
	if (dev->use_worker) {
		dev->mm = get_task_mm(current);
	} else {
		/* A vDPA device does not use a worker thread, so there's
		 * no need to hold the address space for the mm. This helps
		 * to avoid a deadlock in the case of mmap(), which may
		 * hold the refcnt of the file and depends on the release
		 * method to remove the vma.
		 */
		dev->mm = current->mm;
		mmgrab(dev->mm);
	}
}

static void vhost_detach_mm(struct vhost_dev *dev)
{
	if (!dev->mm)
		return;

	if (dev->use_worker)
		mmput(dev->mm);
	else
		mmdrop(dev->mm);

	dev->mm = NULL;
}

static void vhost_worker_destroy(struct vhost_dev *dev,
				 struct vhost_worker *worker)
{
	if (!worker)
		return;

	WARN_ON(!llist_empty(&worker->work_list));
	xa_erase(&dev->worker_xa, worker->id);
	vhost_task_stop(worker->vtsk);
	kfree(worker);
}

static void vhost_workers_free(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	unsigned long i;

	if (!dev->use_worker)
		return;

	for (i = 0; i < dev->nvqs; i++)
		rcu_assign_pointer(dev->vqs[i]->worker, NULL);
	/*
	 * Free the default worker we created and clean up workers userspace
	 * created but couldn't clean up (it forgot or crashed).
	 */
	xa_for_each(&dev->worker_xa, i, worker)
		vhost_worker_destroy(dev, worker);
	xa_destroy(&dev->worker_xa);
}

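/*
 * Create a vhost_task-backed worker, start it, and register it in the
 * device's worker_xa so ioctls and cleanup can find it by id.
 */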
static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	struct vhost_task *vtsk;
	char name[TASK_COMM_LEN];
	int ret;
	u32 id;

	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
	if (!worker)
		return NULL;

	worker->dev = dev;
	snprintf(name, sizeof(name), "vhost-%d", current->pid);

	vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
				 worker, name);
	if (!vtsk)
		goto free_worker;

	mutex_init(&worker->mutex);
	init_llist_head(&worker->work_list);
	worker->kcov_handle = kcov_common_handle();
	worker->vtsk = vtsk;

	vhost_task_start(vtsk);

	ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
	if (ret < 0)
		goto stop_worker;
	worker->id = id;

	return worker;

stop_worker:
	vhost_task_stop(vtsk);
free_worker:
	kfree(worker);
	return NULL;
}

/* Caller must have device mutex */
static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				     struct vhost_worker *worker)
{
	struct vhost_worker *old_worker;

	mutex_lock(&worker->mutex);
	if (worker->killed) {
		mutex_unlock(&worker->mutex);
		return;
	}

	mutex_lock(&vq->mutex);

	old_worker = rcu_dereference_check(vq->worker,
					   lockdep_is_held(&vq->mutex));
	rcu_assign_pointer(vq->worker, worker);
	worker->attachment_cnt++;

	if (!old_worker) {
		mutex_unlock(&vq->mutex);
		mutex_unlock(&worker->mutex);
		return;
	}
	mutex_unlock(&vq->mutex);
	mutex_unlock(&worker->mutex);
	/*
	 * Take the worker mutex to make sure we see the work queued from
	 * device-wide flushes, which don't use RCU for execution.
	 */
	mutex_lock(&old_worker->mutex);
	if (old_worker->killed) {
		mutex_unlock(&old_worker->mutex);
		return;
	}

	/*
	 * We don't want to call synchronize_rcu for every vq during setup
	 * because it will slow down VM startup. If we haven't done
	 * VHOST_SET_VRING_KICK and haven't done the driver-specific
	 * SET_ENDPOINT/RUNNING, then we can skip the sync since there will
	 * not be any works queued for scsi and net.
	 */
	mutex_lock(&vq->mutex);
	if (!vhost_vq_get_backend(vq) && !vq->kick) {
		mutex_unlock(&vq->mutex);

		old_worker->attachment_cnt--;
		mutex_unlock(&old_worker->mutex);
		/*
		 * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
		 * Warn if it adds support for multiple workers but forgets to
		 * handle the early queueing case.
		 */
		WARN_ON(!old_worker->attachment_cnt &&
			!llist_empty(&old_worker->work_list));
		return;
	}
	mutex_unlock(&vq->mutex);

	/* Make sure new vq queue/flush/poll calls see the new worker */
	synchronize_rcu();
	/* Make sure whatever was queued gets run */
	__vhost_worker_flush(old_worker);
	old_worker->attachment_cnt--;
	mutex_unlock(&old_worker->mutex);
}

/* Caller must have device mutex */
static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
				  struct vhost_vring_worker *info)
{
	unsigned long index = info->worker_id;
	struct vhost_dev *dev = vq->dev;
	struct vhost_worker *worker;

	if (!dev->use_worker)
		return -EINVAL;

	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	__vhost_vq_attach_worker(vq, worker);
	return 0;
}

/* Caller must have device mutex */
static int vhost_new_worker(struct vhost_dev *dev,
			    struct vhost_worker_state *info)
{
	struct vhost_worker *worker;

	worker = vhost_worker_create(dev);
	if (!worker)
		return -ENOMEM;

	info->worker_id = worker->id;
	return 0;
}

/* Caller must have device mutex */
static int vhost_free_worker(struct vhost_dev *dev,
			     struct vhost_worker_state *info)
{
	unsigned long index = info->worker_id;
	struct vhost_worker *worker;

	worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT);
	if (!worker || worker->id != info->worker_id)
		return -ENODEV;

	mutex_lock(&worker->mutex);
	if (worker->attachment_cnt || worker->killed) {
		mutex_unlock(&worker->mutex);
		return -EBUSY;
	}
	/*
	 * A flush might have raced and snuck in before attachment_cnt was
	 * set to zero. Make sure any such flush work is drained from the
	 * queue before freeing.
	 */
	__vhost_worker_flush(worker);
	mutex_unlock(&worker->mutex);

	vhost_worker_destroy(dev, worker);
	return 0;
}

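/*
 * Read a vring index from userspace, bounds- and spectre-check it, and
 * return the matching vq. Used by the vring worker ioctls.
 */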
static int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp,
				  struct vhost_virtqueue **vq, u32 *id)
{
	u32 __user *idxp = argp;
	u32 idx;
	long r;

	r = get_user(idx, idxp);
	if (r < 0)
		return r;

	if (idx >= dev->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, dev->nvqs);

	*vq = dev->vqs[idx];
	*id = idx;
	return 0;
}

/* Caller must have device mutex */
long vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl,
			void __user *argp)
{
	struct vhost_vring_worker ring_worker;
	struct vhost_worker_state state;
	struct vhost_worker *worker;
	struct vhost_virtqueue *vq;
	long ret;
	u32 idx;

	if (!dev->use_worker)
		return -EINVAL;

	if (!vhost_dev_has_owner(dev))
		return -EINVAL;

	ret = vhost_dev_check_owner(dev);
	if (ret)
		return ret;

	switch (ioctl) {
	/* dev worker ioctls */
	case VHOST_NEW_WORKER:
		ret = vhost_new_worker(dev, &state);
		if (!ret && copy_to_user(argp, &state, sizeof(state)))
			ret = -EFAULT;
		return ret;
	case VHOST_FREE_WORKER:
		if (copy_from_user(&state, argp, sizeof(state)))
			return -EFAULT;
		return vhost_free_worker(dev, &state);
	/* vring worker ioctls */
	case VHOST_ATTACH_VRING_WORKER:
	case VHOST_GET_VRING_WORKER:
		break;
	default:
		return -ENOIOCTLCMD;
	}

	ret = vhost_get_vq_from_user(dev, argp, &vq, &idx);
	if (ret)
		return ret;

	switch (ioctl) {
	case VHOST_ATTACH_VRING_WORKER:
		if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) {
			ret = -EFAULT;
			break;
		}

		ret = vhost_vq_attach_worker(vq, &ring_worker);
		break;
	case VHOST_GET_VRING_WORKER:
		worker = rcu_dereference_check(vq->worker,
					       lockdep_is_held(&dev->mutex));
		if (!worker) {
			ret = -EINVAL;
			break;
		}

		ring_worker.index = idx;
		ring_worker.worker_id = worker->id;

		if (copy_to_user(argp, &ring_worker, sizeof(ring_worker)))
			ret = -EFAULT;
		break;
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vhost_worker_ioctl);

/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
	struct vhost_worker *worker;
	int err, i;

	/* Is there an owner already? */
	if (vhost_dev_has_owner(dev)) {
		err = -EBUSY;
		goto err_mm;
	}

	vhost_attach_mm(dev);

	err = vhost_dev_alloc_iovecs(dev);
	if (err)
		goto err_iovecs;

	if (dev->use_worker) {
		/*
		 * This should be done last, because vsock can queue work
		 * before VHOST_SET_OWNER so it simplifies the failure path
		 * below since we don't have to worry about vsock queueing
		 * while we free the worker.
		 */
		worker = vhost_worker_create(dev);
		if (!worker) {
			err = -ENOMEM;
			goto err_worker;
		}

		for (i = 0; i < dev->nvqs; i++)
			__vhost_vq_attach_worker(dev->vqs[i], worker);
	}

	return 0;

err_worker:
	vhost_dev_free_iovecs(dev);
err_iovecs:
	vhost_detach_mm(dev);
err_mm:
	return err;
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);

static struct vhost_iotlb *iotlb_alloc(void)
{
	return vhost_iotlb_alloc(max_iotlb_entries,
				 VHOST_IOTLB_FLAG_RETIRE);
}

struct vhost_iotlb *vhost_dev_reset_owner_prepare(void)
{
	return iotlb_alloc();
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);

/* Caller should have device mutex */
void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem)
{
	int i;

	vhost_dev_cleanup(dev);

	dev->umem = umem;
	/* We don't need VQ locks below since vhost_dev_cleanup makes sure
	 * VQs aren't running.
	 */
	for (i = 0; i < dev->nvqs; ++i)
		dev->vqs[i]->umem = umem;
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner);

void vhost_dev_stop(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick)
			vhost_poll_stop(&dev->vqs[i]->poll);
	}

	vhost_dev_flush(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_stop);

void vhost_clear_msg(struct vhost_dev *dev)
{
	struct vhost_msg_node *node, *n;

	spin_lock(&dev->iotlb_lock);

	list_for_each_entry_safe(node, n, &dev->read_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	list_for_each_entry_safe(node, n, &dev->pending_list, node) {
		list_del(&node->node);
		kfree(node);
	}

	spin_unlock(&dev->iotlb_lock);
}
EXPORT_SYMBOL_GPL(vhost_clear_msg);

void vhost_dev_cleanup(struct vhost_dev *dev)
{
	int i;

	for (i = 0; i < dev->nvqs; ++i) {
		if (dev->vqs[i]->error_ctx)
			eventfd_ctx_put(dev->vqs[i]->error_ctx);
		if (dev->vqs[i]->kick)
			fput(dev->vqs[i]->kick);
		if (dev->vqs[i]->call_ctx.ctx)
			eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
		vhost_vq_reset(dev, dev->vqs[i]);
	}
	vhost_dev_free_iovecs(dev);
	if (dev->log_ctx)
		eventfd_ctx_put(dev->log_ctx);
	dev->log_ctx = NULL;
	/* No one will access memory at this point */
	vhost_iotlb_free(dev->umem);
	dev->umem = NULL;
	vhost_iotlb_free(dev->iotlb);
	dev->iotlb = NULL;
	vhost_clear_msg(dev);
	wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
	vhost_workers_free(dev);
	vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);

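/*
 * Check that userspace can access the dirty-log bits covering @sz bytes of
 * guest memory starting at address @addr: one bit per VHOST_PAGE_SIZE page,
 * eight bits per log byte.
 */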
static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
{
	u64 a = addr / VHOST_PAGE_SIZE / 8;

	/* Make sure 64 bit math will not overflow. */
	if (a > ULONG_MAX - (unsigned long)log_base ||
	    a + (unsigned long)log_base > ULONG_MAX)
		return false;

	return access_ok(log_base + a,
			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
}

/* Make sure 64 bit math will not overflow. */
static bool vhost_overflow(u64 uaddr, u64 size)
{
	if (uaddr > ULONG_MAX || size > ULONG_MAX)
		return true;

	if (!size)
		return false;

	return uaddr > ULONG_MAX - size + 1;
}

/* Caller should have vq mutex and device mutex. */
static bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem,
				int log_all)
{
	struct vhost_iotlb_map *map;

	if (!umem)
		return false;

	list_for_each_entry(map, &umem->list, link) {
		unsigned long a = map->addr;

		if (vhost_overflow(map->addr, map->size))
			return false;

		if (!access_ok((void __user *)a, map->size))
			return false;
		else if (log_all && !log_access_ok(log_base,
						   map->start,
						   map->size))
			return false;
	}
	return true;
}

1119f8894913SJason Wang static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
1120f8894913SJason Wang u64 addr, unsigned int size,
1121f8894913SJason Wang int type)
1122f8894913SJason Wang {
11230bbe3066SJason Wang const struct vhost_iotlb_map *map = vq->meta_iotlb[type];
1124f8894913SJason Wang
11250bbe3066SJason Wang if (!map)
1126f8894913SJason Wang return NULL;
1127f8894913SJason Wang
11281b0be99fSMichael S. Tsirkin return (void __user *)(uintptr_t)(map->addr + addr - map->start);
1129f8894913SJason Wang }
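/* Note: meta_iotlb[] is a small per-virtqueue cache holding the
 * translations of the descriptor, avail and used rings, populated by
 * iotlb_access_ok() at prefetch time. A hit here lets the accessors
 * below skip the full translate_desc() lookup on the hot path.
 */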
1130f8894913SJason Wang
11313a4d5c94SMichael S. Tsirkin /* Can we switch to this memory table? */
11323a4d5c94SMichael S. Tsirkin /* Caller should have device mutex but not vq mutex */
11330bbe3066SJason Wang static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem,
11343a4d5c94SMichael S. Tsirkin int log_all)
11353a4d5c94SMichael S. Tsirkin {
11363a4d5c94SMichael S. Tsirkin int i;
1137d47effe1SKrishna Kumar
11383a4d5c94SMichael S. Tsirkin for (i = 0; i < d->nvqs; ++i) {
1139ddd3d408SStefan Hajnoczi bool ok;
1140ea16c514SMichael S. Tsirkin bool log;
1141ea16c514SMichael S. Tsirkin
11423ab2e420SAsias He mutex_lock(&d->vqs[i]->mutex);
1143ea16c514SMichael S. Tsirkin log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL);
11443a4d5c94SMichael S. Tsirkin /* If ring is inactive, will check when it's enabled. */
11453ab2e420SAsias He if (d->vqs[i]->private_data)
1146a9709d68SJason Wang ok = vq_memory_access_ok(d->vqs[i]->log_base,
1147a9709d68SJason Wang umem, log);
11483a4d5c94SMichael S. Tsirkin else
1149ddd3d408SStefan Hajnoczi ok = true;
11503ab2e420SAsias He mutex_unlock(&d->vqs[i]->mutex);
11513a4d5c94SMichael S. Tsirkin if (!ok)
1152ddd3d408SStefan Hajnoczi return false;
11533a4d5c94SMichael S. Tsirkin }
1154ddd3d408SStefan Hajnoczi return true;
11553a4d5c94SMichael S. Tsirkin }
11563a4d5c94SMichael S. Tsirkin
11576b1e6cc7SJason Wang static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
11586b1e6cc7SJason Wang struct iovec iov[], int iov_size, int access);
1159bfe2bc51SJason Wang
116072952cc0SMichael S. Tsirkin static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
1161bfe2bc51SJason Wang const void *from, unsigned size)
1162bfe2bc51SJason Wang {
11636b1e6cc7SJason Wang int ret;
1164bfe2bc51SJason Wang
11656b1e6cc7SJason Wang if (!vq->iotlb)
11666b1e6cc7SJason Wang return __copy_to_user(to, from, size);
11676b1e6cc7SJason Wang else {
11686b1e6cc7SJason Wang /* This function should be called after iotlb
11696b1e6cc7SJason Wang * prefetch, which means we're sure that the whole vq
11706b1e6cc7SJason Wang * can be accessed through the iotlb, so -EAGAIN should
11716b1e6cc7SJason Wang * not happen in this case.
11726b1e6cc7SJason Wang */
11736b1e6cc7SJason Wang struct iov_iter t;
1174f8894913SJason Wang void __user *uaddr = vhost_vq_meta_fetch(vq,
1175f8894913SJason Wang (u64)(uintptr_t)to, size,
11767ced6c98SEric Auger VHOST_ADDR_USED);
1177f8894913SJason Wang
1178f8894913SJason Wang if (uaddr)
1179f8894913SJason Wang return __copy_to_user(uaddr, from, size);
1180f8894913SJason Wang
11816b1e6cc7SJason Wang ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
11826b1e6cc7SJason Wang ARRAY_SIZE(vq->iotlb_iov),
11836b1e6cc7SJason Wang VHOST_ACCESS_WO);
11846b1e6cc7SJason Wang if (ret < 0)
11856b1e6cc7SJason Wang goto out;
1186de4eda9dSAl Viro iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size);
11876b1e6cc7SJason Wang ret = copy_to_iter(from, size, &t);
11886b1e6cc7SJason Wang if (ret == size)
11896b1e6cc7SJason Wang ret = 0;
11906b1e6cc7SJason Wang }
11916b1e6cc7SJason Wang out:
11926b1e6cc7SJason Wang return ret;
11936b1e6cc7SJason Wang }
1194bfe2bc51SJason Wang
1195bfe2bc51SJason Wang static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
119672952cc0SMichael S. Tsirkin void __user *from, unsigned size)
1197bfe2bc51SJason Wang {
11986b1e6cc7SJason Wang int ret;
11996b1e6cc7SJason Wang
12006b1e6cc7SJason Wang if (!vq->iotlb)
1201bfe2bc51SJason Wang return __copy_from_user(to, from, size);
12026b1e6cc7SJason Wang else {
12036b1e6cc7SJason Wang /* This function should be called after iotlb
12046b1e6cc7SJason Wang * prefetch, which means we're sure that the vq
12056b1e6cc7SJason Wang * can be accessed through the iotlb, so -EAGAIN should
12066b1e6cc7SJason Wang * not happen in this case.
12076b1e6cc7SJason Wang */
1208f8894913SJason Wang void __user *uaddr = vhost_vq_meta_fetch(vq,
1209f8894913SJason Wang (u64)(uintptr_t)from, size,
1210f8894913SJason Wang VHOST_ADDR_DESC);
12116b1e6cc7SJason Wang struct iov_iter f;
1212f8894913SJason Wang
1213f8894913SJason Wang if (uaddr)
1214f8894913SJason Wang return __copy_from_user(to, uaddr, size);
1215f8894913SJason Wang
12166b1e6cc7SJason Wang ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
12176b1e6cc7SJason Wang ARRAY_SIZE(vq->iotlb_iov),
12186b1e6cc7SJason Wang VHOST_ACCESS_RO);
12196b1e6cc7SJason Wang if (ret < 0) {
12206b1e6cc7SJason Wang vq_err(vq, "IOTLB translation failure: uaddr "
12216b1e6cc7SJason Wang "%p size 0x%llx\n", from,
12226b1e6cc7SJason Wang (unsigned long long) size);
12236b1e6cc7SJason Wang goto out;
12246b1e6cc7SJason Wang }
1225de4eda9dSAl Viro iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size);
12266b1e6cc7SJason Wang ret = copy_from_iter(to, size, &f);
12276b1e6cc7SJason Wang if (ret == size)
12286b1e6cc7SJason Wang ret = 0;
12296b1e6cc7SJason Wang }
12306b1e6cc7SJason Wang
12316b1e6cc7SJason Wang out:
12326b1e6cc7SJason Wang return ret;
12336b1e6cc7SJason Wang }
12346b1e6cc7SJason Wang
1235f8894913SJason Wang static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
1236f8894913SJason Wang void __user *addr, unsigned int size,
1237f8894913SJason Wang int type)
12386b1e6cc7SJason Wang {
12396b1e6cc7SJason Wang int ret;
12406b1e6cc7SJason Wang
12416b1e6cc7SJason Wang ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
12426b1e6cc7SJason Wang ARRAY_SIZE(vq->iotlb_iov),
12436b1e6cc7SJason Wang VHOST_ACCESS_RO);
12446b1e6cc7SJason Wang if (ret < 0) {
12456b1e6cc7SJason Wang vq_err(vq, "IOTLB translation failure: uaddr "
12466b1e6cc7SJason Wang "%p size 0x%llx\n", addr,
12476b1e6cc7SJason Wang (unsigned long long) size);
12486b1e6cc7SJason Wang return NULL;
12496b1e6cc7SJason Wang }
12506b1e6cc7SJason Wang
12516b1e6cc7SJason Wang if (ret != 1 || vq->iotlb_iov[0].iov_len != size) {
12526b1e6cc7SJason Wang vq_err(vq, "Non atomic userspace memory access: uaddr "
12536b1e6cc7SJason Wang "%p size 0x%llx\n", addr,
12546b1e6cc7SJason Wang (unsigned long long) size);
12556b1e6cc7SJason Wang return NULL;
12566b1e6cc7SJason Wang }
12576b1e6cc7SJason Wang
12586b1e6cc7SJason Wang return vq->iotlb_iov[0].iov_base;
12596b1e6cc7SJason Wang }
12606b1e6cc7SJason Wang
1261f8894913SJason Wang /* This function should be called after iotlb
1262f8894913SJason Wang * prefetch, which means we're sure that the vq
1263f8894913SJason Wang * can be accessed through the iotlb, so -EAGAIN should
1264f8894913SJason Wang * not happen in this case.
1265f8894913SJason Wang */
1266f8894913SJason Wang static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
12671b0be99fSMichael S. Tsirkin void __user *addr, unsigned int size,
1268f8894913SJason Wang int type)
1269f8894913SJason Wang {
1270f8894913SJason Wang void __user *uaddr = vhost_vq_meta_fetch(vq,
1271f8894913SJason Wang (u64)(uintptr_t)addr, size, type);
1272f8894913SJason Wang if (uaddr)
1273f8894913SJason Wang return uaddr;
1274f8894913SJason Wang
1275f8894913SJason Wang return __vhost_get_user_slow(vq, addr, size, type);
1276f8894913SJason Wang }
1277f8894913SJason Wang
12786b1e6cc7SJason Wang #define vhost_put_user(vq, x, ptr) \
12796b1e6cc7SJason Wang ({ \
1280002ef18eSGuennadi Liakhovetski int ret; \
12816b1e6cc7SJason Wang if (!vq->iotlb) { \
12826b1e6cc7SJason Wang ret = __put_user(x, ptr); \
12836b1e6cc7SJason Wang } else { \
12846b1e6cc7SJason Wang __typeof__(ptr) to = \
1285f8894913SJason Wang (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
1286f8894913SJason Wang sizeof(*ptr), VHOST_ADDR_USED); \
12876b1e6cc7SJason Wang if (to != NULL) \
12886b1e6cc7SJason Wang ret = __put_user(x, to); \
12896b1e6cc7SJason Wang else \
12906b1e6cc7SJason Wang ret = -EFAULT; \
12916b1e6cc7SJason Wang } \
12926b1e6cc7SJason Wang ret; \
12936b1e6cc7SJason Wang })
12946b1e6cc7SJason Wang
12957b5d753eSJason Wang static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
12967b5d753eSJason Wang {
12977b5d753eSJason Wang return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
12987b5d753eSJason Wang vhost_avail_event(vq));
12997b5d753eSJason Wang }
13007b5d753eSJason Wang
13017b5d753eSJason Wang static inline int vhost_put_used(struct vhost_virtqueue *vq,
13027b5d753eSJason Wang struct vring_used_elem *head, int idx,
13037b5d753eSJason Wang int count)
13047b5d753eSJason Wang {
13057b5d753eSJason Wang return vhost_copy_to_user(vq, vq->used->ring + idx, head,
13067b5d753eSJason Wang count * sizeof(*head));
13077b5d753eSJason Wang }
13087b5d753eSJason Wang
13097b5d753eSJason Wang static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
13117b5d753eSJason Wang {
13127b5d753eSJason Wang return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
13137b5d753eSJason Wang &vq->used->flags);
13147b5d753eSJason Wang }
13157b5d753eSJason Wang
13167b5d753eSJason Wang static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
13187b5d753eSJason Wang {
13197b5d753eSJason Wang return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
13207b5d753eSJason Wang &vq->used->idx);
13217b5d753eSJason Wang }
13227b5d753eSJason Wang
1323f8894913SJason Wang #define vhost_get_user(vq, x, ptr, type) \
13246b1e6cc7SJason Wang ({ \
13256b1e6cc7SJason Wang int ret; \
13266b1e6cc7SJason Wang if (!vq->iotlb) { \
13276b1e6cc7SJason Wang ret = __get_user(x, ptr); \
13286b1e6cc7SJason Wang } else { \
13296b1e6cc7SJason Wang __typeof__(ptr) from = \
1330f8894913SJason Wang (__typeof__(ptr)) __vhost_get_user(vq, ptr, \
1331f8894913SJason Wang sizeof(*ptr), \
1332f8894913SJason Wang type); \
13336b1e6cc7SJason Wang if (from != NULL) \
13346b1e6cc7SJason Wang ret = __get_user(x, from); \
13356b1e6cc7SJason Wang else \
13366b1e6cc7SJason Wang ret = -EFAULT; \
13376b1e6cc7SJason Wang } \
13386b1e6cc7SJason Wang ret; \
13396b1e6cc7SJason Wang })
13406b1e6cc7SJason Wang
1341f8894913SJason Wang #define vhost_get_avail(vq, x, ptr) \
1342f8894913SJason Wang vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
1343f8894913SJason Wang
1344f8894913SJason Wang #define vhost_get_used(vq, x, ptr) \
1345f8894913SJason Wang vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
1346f8894913SJason Wang
134786a07da3SJason Wang static void vhost_dev_lock_vqs(struct vhost_dev *d)
134886a07da3SJason Wang {
134986a07da3SJason Wang int i = 0;
135086a07da3SJason Wang for (i = 0; i < d->nvqs; ++i)
135186a07da3SJason Wang mutex_lock_nested(&d->vqs[i]->mutex, i);
135286a07da3SJason Wang }
135386a07da3SJason Wang
135486a07da3SJason Wang static void vhost_dev_unlock_vqs(struct vhost_dev *d)
135586a07da3SJason Wang {
135686a07da3SJason Wang int i = 0;
135786a07da3SJason Wang for (i = 0; i < d->nvqs; ++i)
135886a07da3SJason Wang mutex_unlock(&d->vqs[i]->mutex);
135986a07da3SJason Wang }
136086a07da3SJason Wang
13617b5d753eSJason Wang static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
13627b5d753eSJason Wang __virtio16 *idx)
13637b5d753eSJason Wang {
13647b5d753eSJason Wang return vhost_get_avail(vq, *idx, &vq->avail->idx);
13657b5d753eSJason Wang }
13667b5d753eSJason Wang
13677b5d753eSJason Wang static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
13687b5d753eSJason Wang __virtio16 *head, int idx)
13697b5d753eSJason Wang {
13707b5d753eSJason Wang return vhost_get_avail(vq, *head,
13717b5d753eSJason Wang &vq->avail->ring[idx & (vq->num - 1)]);
13727b5d753eSJason Wang }
13737b5d753eSJason Wang
13747b5d753eSJason Wang static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
13757b5d753eSJason Wang __virtio16 *flags)
13767b5d753eSJason Wang {
13777b5d753eSJason Wang return vhost_get_avail(vq, *flags, &vq->avail->flags);
13787b5d753eSJason Wang }
13797b5d753eSJason Wang
13807b5d753eSJason Wang static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
13817b5d753eSJason Wang __virtio16 *event)
13827b5d753eSJason Wang {
13837b5d753eSJason Wang return vhost_get_avail(vq, *event, vhost_used_event(vq));
13847b5d753eSJason Wang }
13857b5d753eSJason Wang
13867b5d753eSJason Wang static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
13877b5d753eSJason Wang __virtio16 *idx)
13887b5d753eSJason Wang {
13897b5d753eSJason Wang return vhost_get_used(vq, *idx, &vq->used->idx);
13907b5d753eSJason Wang }
13917b5d753eSJason Wang
13927b5d753eSJason Wang static inline int vhost_get_desc(struct vhost_virtqueue *vq,
13937b5d753eSJason Wang struct vring_desc *desc, int idx)
13947b5d753eSJason Wang {
13957b5d753eSJason Wang return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc));
13967b5d753eSJason Wang }
13977b5d753eSJason Wang
13986b1e6cc7SJason Wang static void vhost_iotlb_notify_vq(struct vhost_dev *d,
13996b1e6cc7SJason Wang struct vhost_iotlb_msg *msg)
14006b1e6cc7SJason Wang {
14016b1e6cc7SJason Wang struct vhost_msg_node *node, *n;
14026b1e6cc7SJason Wang
14036b1e6cc7SJason Wang spin_lock(&d->iotlb_lock);
14046b1e6cc7SJason Wang
14056b1e6cc7SJason Wang list_for_each_entry_safe(node, n, &d->pending_list, node) {
14066b1e6cc7SJason Wang struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb;
14076b1e6cc7SJason Wang if (msg->iova <= vq_msg->iova &&
14082d66f997SJason Wang msg->iova + msg->size - 1 >= vq_msg->iova &&
14096b1e6cc7SJason Wang vq_msg->type == VHOST_IOTLB_MISS) {
14106b1e6cc7SJason Wang vhost_poll_queue(&node->vq->poll);
14116b1e6cc7SJason Wang list_del(&node->node);
14126b1e6cc7SJason Wang kfree(node);
14136b1e6cc7SJason Wang }
14146b1e6cc7SJason Wang }
14156b1e6cc7SJason Wang
14166b1e6cc7SJason Wang spin_unlock(&d->iotlb_lock);
14176b1e6cc7SJason Wang }
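/* Once a matching VHOST_IOTLB_UPDATE arrives, the virtqueues that
 * reported misses in that range are simply re-polled above; the datapath
 * then retries translation against the freshly populated iotlb.
 */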
14186b1e6cc7SJason Wang
1419ddd3d408SStefan Hajnoczi static bool umem_access_ok(u64 uaddr, u64 size, int access)
14206b1e6cc7SJason Wang {
14216b1e6cc7SJason Wang unsigned long a = uaddr;
14226b1e6cc7SJason Wang
1423ec33d031SMichael S. Tsirkin /* Make sure 64 bit math will not overflow. */
1424ec33d031SMichael S. Tsirkin if (vhost_overflow(uaddr, size))
1425ddd3d408SStefan Hajnoczi return false;
1426ec33d031SMichael S. Tsirkin
14276b1e6cc7SJason Wang if ((access & VHOST_ACCESS_RO) &&
142896d4f267SLinus Torvalds !access_ok((void __user *)a, size))
1429ddd3d408SStefan Hajnoczi return false;
14306b1e6cc7SJason Wang if ((access & VHOST_ACCESS_WO) &&
143196d4f267SLinus Torvalds !access_ok((void __user *)a, size))
1432ddd3d408SStefan Hajnoczi return false;
1433ddd3d408SStefan Hajnoczi return true;
14346b1e6cc7SJason Wang }
14356b1e6cc7SJason Wang
143691233ad7SGautam Dawar static int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
14376b1e6cc7SJason Wang struct vhost_iotlb_msg *msg)
14386b1e6cc7SJason Wang {
14396b1e6cc7SJason Wang int ret = 0;
14406b1e6cc7SJason Wang
144191233ad7SGautam Dawar if (asid != 0)
144291233ad7SGautam Dawar return -EINVAL;
144391233ad7SGautam Dawar
14441b15ad68SJason Wang mutex_lock(&dev->mutex);
144586a07da3SJason Wang vhost_dev_lock_vqs(dev);
14466b1e6cc7SJason Wang switch (msg->type) {
14476b1e6cc7SJason Wang case VHOST_IOTLB_UPDATE:
14486b1e6cc7SJason Wang if (!dev->iotlb) {
14496b1e6cc7SJason Wang ret = -EFAULT;
14506b1e6cc7SJason Wang break;
14516b1e6cc7SJason Wang }
1452ddd3d408SStefan Hajnoczi if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
14536b1e6cc7SJason Wang ret = -EFAULT;
14546b1e6cc7SJason Wang break;
14556b1e6cc7SJason Wang }
1456f8894913SJason Wang vhost_vq_meta_reset(dev);
14570bbe3066SJason Wang if (vhost_iotlb_add_range(dev->iotlb, msg->iova,
14586b1e6cc7SJason Wang msg->iova + msg->size - 1,
14596b1e6cc7SJason Wang msg->uaddr, msg->perm)) {
14606b1e6cc7SJason Wang ret = -ENOMEM;
14616b1e6cc7SJason Wang break;
14626b1e6cc7SJason Wang }
14636b1e6cc7SJason Wang vhost_iotlb_notify_vq(dev, msg);
14646b1e6cc7SJason Wang break;
14656b1e6cc7SJason Wang case VHOST_IOTLB_INVALIDATE:
14666f3180afSJason Wang if (!dev->iotlb) {
14676f3180afSJason Wang ret = -EFAULT;
14686f3180afSJason Wang break;
14696f3180afSJason Wang }
1470f8894913SJason Wang vhost_vq_meta_reset(dev);
14710bbe3066SJason Wang vhost_iotlb_del_range(dev->iotlb, msg->iova,
14726b1e6cc7SJason Wang msg->iova + msg->size - 1);
14736b1e6cc7SJason Wang break;
14746b1e6cc7SJason Wang default:
14756b1e6cc7SJason Wang ret = -EINVAL;
14766b1e6cc7SJason Wang break;
14776b1e6cc7SJason Wang }
14786b1e6cc7SJason Wang
147986a07da3SJason Wang vhost_dev_unlock_vqs(dev);
14801b15ad68SJason Wang mutex_unlock(&dev->mutex);
14811b15ad68SJason Wang
14826b1e6cc7SJason Wang return ret;
14836b1e6cc7SJason Wang }
14846b1e6cc7SJason Wang ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
14856b1e6cc7SJason Wang struct iov_iter *from)
14866b1e6cc7SJason Wang {
1487429711aeSJason Wang struct vhost_iotlb_msg msg;
1488429711aeSJason Wang size_t offset;
1489429711aeSJason Wang int type, ret;
149091233ad7SGautam Dawar u32 asid = 0;
14916b1e6cc7SJason Wang
1492429711aeSJason Wang ret = copy_from_iter(&type, sizeof(type), from);
149374ad7419SPavel Tikhomirov if (ret != sizeof(type)) {
149474ad7419SPavel Tikhomirov ret = -EINVAL;
14956b1e6cc7SJason Wang goto done;
149674ad7419SPavel Tikhomirov }
14976b1e6cc7SJason Wang
1498429711aeSJason Wang switch (type) {
14996b1e6cc7SJason Wang case VHOST_IOTLB_MSG:
1500429711aeSJason Wang /* There may be a hole after the type field in the
1501429711aeSJason Wang * V1 message layout, so skip it here.
1502429711aeSJason Wang */
1503429711aeSJason Wang offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
1504429711aeSJason Wang break;
1505429711aeSJason Wang case VHOST_IOTLB_MSG_V2:
150691233ad7SGautam Dawar if (vhost_backend_has_feature(dev->vqs[0],
150791233ad7SGautam Dawar VHOST_BACKEND_F_IOTLB_ASID)) {
150891233ad7SGautam Dawar ret = copy_from_iter(&asid, sizeof(asid), from);
150991233ad7SGautam Dawar if (ret != sizeof(asid)) {
151091233ad7SGautam Dawar ret = -EINVAL;
151191233ad7SGautam Dawar goto done;
151291233ad7SGautam Dawar }
1513aaca8373SGautam Dawar offset = 0;
151491233ad7SGautam Dawar } else
1515429711aeSJason Wang offset = sizeof(__u32);
15166b1e6cc7SJason Wang break;
15176b1e6cc7SJason Wang default:
15186b1e6cc7SJason Wang ret = -EINVAL;
1519429711aeSJason Wang goto done;
15206b1e6cc7SJason Wang }
15216b1e6cc7SJason Wang
1522429711aeSJason Wang iov_iter_advance(from, offset);
1523429711aeSJason Wang ret = copy_from_iter(&msg, sizeof(msg), from);
152474ad7419SPavel Tikhomirov if (ret != sizeof(msg)) {
152574ad7419SPavel Tikhomirov ret = -EINVAL;
1526429711aeSJason Wang goto done;
152774ad7419SPavel Tikhomirov }
1528792a4f2eSJason Wang
1529ca50ec37SEric Auger if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) {
1530e2ae38cfSAnirudh Rayabharam ret = -EINVAL;
1531e2ae38cfSAnirudh Rayabharam goto done;
1532e2ae38cfSAnirudh Rayabharam }
1533e2ae38cfSAnirudh Rayabharam
1534792a4f2eSJason Wang if (dev->msg_handler)
153591233ad7SGautam Dawar ret = dev->msg_handler(dev, asid, &msg);
1536792a4f2eSJason Wang else
153791233ad7SGautam Dawar ret = vhost_process_iotlb_msg(dev, asid, &msg);
1538792a4f2eSJason Wang if (ret) {
1539429711aeSJason Wang ret = -EFAULT;
1540429711aeSJason Wang goto done;
1541429711aeSJason Wang }
1542429711aeSJason Wang
1543429711aeSJason Wang ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
1544429711aeSJason Wang sizeof(struct vhost_msg_v2);
15456b1e6cc7SJason Wang done:
15466b1e6cc7SJason Wang return ret;
15476b1e6cc7SJason Wang }
15486b1e6cc7SJason Wang EXPORT_SYMBOL(vhost_chr_write_iter);
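/* Userspace side (illustrative sketch, not kernel code): an iotlb update
 * is delivered by writing a struct vhost_msg_v2 to the device fd:
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *	if (write(vhost_fd, &msg, sizeof(msg)) != sizeof(msg))
 *		perror("vhost iotlb update");
 *
 * vhost_fd, iova, size and buf are assumed to come from the caller.
 */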
15496b1e6cc7SJason Wang
1550afc9a42bSAl Viro __poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev,
15516b1e6cc7SJason Wang poll_table *wait)
15526b1e6cc7SJason Wang {
1553afc9a42bSAl Viro __poll_t mask = 0;
15546b1e6cc7SJason Wang
15556b1e6cc7SJason Wang poll_wait(file, &dev->wait, wait);
15566b1e6cc7SJason Wang
15576b1e6cc7SJason Wang if (!list_empty(&dev->read_list))
1558a9a08845SLinus Torvalds mask |= EPOLLIN | EPOLLRDNORM;
15596b1e6cc7SJason Wang
15606b1e6cc7SJason Wang return mask;
15616b1e6cc7SJason Wang }
15626b1e6cc7SJason Wang EXPORT_SYMBOL(vhost_chr_poll);
15636b1e6cc7SJason Wang
15646b1e6cc7SJason Wang ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
15656b1e6cc7SJason Wang int noblock)
15666b1e6cc7SJason Wang {
15676b1e6cc7SJason Wang DEFINE_WAIT(wait);
15686b1e6cc7SJason Wang struct vhost_msg_node *node;
15696b1e6cc7SJason Wang ssize_t ret = 0;
15706b1e6cc7SJason Wang unsigned size = sizeof(struct vhost_msg);
15716b1e6cc7SJason Wang
15726b1e6cc7SJason Wang if (iov_iter_count(to) < size)
15736b1e6cc7SJason Wang return 0;
15746b1e6cc7SJason Wang
15756b1e6cc7SJason Wang while (1) {
15766b1e6cc7SJason Wang if (!noblock)
15776b1e6cc7SJason Wang prepare_to_wait(&dev->wait, &wait,
15786b1e6cc7SJason Wang TASK_INTERRUPTIBLE);
15796b1e6cc7SJason Wang
15806b1e6cc7SJason Wang node = vhost_dequeue_msg(dev, &dev->read_list);
15816b1e6cc7SJason Wang if (node)
15826b1e6cc7SJason Wang break;
15836b1e6cc7SJason Wang if (noblock) {
15846b1e6cc7SJason Wang ret = -EAGAIN;
15856b1e6cc7SJason Wang break;
15866b1e6cc7SJason Wang }
15876b1e6cc7SJason Wang if (signal_pending(current)) {
15886b1e6cc7SJason Wang ret = -ERESTARTSYS;
15896b1e6cc7SJason Wang break;
15906b1e6cc7SJason Wang }
15916b1e6cc7SJason Wang if (!dev->iotlb) {
15926b1e6cc7SJason Wang ret = -EBADFD;
15936b1e6cc7SJason Wang break;
15946b1e6cc7SJason Wang }
15956b1e6cc7SJason Wang
15966b1e6cc7SJason Wang schedule();
15976b1e6cc7SJason Wang }
15986b1e6cc7SJason Wang
15996b1e6cc7SJason Wang if (!noblock)
16006b1e6cc7SJason Wang finish_wait(&dev->wait, &wait);
16016b1e6cc7SJason Wang
16026b1e6cc7SJason Wang if (node) {
1603429711aeSJason Wang struct vhost_iotlb_msg *msg;
1604429711aeSJason Wang void *start = &node->msg;
16056b1e6cc7SJason Wang
1606429711aeSJason Wang switch (node->msg.type) {
1607429711aeSJason Wang case VHOST_IOTLB_MSG:
1608429711aeSJason Wang size = sizeof(node->msg);
1609429711aeSJason Wang msg = &node->msg.iotlb;
1610429711aeSJason Wang break;
1611429711aeSJason Wang case VHOST_IOTLB_MSG_V2:
1612429711aeSJason Wang size = sizeof(node->msg_v2);
1613429711aeSJason Wang msg = &node->msg_v2.iotlb;
1614429711aeSJason Wang break;
1615429711aeSJason Wang default:
1616429711aeSJason Wang BUG();
1617429711aeSJason Wang break;
1618429711aeSJason Wang }
1619429711aeSJason Wang
1620429711aeSJason Wang ret = copy_to_iter(start, size, to);
1621429711aeSJason Wang if (ret != size || msg->type != VHOST_IOTLB_MISS) {
16226b1e6cc7SJason Wang kfree(node);
16236b1e6cc7SJason Wang return ret;
16246b1e6cc7SJason Wang }
16256b1e6cc7SJason Wang vhost_enqueue_msg(dev, &dev->pending_list, node);
16266b1e6cc7SJason Wang }
16276b1e6cc7SJason Wang
16286b1e6cc7SJason Wang return ret;
16296b1e6cc7SJason Wang }
16306b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_chr_read_iter);
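/* Userspace side (illustrative sketch): miss requests are consumed by
 * reading the device fd and answered through the write path above;
 * send_update() is a hypothetical helper wrapping the snippet shown
 * after vhost_chr_write_iter():
 *
 *	struct vhost_msg_v2 msg;
 *
 *	while (read(vhost_fd, &msg, sizeof(msg)) == sizeof(msg)) {
 *		if (msg.type != VHOST_IOTLB_MSG_V2 ||
 *		    msg.iotlb.type != VHOST_IOTLB_MISS)
 *			continue;
 *		send_update(vhost_fd, msg.iotlb.iova, msg.iotlb.perm);
 *	}
 */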
16316b1e6cc7SJason Wang
16326b1e6cc7SJason Wang static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
16336b1e6cc7SJason Wang {
16346b1e6cc7SJason Wang struct vhost_dev *dev = vq->dev;
16356b1e6cc7SJason Wang struct vhost_msg_node *node;
16366b1e6cc7SJason Wang struct vhost_iotlb_msg *msg;
1637429711aeSJason Wang bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
16386b1e6cc7SJason Wang
1639429711aeSJason Wang node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
16406b1e6cc7SJason Wang if (!node)
16416b1e6cc7SJason Wang return -ENOMEM;
16426b1e6cc7SJason Wang
1643429711aeSJason Wang if (v2) {
1644429711aeSJason Wang node->msg_v2.type = VHOST_IOTLB_MSG_V2;
1645429711aeSJason Wang msg = &node->msg_v2.iotlb;
1646429711aeSJason Wang } else {
16476b1e6cc7SJason Wang msg = &node->msg.iotlb;
1648429711aeSJason Wang }
1649429711aeSJason Wang
16506b1e6cc7SJason Wang msg->type = VHOST_IOTLB_MISS;
16516b1e6cc7SJason Wang msg->iova = iova;
16526b1e6cc7SJason Wang msg->perm = access;
16536b1e6cc7SJason Wang
16546b1e6cc7SJason Wang vhost_enqueue_msg(dev, &dev->read_list, node);
16556b1e6cc7SJason Wang
16566b1e6cc7SJason Wang return 0;
1657bfe2bc51SJason Wang }
1658bfe2bc51SJason Wang
1659ddd3d408SStefan Hajnoczi static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
1660a865e420SMichael S. Tsirkin vring_desc_t __user *desc,
1661a865e420SMichael S. Tsirkin vring_avail_t __user *avail,
1662a865e420SMichael S. Tsirkin vring_used_t __user *used)
16643a4d5c94SMichael S. Tsirkin {
16650210a8dbSGreg Kurz /* If an IOTLB device is present, the vring addresses are
16660210a8dbSGreg Kurz * GIOVAs. Access validation occurs at prefetch time. */
16670210a8dbSGreg Kurz if (vq->iotlb)
16680210a8dbSGreg Kurz return true;
16690210a8dbSGreg Kurz
16704942e825SJason Wang return access_ok(desc, vhost_get_desc_size(vq, num)) &&
16714942e825SJason Wang access_ok(avail, vhost_get_avail_size(vq, num)) &&
16724942e825SJason Wang access_ok(used, vhost_get_used_size(vq, num));
16733a4d5c94SMichael S. Tsirkin }
16743a4d5c94SMichael S. Tsirkin
1675f8894913SJason Wang static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
16760bbe3066SJason Wang const struct vhost_iotlb_map *map,
1677f8894913SJason Wang int type)
1678f8894913SJason Wang {
1679f8894913SJason Wang int access = (type == VHOST_ADDR_USED) ?
1680f8894913SJason Wang VHOST_ACCESS_WO : VHOST_ACCESS_RO;
1681f8894913SJason Wang
16820bbe3066SJason Wang if (likely(map->perm & access))
16830bbe3066SJason Wang vq->meta_iotlb[type] = map;
1684f8894913SJason Wang }
1685f8894913SJason Wang
1686ddd3d408SStefan Hajnoczi static bool iotlb_access_ok(struct vhost_virtqueue *vq,
1687f8894913SJason Wang int access, u64 addr, u64 len, int type)
16886b1e6cc7SJason Wang {
16890bbe3066SJason Wang const struct vhost_iotlb_map *map;
16900bbe3066SJason Wang struct vhost_iotlb *umem = vq->iotlb;
1691ca2c5b33SMichael S. Tsirkin u64 s = 0, size, orig_addr = addr, last = addr + len - 1;
1692f8894913SJason Wang
1693f8894913SJason Wang if (vhost_vq_meta_fetch(vq, addr, len, type))
1694f8894913SJason Wang return true;
16956b1e6cc7SJason Wang
16966b1e6cc7SJason Wang while (len > s) {
16970bbe3066SJason Wang map = vhost_iotlb_itree_first(umem, addr, last);
16980bbe3066SJason Wang if (map == NULL || map->start > addr) {
16996b1e6cc7SJason Wang vhost_iotlb_miss(vq, addr, access);
17006b1e6cc7SJason Wang return false;
17010bbe3066SJason Wang } else if (!(map->perm & access)) {
17026b1e6cc7SJason Wang /* Report the possible access violation by
17036b1e6cc7SJason Wang * requesting another translation from userspace.
17046b1e6cc7SJason Wang */
17056b1e6cc7SJason Wang return false;
17066b1e6cc7SJason Wang }
17076b1e6cc7SJason Wang
17080bbe3066SJason Wang size = map->size - addr + map->start;
1709f8894913SJason Wang
1710f8894913SJason Wang if (orig_addr == addr && size >= len)
17110bbe3066SJason Wang vhost_vq_meta_update(vq, map, type);
1712f8894913SJason Wang
17136b1e6cc7SJason Wang s += size;
17146b1e6cc7SJason Wang addr += size;
17156b1e6cc7SJason Wang }
17166b1e6cc7SJason Wang
17176b1e6cc7SJason Wang return true;
17186b1e6cc7SJason Wang }
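/* The caching above is deliberately conservative: a translation is only
 * remembered in meta_iotlb[] when one map covers the whole requested
 * range from its start (orig_addr == addr && size >= len), so a cached
 * entry is always usable for a single __copy_to/from_user().
 */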
17196b1e6cc7SJason Wang
17209b5e830bSJason Wang int vq_meta_prefetch(struct vhost_virtqueue *vq)
17216b1e6cc7SJason Wang {
17226b1e6cc7SJason Wang unsigned int num = vq->num;
17236b1e6cc7SJason Wang
17243d2c7d37SMichael S. Tsirkin if (!vq->iotlb)
17256b1e6cc7SJason Wang return 1;
17266b1e6cc7SJason Wang
17270bbe3066SJason Wang return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc,
17284942e825SJason Wang vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) &&
17290bbe3066SJason Wang iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail,
17304942e825SJason Wang vhost_get_avail_size(vq, num),
1731f8894913SJason Wang VHOST_ADDR_AVAIL) &&
17320bbe3066SJason Wang iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used,
17334942e825SJason Wang vhost_get_used_size(vq, num), VHOST_ADDR_USED);
17346b1e6cc7SJason Wang }
17359b5e830bSJason Wang EXPORT_SYMBOL_GPL(vq_meta_prefetch);
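/* Typical caller pattern (sketch): backends invoke this before touching
 * the ring with the vhost_get_*()/vhost_put_*() helpers and back off on
 * failure, since iotlb_access_ok() has already queued the miss reports:
 *
 *	if (!vq_meta_prefetch(vq))
 *		goto out; // resumes once userspace replies with an update
 */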
17366b1e6cc7SJason Wang
17373a4d5c94SMichael S. Tsirkin /* Can we log writes? */
17383a4d5c94SMichael S. Tsirkin /* Caller should have device mutex but not vq mutex */
1739ddd3d408SStefan Hajnoczi bool vhost_log_access_ok(struct vhost_dev *dev)
17403a4d5c94SMichael S. Tsirkin {
1741a9709d68SJason Wang return memory_access_ok(dev, dev->umem, 1);
17423a4d5c94SMichael S. Tsirkin }
17436ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_log_access_ok);
17443a4d5c94SMichael S. Tsirkin
1745ab512251SGreg Kurz static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
1746ab512251SGreg Kurz void __user *log_base,
1747ab512251SGreg Kurz bool log_used,
1748ab512251SGreg Kurz u64 log_addr)
1749ab512251SGreg Kurz {
1750ab512251SGreg Kurz /* If an IOTLB device is present, log_addr is a GIOVA that
1751ab512251SGreg Kurz * will never be logged by log_used(). */
1752ab512251SGreg Kurz if (vq->iotlb)
1753ab512251SGreg Kurz return true;
1754ab512251SGreg Kurz
1755ab512251SGreg Kurz return !log_used || log_access_ok(log_base, log_addr,
1756ab512251SGreg Kurz vhost_get_used_size(vq, vq->num));
1757ab512251SGreg Kurz }
1758ab512251SGreg Kurz
17593a4d5c94SMichael S. Tsirkin /* Verify access for write logging. */
17603a4d5c94SMichael S. Tsirkin /* Caller should have vq mutex and device mutex */
1761ddd3d408SStefan Hajnoczi static bool vq_log_access_ok(struct vhost_virtqueue *vq,
17628ea8cf89SMichael S. Tsirkin void __user *log_base)
17633a4d5c94SMichael S. Tsirkin {
1764a9709d68SJason Wang return vq_memory_access_ok(log_base, vq->umem,
1765ea16c514SMichael S. Tsirkin vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
1766ab512251SGreg Kurz vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr);
17673a4d5c94SMichael S. Tsirkin }
17683a4d5c94SMichael S. Tsirkin
17693a4d5c94SMichael S. Tsirkin /* Can we start vq? */
17703a4d5c94SMichael S. Tsirkin /* Caller should have vq mutex and device mutex */
1771ddd3d408SStefan Hajnoczi bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
17723a4d5c94SMichael S. Tsirkin {
1773d14d2b78SStefan Hajnoczi if (!vq_log_access_ok(vq, vq->log_base))
1774ddd3d408SStefan Hajnoczi return false;
1775d65026c6SJason Wang
1776d65026c6SJason Wang return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
17773a4d5c94SMichael S. Tsirkin }
17786ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
17793a4d5c94SMichael S. Tsirkin
17803a4d5c94SMichael S. Tsirkin static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
17813a4d5c94SMichael S. Tsirkin {
1782a9709d68SJason Wang struct vhost_memory mem, *newmem;
1783a9709d68SJason Wang struct vhost_memory_region *region;
17840bbe3066SJason Wang struct vhost_iotlb *newumem, *oldumem;
17853a4d5c94SMichael S. Tsirkin unsigned long size = offsetof(struct vhost_memory, regions);
178698f9ca0aSMichael S. Tsirkin int i;
1787d47effe1SKrishna Kumar
17887ad9c9d2STakuya Yoshikawa if (copy_from_user(&mem, m, size))
17897ad9c9d2STakuya Yoshikawa return -EFAULT;
17903a4d5c94SMichael S. Tsirkin if (mem.padding)
17913a4d5c94SMichael S. Tsirkin return -EOPNOTSUPP;
1792c9ce42f7SIgor Mammedov if (mem.nregions > max_mem_regions)
17933a4d5c94SMichael S. Tsirkin return -E2BIG;
1794b2303d7bSMatthew Wilcox newmem = kvzalloc(struct_size(newmem, regions, mem.nregions),
1795b2303d7bSMatthew Wilcox GFP_KERNEL);
17963a4d5c94SMichael S. Tsirkin if (!newmem)
17973a4d5c94SMichael S. Tsirkin return -ENOMEM;
17983a4d5c94SMichael S. Tsirkin
17993a4d5c94SMichael S. Tsirkin memcpy(newmem, &mem, size);
18007ad9c9d2STakuya Yoshikawa if (copy_from_user(newmem->regions, m->regions,
1801bf11d71aSGustavo A. R. Silva flex_array_size(newmem, regions, mem.nregions))) {
1802bcfeacabSIgor Mammedov kvfree(newmem);
18037ad9c9d2STakuya Yoshikawa return -EFAULT;
18043a4d5c94SMichael S. Tsirkin }
18053a4d5c94SMichael S. Tsirkin
18060bbe3066SJason Wang newumem = iotlb_alloc();
1807a9709d68SJason Wang if (!newumem) {
18084de7255fSIgor Mammedov kvfree(newmem);
1809a9709d68SJason Wang return -ENOMEM;
1810a02c3789STakuya Yoshikawa }
1811a9709d68SJason Wang
1812a9709d68SJason Wang for (region = newmem->regions;
1813a9709d68SJason Wang region < newmem->regions + mem.nregions;
1814a9709d68SJason Wang region++) {
18150bbe3066SJason Wang if (vhost_iotlb_add_range(newumem,
18166b1e6cc7SJason Wang region->guest_phys_addr,
18176b1e6cc7SJason Wang region->guest_phys_addr +
18186b1e6cc7SJason Wang region->memory_size - 1,
18196b1e6cc7SJason Wang region->userspace_addr,
18200bbe3066SJason Wang VHOST_MAP_RW))
1821a9709d68SJason Wang goto err;
1822a9709d68SJason Wang }
1823a9709d68SJason Wang
1824a9709d68SJason Wang if (!memory_access_ok(d, newumem, 0))
1825a9709d68SJason Wang goto err;
1826a9709d68SJason Wang
1827a9709d68SJason Wang oldumem = d->umem;
1828a9709d68SJason Wang d->umem = newumem;
182998f9ca0aSMichael S. Tsirkin
183047283befSMichael S. Tsirkin /* All memory accesses are done under some VQ mutex. */
183198f9ca0aSMichael S. Tsirkin for (i = 0; i < d->nvqs; ++i) {
183298f9ca0aSMichael S. Tsirkin mutex_lock(&d->vqs[i]->mutex);
1833a9709d68SJason Wang d->vqs[i]->umem = newumem;
183498f9ca0aSMichael S. Tsirkin mutex_unlock(&d->vqs[i]->mutex);
183598f9ca0aSMichael S. Tsirkin }
1836a9709d68SJason Wang
1837a9709d68SJason Wang kvfree(newmem);
18380bbe3066SJason Wang vhost_iotlb_free(oldumem);
18393a4d5c94SMichael S. Tsirkin return 0;
1840a9709d68SJason Wang
1841a9709d68SJason Wang err:
18420bbe3066SJason Wang vhost_iotlb_free(newumem);
1843a9709d68SJason Wang kvfree(newmem);
1844a9709d68SJason Wang return -EFAULT;
18453a4d5c94SMichael S. Tsirkin }
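/* Userspace side (illustrative sketch): the table processed above is
 * installed with the VHOST_SET_MEM_TABLE ioctl; ram and ram_size are
 * assumed to describe the guest memory mapping:
 *
 *	struct {
 *		struct vhost_memory m;
 *		struct vhost_memory_region r[1];
 *	} mem = {
 *		.m = { .nregions = 1 },
 *		.r[0] = {
 *			.guest_phys_addr = 0,
 *			.memory_size     = ram_size,
 *			.userspace_addr  = (__u64)(uintptr_t)ram,
 *		},
 *	};
 *	if (ioctl(vhost_fd, VHOST_SET_MEM_TABLE, &mem) < 0)
 *		perror("VHOST_SET_MEM_TABLE");
 */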
18463a4d5c94SMichael S. Tsirkin
1847feebcaeaSJason Wang static long vhost_vring_set_num(struct vhost_dev *d,
1848feebcaeaSJason Wang struct vhost_virtqueue *vq,
1849feebcaeaSJason Wang void __user *argp)
1850feebcaeaSJason Wang {
1851feebcaeaSJason Wang struct vhost_vring_state s;
1852feebcaeaSJason Wang
1853feebcaeaSJason Wang /* Resizing ring with an active backend?
1854feebcaeaSJason Wang * You don't want to do that. */
1855feebcaeaSJason Wang if (vq->private_data)
1856feebcaeaSJason Wang return -EBUSY;
1857feebcaeaSJason Wang
1858feebcaeaSJason Wang if (copy_from_user(&s, argp, sizeof s))
1859feebcaeaSJason Wang return -EFAULT;
1860feebcaeaSJason Wang
1861feebcaeaSJason Wang if (!s.num || s.num > 0xffff || (s.num & (s.num - 1)))
1862feebcaeaSJason Wang return -EINVAL;
1863feebcaeaSJason Wang vq->num = s.num;
1864feebcaeaSJason Wang
1865feebcaeaSJason Wang return 0;
1866feebcaeaSJason Wang }
1867feebcaeaSJason Wang
1868feebcaeaSJason Wang static long vhost_vring_set_addr(struct vhost_dev *d,
1869feebcaeaSJason Wang struct vhost_virtqueue *vq,
1870feebcaeaSJason Wang void __user *argp)
1871feebcaeaSJason Wang {
1872feebcaeaSJason Wang struct vhost_vring_addr a;
1873feebcaeaSJason Wang
1874feebcaeaSJason Wang if (copy_from_user(&a, argp, sizeof a))
1875feebcaeaSJason Wang return -EFAULT;
1876feebcaeaSJason Wang if (a.flags & ~(0x1 << VHOST_VRING_F_LOG))
1877feebcaeaSJason Wang return -EOPNOTSUPP;
1878feebcaeaSJason Wang
1879feebcaeaSJason Wang /* For 32-bit hosts, verify that the top 32 bits of the user
1880feebcaeaSJason Wang data are set to zero. */
1881feebcaeaSJason Wang if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
1882feebcaeaSJason Wang (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
1883feebcaeaSJason Wang (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr)
1884feebcaeaSJason Wang return -EFAULT;
1885feebcaeaSJason Wang
1886feebcaeaSJason Wang /* Make sure it's safe to cast pointers to vring types. */
1887feebcaeaSJason Wang BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE);
1888feebcaeaSJason Wang BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
1889feebcaeaSJason Wang if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
1890feebcaeaSJason Wang (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
1891feebcaeaSJason Wang (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1)))
1892feebcaeaSJason Wang return -EINVAL;
1893feebcaeaSJason Wang
1894feebcaeaSJason Wang /* We only verify access here if backend is configured.
1895feebcaeaSJason Wang * If it is not, we don't verify, as the size might not have
1896feebcaeaSJason Wang * been set up yet. We will verify when backend is configured. */
1897feebcaeaSJason Wang if (vq->private_data) {
1898feebcaeaSJason Wang if (!vq_access_ok(vq, vq->num,
1899feebcaeaSJason Wang (void __user *)(unsigned long)a.desc_user_addr,
1900feebcaeaSJason Wang (void __user *)(unsigned long)a.avail_user_addr,
1901feebcaeaSJason Wang (void __user *)(unsigned long)a.used_user_addr))
1902feebcaeaSJason Wang return -EINVAL;
1903feebcaeaSJason Wang
1904feebcaeaSJason Wang /* Also validate log access for used ring if enabled. */
1905ab512251SGreg Kurz if (!vq_log_used_access_ok(vq, vq->log_base,
1906ab512251SGreg Kurz a.flags & (0x1 << VHOST_VRING_F_LOG),
1907ab512251SGreg Kurz a.log_guest_addr))
1908feebcaeaSJason Wang return -EINVAL;
1909feebcaeaSJason Wang }
1910feebcaeaSJason Wang
1911feebcaeaSJason Wang vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
1912feebcaeaSJason Wang vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
1913feebcaeaSJason Wang vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
1914feebcaeaSJason Wang vq->log_addr = a.log_guest_addr;
1915feebcaeaSJason Wang vq->used = (void __user *)(unsigned long)a.used_user_addr;
1916feebcaeaSJason Wang
1917feebcaeaSJason Wang return 0;
1918feebcaeaSJason Wang }
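/* Worked example for the alignment checks above: VRING_AVAIL_ALIGN_SIZE
 * is 2 and VRING_USED_ALIGN_SIZE is 4, so an avail ring address with
 * bit 0 set is rejected, and used and log addresses must be multiples
 * of 4.
 */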
1919feebcaeaSJason Wang
1920feebcaeaSJason Wang static long vhost_vring_set_num_addr(struct vhost_dev *d,
1921feebcaeaSJason Wang struct vhost_virtqueue *vq,
1922feebcaeaSJason Wang unsigned int ioctl,
1923feebcaeaSJason Wang void __user *argp)
1924feebcaeaSJason Wang {
1925feebcaeaSJason Wang long r;
1926feebcaeaSJason Wang
1927feebcaeaSJason Wang mutex_lock(&vq->mutex);
1928feebcaeaSJason Wang
1929feebcaeaSJason Wang switch (ioctl) {
1930feebcaeaSJason Wang case VHOST_SET_VRING_NUM:
1931feebcaeaSJason Wang r = vhost_vring_set_num(d, vq, argp);
1932feebcaeaSJason Wang break;
1933feebcaeaSJason Wang case VHOST_SET_VRING_ADDR:
1934feebcaeaSJason Wang r = vhost_vring_set_addr(d, vq, argp);
1935feebcaeaSJason Wang break;
1936feebcaeaSJason Wang default:
1937feebcaeaSJason Wang BUG();
1938feebcaeaSJason Wang }
1939feebcaeaSJason Wang
1940feebcaeaSJason Wang mutex_unlock(&vq->mutex);
1941feebcaeaSJason Wang
1942feebcaeaSJason Wang return r;
1943feebcaeaSJason Wang }
194426b36604SSonny Rao long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
19453a4d5c94SMichael S. Tsirkin {
1946cecb46f1SAl Viro struct file *eventfp, *filep = NULL;
1947cecb46f1SAl Viro bool pollstart = false, pollstop = false;
19483a4d5c94SMichael S. Tsirkin struct eventfd_ctx *ctx = NULL;
19493a4d5c94SMichael S. Tsirkin struct vhost_virtqueue *vq;
19503a4d5c94SMichael S. Tsirkin struct vhost_vring_state s;
19513a4d5c94SMichael S. Tsirkin struct vhost_vring_file f;
19523a4d5c94SMichael S. Tsirkin u32 idx;
19533a4d5c94SMichael S. Tsirkin long r;
19543a4d5c94SMichael S. Tsirkin
1955cef25866SMike Christie r = vhost_get_vq_from_user(d, argp, &vq, &idx);
19563a4d5c94SMichael S. Tsirkin if (r < 0)
19573a4d5c94SMichael S. Tsirkin return r;
19583a4d5c94SMichael S. Tsirkin
1959feebcaeaSJason Wang if (ioctl == VHOST_SET_VRING_NUM ||
1960feebcaeaSJason Wang ioctl == VHOST_SET_VRING_ADDR) {
1961feebcaeaSJason Wang return vhost_vring_set_num_addr(d, vq, ioctl, argp);
1962feebcaeaSJason Wang }
1963feebcaeaSJason Wang
19643a4d5c94SMichael S. Tsirkin mutex_lock(&vq->mutex);
19653a4d5c94SMichael S. Tsirkin
19663a4d5c94SMichael S. Tsirkin switch (ioctl) {
19673a4d5c94SMichael S. Tsirkin case VHOST_SET_VRING_BASE:
19683a4d5c94SMichael S. Tsirkin /* Moving base with an active backend?
19693a4d5c94SMichael S. Tsirkin * You don't want to do that. */
19703a4d5c94SMichael S. Tsirkin if (vq->private_data) {
19713a4d5c94SMichael S. Tsirkin r = -EBUSY;
19723a4d5c94SMichael S. Tsirkin break;
19733a4d5c94SMichael S. Tsirkin }
19747ad9c9d2STakuya Yoshikawa if (copy_from_user(&s, argp, sizeof s)) {
19757ad9c9d2STakuya Yoshikawa r = -EFAULT;
19763a4d5c94SMichael S. Tsirkin break;
19777ad9c9d2STakuya Yoshikawa }
197855d8122fSShannon Nelson if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
197955d8122fSShannon Nelson vq->last_avail_idx = s.num & 0xffff;
198055d8122fSShannon Nelson vq->last_used_idx = (s.num >> 16) & 0xffff;
198155d8122fSShannon Nelson } else {
19823a4d5c94SMichael S. Tsirkin if (s.num > 0xffff) {
19833a4d5c94SMichael S. Tsirkin r = -EINVAL;
19843a4d5c94SMichael S. Tsirkin break;
19853a4d5c94SMichael S. Tsirkin }
19868d65843cSJason Wang vq->last_avail_idx = s.num;
198755d8122fSShannon Nelson }
19883a4d5c94SMichael S. Tsirkin /* Forget the cached index value. */
19893a4d5c94SMichael S. Tsirkin vq->avail_idx = vq->last_avail_idx;
19903a4d5c94SMichael S. Tsirkin break;
19913a4d5c94SMichael S. Tsirkin case VHOST_GET_VRING_BASE:
19923a4d5c94SMichael S. Tsirkin s.index = idx;
199355d8122fSShannon Nelson if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
199455d8122fSShannon Nelson s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
199555d8122fSShannon Nelson else
19963a4d5c94SMichael S. Tsirkin s.num = vq->last_avail_idx;
19977ad9c9d2STakuya Yoshikawa if (copy_to_user(argp, &s, sizeof s))
19987ad9c9d2STakuya Yoshikawa r = -EFAULT;
19993a4d5c94SMichael S. Tsirkin break;
20003a4d5c94SMichael S. Tsirkin case VHOST_SET_VRING_KICK:
20017ad9c9d2STakuya Yoshikawa if (copy_from_user(&f, argp, sizeof f)) {
20027ad9c9d2STakuya Yoshikawa r = -EFAULT;
20033a4d5c94SMichael S. Tsirkin break;
20047ad9c9d2STakuya Yoshikawa }
2005e0136c16SZhu Lingshan eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
2006535297a6SMichael S. Tsirkin if (IS_ERR(eventfp)) {
2007535297a6SMichael S. Tsirkin r = PTR_ERR(eventfp);
2008535297a6SMichael S. Tsirkin break;
2009535297a6SMichael S. Tsirkin }
20103a4d5c94SMichael S. Tsirkin if (eventfp != vq->kick) {
2011cecb46f1SAl Viro pollstop = (filep = vq->kick) != NULL;
2012cecb46f1SAl Viro pollstart = (vq->kick = eventfp) != NULL;
20133a4d5c94SMichael S. Tsirkin } else
20143a4d5c94SMichael S. Tsirkin filep = eventfp;
20153a4d5c94SMichael S. Tsirkin break;
20163a4d5c94SMichael S. Tsirkin case VHOST_SET_VRING_CALL:
20177ad9c9d2STakuya Yoshikawa if (copy_from_user(&f, argp, sizeof f)) {
20187ad9c9d2STakuya Yoshikawa r = -EFAULT;
20193a4d5c94SMichael S. Tsirkin break;
20207ad9c9d2STakuya Yoshikawa }
2021e0136c16SZhu Lingshan ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
2022e050c7d9SEric Biggers if (IS_ERR(ctx)) {
2023e050c7d9SEric Biggers r = PTR_ERR(ctx);
2024535297a6SMichael S. Tsirkin break;
2025535297a6SMichael S. Tsirkin }
2026265a0ad8SZhu Lingshan
2027265a0ad8SZhu Lingshan swap(ctx, vq->call_ctx.ctx);
20283a4d5c94SMichael S. Tsirkin break;
20293a4d5c94SMichael S. Tsirkin case VHOST_SET_VRING_ERR:
20307ad9c9d2STakuya Yoshikawa if (copy_from_user(&f, argp, sizeof f)) {
20317ad9c9d2STakuya Yoshikawa r = -EFAULT;
20323a4d5c94SMichael S. Tsirkin break;
20337ad9c9d2STakuya Yoshikawa }
2034e0136c16SZhu Lingshan ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
203509f332a5SEric Biggers if (IS_ERR(ctx)) {
203609f332a5SEric Biggers r = PTR_ERR(ctx);
2037535297a6SMichael S. Tsirkin break;
2038535297a6SMichael S. Tsirkin }
203909f332a5SEric Biggers swap(ctx, vq->error_ctx);
20403a4d5c94SMichael S. Tsirkin break;
20412751c988SGreg Kurz case VHOST_SET_VRING_ENDIAN:
20422751c988SGreg Kurz r = vhost_set_vring_endian(vq, argp);
20432751c988SGreg Kurz break;
20442751c988SGreg Kurz case VHOST_GET_VRING_ENDIAN:
20452751c988SGreg Kurz r = vhost_get_vring_endian(vq, idx, argp);
20462751c988SGreg Kurz break;
204703088137SJason Wang case VHOST_SET_VRING_BUSYLOOP_TIMEOUT:
204803088137SJason Wang if (copy_from_user(&s, argp, sizeof(s))) {
204903088137SJason Wang r = -EFAULT;
205003088137SJason Wang break;
205103088137SJason Wang }
205203088137SJason Wang vq->busyloop_timeout = s.num;
205303088137SJason Wang break;
205403088137SJason Wang case VHOST_GET_VRING_BUSYLOOP_TIMEOUT:
205503088137SJason Wang s.index = idx;
205603088137SJason Wang s.num = vq->busyloop_timeout;
205703088137SJason Wang if (copy_to_user(argp, &s, sizeof(s)))
205803088137SJason Wang r = -EFAULT;
205903088137SJason Wang break;
20603a4d5c94SMichael S. Tsirkin default:
20613a4d5c94SMichael S. Tsirkin r = -ENOIOCTLCMD;
20623a4d5c94SMichael S. Tsirkin }
20633a4d5c94SMichael S. Tsirkin
20643a4d5c94SMichael S. Tsirkin if (pollstop && vq->handle_kick)
20653a4d5c94SMichael S. Tsirkin vhost_poll_stop(&vq->poll);
20663a4d5c94SMichael S. Tsirkin
2067e050c7d9SEric Biggers if (!IS_ERR_OR_NULL(ctx))
20683a4d5c94SMichael S. Tsirkin eventfd_ctx_put(ctx);
20693a4d5c94SMichael S. Tsirkin if (filep)
20703a4d5c94SMichael S. Tsirkin fput(filep);
20713a4d5c94SMichael S. Tsirkin
20723a4d5c94SMichael S. Tsirkin if (pollstart && vq->handle_kick)
20732b8b328bSJason Wang r = vhost_poll_start(&vq->poll, vq->kick);
20743a4d5c94SMichael S. Tsirkin
20753a4d5c94SMichael S. Tsirkin mutex_unlock(&vq->mutex);
20763a4d5c94SMichael S. Tsirkin
20773a4d5c94SMichael S. Tsirkin if (pollstop && vq->handle_kick)
2078b2ffa407SMike Christie vhost_dev_flush(vq->poll.dev);
20793a4d5c94SMichael S. Tsirkin return r;
20803a4d5c94SMichael S. Tsirkin }
20816ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_vring_ioctl);
20823a4d5c94SMichael S. Tsirkin
2083759aba1eSLiming Wu int vhost_init_device_iotlb(struct vhost_dev *d)
20846b1e6cc7SJason Wang {
20850bbe3066SJason Wang struct vhost_iotlb *niotlb, *oiotlb;
20866b1e6cc7SJason Wang int i;
20876b1e6cc7SJason Wang
20880bbe3066SJason Wang niotlb = iotlb_alloc();
20896b1e6cc7SJason Wang if (!niotlb)
20906b1e6cc7SJason Wang return -ENOMEM;
20916b1e6cc7SJason Wang
20926b1e6cc7SJason Wang oiotlb = d->iotlb;
20936b1e6cc7SJason Wang d->iotlb = niotlb;
20946b1e6cc7SJason Wang
20956b1e6cc7SJason Wang for (i = 0; i < d->nvqs; ++i) {
2096b13f9c63SJason Wang struct vhost_virtqueue *vq = d->vqs[i];
2097b13f9c63SJason Wang
2098b13f9c63SJason Wang mutex_lock(&vq->mutex);
2099b13f9c63SJason Wang vq->iotlb = niotlb;
2100b13f9c63SJason Wang __vhost_vq_meta_reset(vq);
2101b13f9c63SJason Wang mutex_unlock(&vq->mutex);
21026b1e6cc7SJason Wang }
21036b1e6cc7SJason Wang
21040bbe3066SJason Wang vhost_iotlb_free(oiotlb);
21056b1e6cc7SJason Wang
21066b1e6cc7SJason Wang return 0;
21076b1e6cc7SJason Wang }
21086b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
21096b1e6cc7SJason Wang
21103a4d5c94SMichael S. Tsirkin /* Caller must have device mutex */
2111935cdee7SMichael S. Tsirkin long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
21123a4d5c94SMichael S. Tsirkin {
2113d25cc43cSEric Biggers struct eventfd_ctx *ctx;
21143a4d5c94SMichael S. Tsirkin u64 p;
21153a4d5c94SMichael S. Tsirkin long r;
21163a4d5c94SMichael S. Tsirkin int i, fd;
21173a4d5c94SMichael S. Tsirkin
21183a4d5c94SMichael S. Tsirkin /* If you are not the owner, you can become one */
21193a4d5c94SMichael S. Tsirkin if (ioctl == VHOST_SET_OWNER) {
21203a4d5c94SMichael S. Tsirkin r = vhost_dev_set_owner(d);
21213a4d5c94SMichael S. Tsirkin goto done;
21223a4d5c94SMichael S. Tsirkin }
21233a4d5c94SMichael S. Tsirkin
21243a4d5c94SMichael S. Tsirkin /* You must be the owner to do anything else */
21253a4d5c94SMichael S. Tsirkin r = vhost_dev_check_owner(d);
21263a4d5c94SMichael S. Tsirkin if (r)
21273a4d5c94SMichael S. Tsirkin goto done;
21283a4d5c94SMichael S. Tsirkin
21293a4d5c94SMichael S. Tsirkin switch (ioctl) {
21303a4d5c94SMichael S. Tsirkin case VHOST_SET_MEM_TABLE:
21313a4d5c94SMichael S. Tsirkin r = vhost_set_memory(d, argp);
21323a4d5c94SMichael S. Tsirkin break;
21333a4d5c94SMichael S. Tsirkin case VHOST_SET_LOG_BASE:
21347ad9c9d2STakuya Yoshikawa if (copy_from_user(&p, argp, sizeof p)) {
21357ad9c9d2STakuya Yoshikawa r = -EFAULT;
21363a4d5c94SMichael S. Tsirkin break;
21377ad9c9d2STakuya Yoshikawa }
21383a4d5c94SMichael S. Tsirkin if ((u64)(unsigned long)p != p) {
21393a4d5c94SMichael S. Tsirkin r = -EFAULT;
21403a4d5c94SMichael S. Tsirkin break;
21413a4d5c94SMichael S. Tsirkin }
21423a4d5c94SMichael S. Tsirkin for (i = 0; i < d->nvqs; ++i) {
21433a4d5c94SMichael S. Tsirkin struct vhost_virtqueue *vq;
21443a4d5c94SMichael S. Tsirkin void __user *base = (void __user *)(unsigned long)p;
21453ab2e420SAsias He vq = d->vqs[i];
21463a4d5c94SMichael S. Tsirkin mutex_lock(&vq->mutex);
21473a4d5c94SMichael S. Tsirkin /* If ring is inactive, will check when it's enabled. */
2148ea16c514SMichael S. Tsirkin if (vq->private_data && !vq_log_access_ok(vq, base))
21493a4d5c94SMichael S. Tsirkin r = -EFAULT;
21503a4d5c94SMichael S. Tsirkin else
21513a4d5c94SMichael S. Tsirkin vq->log_base = base;
21523a4d5c94SMichael S. Tsirkin mutex_unlock(&vq->mutex);
21533a4d5c94SMichael S. Tsirkin }
21543a4d5c94SMichael S. Tsirkin break;
21553a4d5c94SMichael S. Tsirkin case VHOST_SET_LOG_FD:
21563a4d5c94SMichael S. Tsirkin r = get_user(fd, (int __user *)argp);
21573a4d5c94SMichael S. Tsirkin if (r < 0)
21583a4d5c94SMichael S. Tsirkin break;
2159e0136c16SZhu Lingshan ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
2160d25cc43cSEric Biggers if (IS_ERR(ctx)) {
2161d25cc43cSEric Biggers r = PTR_ERR(ctx);
21623a4d5c94SMichael S. Tsirkin break;
21633a4d5c94SMichael S. Tsirkin }
2164d25cc43cSEric Biggers swap(ctx, d->log_ctx);
21653a4d5c94SMichael S. Tsirkin for (i = 0; i < d->nvqs; ++i) {
21663ab2e420SAsias He mutex_lock(&d->vqs[i]->mutex);
21673ab2e420SAsias He d->vqs[i]->log_ctx = d->log_ctx;
21683ab2e420SAsias He mutex_unlock(&d->vqs[i]->mutex);
21693a4d5c94SMichael S. Tsirkin }
21703a4d5c94SMichael S. Tsirkin if (ctx)
21713a4d5c94SMichael S. Tsirkin eventfd_ctx_put(ctx);
21723a4d5c94SMichael S. Tsirkin break;
21733a4d5c94SMichael S. Tsirkin default:
2174935cdee7SMichael S. Tsirkin r = -ENOIOCTLCMD;
21753a4d5c94SMichael S. Tsirkin break;
21763a4d5c94SMichael S. Tsirkin }
21773a4d5c94SMichael S. Tsirkin done:
21783a4d5c94SMichael S. Tsirkin return r;
21793a4d5c94SMichael S. Tsirkin }
21806ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_dev_ioctl);
21813a4d5c94SMichael S. Tsirkin
21823a4d5c94SMichael S. Tsirkin /* TODO: This is really inefficient. We need something like get_user()
21833a4d5c94SMichael S. Tsirkin * (instruction directly accesses the data, with an exception table entry
2184ff61f079SJonathan Corbet * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst.
21853a4d5c94SMichael S. Tsirkin */
21863a4d5c94SMichael S. Tsirkin static int set_bit_to_user(int nr, void __user *addr)
21873a4d5c94SMichael S. Tsirkin {
21883a4d5c94SMichael S. Tsirkin unsigned long log = (unsigned long)addr;
21893a4d5c94SMichael S. Tsirkin struct page *page;
21903a4d5c94SMichael S. Tsirkin void *base;
21913a4d5c94SMichael S. Tsirkin int bit = nr + (log % PAGE_SIZE) * 8;
21923a4d5c94SMichael S. Tsirkin int r;
2193d47effe1SKrishna Kumar
2194690623e1SJohn Hubbard r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page);
2195d6db3f5cSMichael S. Tsirkin if (r < 0)
21963a4d5c94SMichael S. Tsirkin return r;
2197d6db3f5cSMichael S. Tsirkin BUG_ON(r != 1);
2198c6daa7ffSCong Wang base = kmap_atomic(page);
21993a4d5c94SMichael S. Tsirkin set_bit(bit, base);
2200c6daa7ffSCong Wang kunmap_atomic(base);
2201690623e1SJohn Hubbard unpin_user_pages_dirty_lock(&page, 1, true);
22023a4d5c94SMichael S. Tsirkin return 0;
22033a4d5c94SMichael S. Tsirkin }
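/* Worked example (assuming 4K pages): for an addr that sits 3 bytes into
 * its page and nr == 5, the page is pinned and bit
 * 5 + 3 * 8 == 29 of its mapping is set, i.e. the byte offset within
 * the page is folded into the bit number.
 */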
22043a4d5c94SMichael S. Tsirkin
22053a4d5c94SMichael S. Tsirkin static int log_write(void __user *log_base,
22063a4d5c94SMichael S. Tsirkin u64 write_address, u64 write_length)
22073a4d5c94SMichael S. Tsirkin {
220828831ee6SMichael S. Tsirkin u64 write_page = write_address / VHOST_PAGE_SIZE;
22093a4d5c94SMichael S. Tsirkin int r;
2210d47effe1SKrishna Kumar
22113a4d5c94SMichael S. Tsirkin if (!write_length)
22123a4d5c94SMichael S. Tsirkin return 0;
22133bf9be40SMichael S. Tsirkin write_length += write_address % VHOST_PAGE_SIZE;
22143a4d5c94SMichael S. Tsirkin for (;;) {
22153a4d5c94SMichael S. Tsirkin u64 base = (u64)(unsigned long)log_base;
221628831ee6SMichael S. Tsirkin u64 log = base + write_page / 8;
221728831ee6SMichael S. Tsirkin int bit = write_page % 8;
22183a4d5c94SMichael S. Tsirkin if ((u64)(unsigned long)log != log)
22193a4d5c94SMichael S. Tsirkin return -EFAULT;
22203a4d5c94SMichael S. Tsirkin r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
22213a4d5c94SMichael S. Tsirkin if (r < 0)
22223a4d5c94SMichael S. Tsirkin return r;
22233a4d5c94SMichael S. Tsirkin if (write_length <= VHOST_PAGE_SIZE)
22243a4d5c94SMichael S. Tsirkin break;
22253a4d5c94SMichael S. Tsirkin write_length -= VHOST_PAGE_SIZE;
222628831ee6SMichael S. Tsirkin write_page += 1;
22273a4d5c94SMichael S. Tsirkin }
22283a4d5c94SMichael S. Tsirkin return r;
22293a4d5c94SMichael S. Tsirkin }
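/* Worked example (4K VHOST_PAGE_SIZE): a 6-byte write at address 0x1ffd
 * spans pages 1 and 2. write_length becomes 6 + 0xffd == 0x1003, so the
 * loop marks page 1, subtracts one page, and with 3 bytes remaining
 * marks page 2 before terminating.
 */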
22303a4d5c94SMichael S. Tsirkin
2231cc5e7107SJason Wang static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
2232cc5e7107SJason Wang {
22330bbe3066SJason Wang struct vhost_iotlb *umem = vq->umem;
22340bbe3066SJason Wang struct vhost_iotlb_map *u;
2235cc5e7107SJason Wang u64 start, end, l, min;
2236cc5e7107SJason Wang int r;
2237cc5e7107SJason Wang bool hit = false;
2238cc5e7107SJason Wang
2239cc5e7107SJason Wang while (len) {
2240cc5e7107SJason Wang min = len;
2241cc5e7107SJason Wang /* More than one GPA can be mapped into a single HVA, so
2242cc5e7107SJason Wang * iterate over all possible umem entries here to be safe.
2243cc5e7107SJason Wang */
22440bbe3066SJason Wang list_for_each_entry(u, &umem->list, link) {
22450bbe3066SJason Wang if (u->addr > hva - 1 + len ||
22460bbe3066SJason Wang u->addr - 1 + u->size < hva)
2247cc5e7107SJason Wang continue;
22480bbe3066SJason Wang start = max(u->addr, hva);
22490bbe3066SJason Wang end = min(u->addr - 1 + u->size, hva - 1 + len);
2250cc5e7107SJason Wang l = end - start + 1;
2251cc5e7107SJason Wang r = log_write(vq->log_base,
22520bbe3066SJason Wang u->start + start - u->addr,
2253cc5e7107SJason Wang l);
2254cc5e7107SJason Wang if (r < 0)
2255cc5e7107SJason Wang return r;
2256cc5e7107SJason Wang hit = true;
2257cc5e7107SJason Wang min = min(l, min);
2258cc5e7107SJason Wang }
2259cc5e7107SJason Wang
2260cc5e7107SJason Wang if (!hit)
2261cc5e7107SJason Wang return -EFAULT;
2262cc5e7107SJason Wang
2263cc5e7107SJason Wang len -= min;
2264cc5e7107SJason Wang hva += min;
2265cc5e7107SJason Wang }
2266cc5e7107SJason Wang
2267cc5e7107SJason Wang return 0;
2268cc5e7107SJason Wang }
2269cc5e7107SJason Wang
2270cc5e7107SJason Wang static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
2271cc5e7107SJason Wang {
22725e5e8736SLi Wang struct iovec *iov = vq->log_iov;
2273cc5e7107SJason Wang int i, ret;
2274cc5e7107SJason Wang
2275cc5e7107SJason Wang if (!vq->iotlb)
2276cc5e7107SJason Wang return log_write(vq->log_base, vq->log_addr + used_offset, len);
2277cc5e7107SJason Wang
2278cc5e7107SJason Wang ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
2279cc5e7107SJason Wang len, iov, 64, VHOST_ACCESS_WO);
2280816db766SJason Wang if (ret < 0)
2281cc5e7107SJason Wang return ret;
2282cc5e7107SJason Wang
2283cc5e7107SJason Wang for (i = 0; i < ret; i++) {
2284cc5e7107SJason Wang ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
2285cc5e7107SJason Wang iov[i].iov_len);
2286cc5e7107SJason Wang if (ret)
2287cc5e7107SJason Wang return ret;
2288cc5e7107SJason Wang }
2289cc5e7107SJason Wang
2290cc5e7107SJason Wang return 0;
2291cc5e7107SJason Wang }
2292cc5e7107SJason Wang
22933a4d5c94SMichael S. Tsirkin int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
2294cc5e7107SJason Wang unsigned int log_num, u64 len, struct iovec *iov, int count)
22953a4d5c94SMichael S. Tsirkin {
22963a4d5c94SMichael S. Tsirkin int i, r;
22973a4d5c94SMichael S. Tsirkin
22983a4d5c94SMichael S. Tsirkin /* Make sure data written is seen before log. */
22995659338cSMichael S. Tsirkin smp_wmb();
2300cc5e7107SJason Wang
2301cc5e7107SJason Wang if (vq->iotlb) {
2302cc5e7107SJason Wang for (i = 0; i < count; i++) {
2303cc5e7107SJason Wang r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
2304cc5e7107SJason Wang iov[i].iov_len);
2305cc5e7107SJason Wang if (r < 0)
2306cc5e7107SJason Wang return r;
2307cc5e7107SJason Wang }
2308cc5e7107SJason Wang return 0;
2309cc5e7107SJason Wang }
2310cc5e7107SJason Wang
23113a4d5c94SMichael S. Tsirkin for (i = 0; i < log_num; ++i) {
23123a4d5c94SMichael S. Tsirkin u64 l = min(log[i].len, len);
23133a4d5c94SMichael S. Tsirkin r = log_write(vq->log_base, log[i].addr, l);
23143a4d5c94SMichael S. Tsirkin if (r < 0)
23153a4d5c94SMichael S. Tsirkin return r;
23163a4d5c94SMichael S. Tsirkin len -= l;
23175786aee8SMichael S. Tsirkin if (!len) {
23183a4d5c94SMichael S. Tsirkin if (vq->log_ctx)
23193a4d5c94SMichael S. Tsirkin eventfd_signal(vq->log_ctx, 1);
23205786aee8SMichael S. Tsirkin return 0;
23215786aee8SMichael S. Tsirkin }
23225786aee8SMichael S. Tsirkin }
23233a4d5c94SMichael S. Tsirkin /* Length written exceeds what we have stored. This is a bug. */
23243a4d5c94SMichael S. Tsirkin BUG();
23253a4d5c94SMichael S. Tsirkin return 0;
23263a4d5c94SMichael S. Tsirkin }
23276ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_log_write);
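
/*
 * Usage sketch (hypothetical caller, patterned on how the in-tree net
 * backend drives logging): after writing `len' bytes into guest buffers
 * described by the log array that vhost_get_vq_desc() filled in, report
 * the write.  Assumes no IOTLB, hence the NULL/0 iov arguments.
 */
static void example_log_guest_write(struct vhost_virtqueue *vq,
				    struct vhost_log *log,
				    unsigned int log_num, u64 len)
{
	/* Real backends gate this on dirty logging being enabled. */
	vhost_log_write(vq, log, log_num, len, NULL, 0);
}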
23283a4d5c94SMichael S. Tsirkin
23292723feaaSJason Wang static int vhost_update_used_flags(struct vhost_virtqueue *vq)
23302723feaaSJason Wang {
23312723feaaSJason Wang void __user *used;
23327b5d753eSJason Wang if (vhost_put_used_flags(vq))
23332723feaaSJason Wang return -EFAULT;
23342723feaaSJason Wang if (unlikely(vq->log_used)) {
23352723feaaSJason Wang /* Make sure the flag is seen before log. */
23362723feaaSJason Wang smp_wmb();
23372723feaaSJason Wang /* Log used flag write. */
23382723feaaSJason Wang used = &vq->used->flags;
2339cc5e7107SJason Wang log_used(vq, (used - (void __user *)vq->used),
23402723feaaSJason Wang sizeof vq->used->flags);
23412723feaaSJason Wang if (vq->log_ctx)
23422723feaaSJason Wang eventfd_signal(vq->log_ctx, 1);
23432723feaaSJason Wang }
23442723feaaSJason Wang return 0;
23452723feaaSJason Wang }
23462723feaaSJason Wang
23474c809363SStefano Garzarella static int vhost_update_avail_event(struct vhost_virtqueue *vq)
23482723feaaSJason Wang {
23497b5d753eSJason Wang if (vhost_put_avail_event(vq))
23502723feaaSJason Wang return -EFAULT;
23512723feaaSJason Wang if (unlikely(vq->log_used)) {
23522723feaaSJason Wang void __user *used;
23532723feaaSJason Wang /* Make sure the event is seen before log. */
23542723feaaSJason Wang smp_wmb();
23552723feaaSJason Wang /* Log avail event write */
23562723feaaSJason Wang used = vhost_avail_event(vq);
2357cc5e7107SJason Wang log_used(vq, (used - (void __user *)vq->used),
23582723feaaSJason Wang sizeof *vhost_avail_event(vq));
23592723feaaSJason Wang if (vq->log_ctx)
23602723feaaSJason Wang eventfd_signal(vq->log_ctx, 1);
23612723feaaSJason Wang }
23622723feaaSJason Wang return 0;
23632723feaaSJason Wang }
23642723feaaSJason Wang
236580f7d030SGreg Kurz int vhost_vq_init_access(struct vhost_virtqueue *vq)
23662723feaaSJason Wang {
23673b1bbe89SMichael S. Tsirkin __virtio16 last_used_idx;
23682723feaaSJason Wang int r;
2369e1f33be9SGreg Kurz bool is_le = vq->is_le;
2370e1f33be9SGreg Kurz
2371cda8bba0SHalil Pasic if (!vq->private_data)
23722723feaaSJason Wang return 0;
23732751c988SGreg Kurz
23742751c988SGreg Kurz vhost_init_is_le(vq);
23752723feaaSJason Wang
23762723feaaSJason Wang r = vhost_update_used_flags(vq);
23772723feaaSJason Wang if (r)
2378e1f33be9SGreg Kurz goto err;
23792723feaaSJason Wang vq->signalled_used_valid = false;
23806b1e6cc7SJason Wang if (!vq->iotlb &&
238196d4f267SLinus Torvalds !access_ok(&vq->used->idx, sizeof vq->used->idx)) {
2382e1f33be9SGreg Kurz r = -EFAULT;
2383e1f33be9SGreg Kurz goto err;
2384e1f33be9SGreg Kurz }
23857b5d753eSJason Wang r = vhost_get_used_idx(vq, &last_used_idx);
23866b1e6cc7SJason Wang if (r) {
23876b1e6cc7SJason Wang vq_err(vq, "Can't access used idx at %p\n",
23886b1e6cc7SJason Wang &vq->used->idx);
2389e1f33be9SGreg Kurz goto err;
23906b1e6cc7SJason Wang }
23913b1bbe89SMichael S. Tsirkin vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
239264f7f051SMichael S. Tsirkin return 0;
23936b1e6cc7SJason Wang
2394e1f33be9SGreg Kurz err:
2395e1f33be9SGreg Kurz vq->is_le = is_le;
2396e1f33be9SGreg Kurz return r;
23972723feaaSJason Wang }
239880f7d030SGreg Kurz EXPORT_SYMBOL_GPL(vhost_vq_init_access);
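
/*
 * Usage sketch (hypothetical backend, names illustrative):
 * vhost_vq_init_access() is meant to run once a backend is attached,
 * under the vq mutex, so endianness is fixed up and the used index is
 * (re)read before any descriptor processing.
 */
static int example_attach_backend(struct vhost_virtqueue *vq, void *backend)
{
	int r;

	mutex_lock(&vq->mutex);
	vq->private_data = backend;
	r = vhost_vq_init_access(vq);
	if (r)
		vq->private_data = NULL;	/* roll back on failure */
	mutex_unlock(&vq->mutex);
	return r;
}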
23992723feaaSJason Wang
240047283befSMichael S. Tsirkin static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
24016b1e6cc7SJason Wang struct iovec iov[], int iov_size, int access)
24023a4d5c94SMichael S. Tsirkin {
24030bbe3066SJason Wang const struct vhost_iotlb_map *map;
24046b1e6cc7SJason Wang struct vhost_dev *dev = vq->dev;
24050bbe3066SJason Wang struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
24063a4d5c94SMichael S. Tsirkin struct iovec *_iov;
240798047313SStefano Garzarella u64 s = 0, last = addr + len - 1;
24083a4d5c94SMichael S. Tsirkin int ret = 0;
24093a4d5c94SMichael S. Tsirkin
24103a4d5c94SMichael S. Tsirkin while ((u64)len > s) {
24113a4d5c94SMichael S. Tsirkin u64 size;
24127b3384fcSMichael S. Tsirkin if (unlikely(ret >= iov_size)) {
24133a4d5c94SMichael S. Tsirkin ret = -ENOBUFS;
24143a4d5c94SMichael S. Tsirkin break;
24153a4d5c94SMichael S. Tsirkin }
24166b1e6cc7SJason Wang
241798047313SStefano Garzarella map = vhost_iotlb_itree_first(umem, addr, last);
24180bbe3066SJason Wang if (map == NULL || map->start > addr) {
24196b1e6cc7SJason Wang if (umem != dev->iotlb) {
24203a4d5c94SMichael S. Tsirkin ret = -EFAULT;
24213a4d5c94SMichael S. Tsirkin break;
24223a4d5c94SMichael S. Tsirkin }
24236b1e6cc7SJason Wang ret = -EAGAIN;
24246b1e6cc7SJason Wang break;
24250bbe3066SJason Wang } else if (!(map->perm & access)) {
24266b1e6cc7SJason Wang ret = -EPERM;
24276b1e6cc7SJason Wang break;
24286b1e6cc7SJason Wang }
24296b1e6cc7SJason Wang
24303a4d5c94SMichael S. Tsirkin _iov = iov + ret;
24310bbe3066SJason Wang size = map->size - addr + map->start;
2432bd97120fSMichael S. Tsirkin _iov->iov_len = min((u64)len - s, size);
24330d4a3f2aSMichael S. Tsirkin _iov->iov_base = (void __user *)(unsigned long)
24340bbe3066SJason Wang (map->addr + addr - map->start);
24353a4d5c94SMichael S. Tsirkin s += size;
24363a4d5c94SMichael S. Tsirkin addr += size;
24373a4d5c94SMichael S. Tsirkin ++ret;
24383a4d5c94SMichael S. Tsirkin }
24393a4d5c94SMichael S. Tsirkin
24406b1e6cc7SJason Wang if (ret == -EAGAIN)
24416b1e6cc7SJason Wang vhost_iotlb_miss(vq, addr, access);
24423a4d5c94SMichael S. Tsirkin return ret;
24433a4d5c94SMichael S. Tsirkin }
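
/*
 * Sketch of the clamping arithmetic in the loop above: a mapping covers
 * [map->start, map->start + map->size) of guest addresses, so a lookup
 * at `addr' can use at most the remainder of that mapping;
 * translate_desc() then takes the smaller of this and the bytes still
 * untranslated.  Illustrative helper only.
 */
static inline u64 example_map_bytes_left(const struct vhost_iotlb_map *map,
					 u64 addr)
{
	return map->size - (addr - map->start);
}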
24443a4d5c94SMichael S. Tsirkin
24453a4d5c94SMichael S. Tsirkin /* Each buffer in the virtqueues is actually a chain of descriptors. This
24463a4d5c94SMichael S. Tsirkin * function returns the next descriptor in the chain,
24473a4d5c94SMichael S. Tsirkin * or -1U if we're at the end. */
24483b1bbe89SMichael S. Tsirkin static unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc)
24493a4d5c94SMichael S. Tsirkin {
24503a4d5c94SMichael S. Tsirkin unsigned int next;
24513a4d5c94SMichael S. Tsirkin
24523a4d5c94SMichael S. Tsirkin /* If this descriptor says it doesn't chain, we're done. */
24533b1bbe89SMichael S. Tsirkin if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT)))
24543a4d5c94SMichael S. Tsirkin return -1U;
24553a4d5c94SMichael S. Tsirkin
24563a4d5c94SMichael S. Tsirkin /* Check they're not leading us off end of descriptors. */
24573a5db0b1SPaul E. McKenney next = vhost16_to_cpu(vq, READ_ONCE(desc->next));
24583a4d5c94SMichael S. Tsirkin return next;
24593a4d5c94SMichael S. Tsirkin }
24603a4d5c94SMichael S. Tsirkin
246147283befSMichael S. Tsirkin static int get_indirect(struct vhost_virtqueue *vq,
24623a4d5c94SMichael S. Tsirkin struct iovec iov[], unsigned int iov_size,
24633a4d5c94SMichael S. Tsirkin unsigned int *out_num, unsigned int *in_num,
24643a4d5c94SMichael S. Tsirkin struct vhost_log *log, unsigned int *log_num,
24653a4d5c94SMichael S. Tsirkin struct vring_desc *indirect)
24663a4d5c94SMichael S. Tsirkin {
24673a4d5c94SMichael S. Tsirkin struct vring_desc desc;
24683a4d5c94SMichael S. Tsirkin unsigned int i = 0, count, found = 0;
24693b1bbe89SMichael S. Tsirkin u32 len = vhost32_to_cpu(vq, indirect->len);
2470aad9a1ceSAl Viro struct iov_iter from;
24716b1e6cc7SJason Wang int ret, access;
24723a4d5c94SMichael S. Tsirkin
24733a4d5c94SMichael S. Tsirkin /* Sanity check */
24743b1bbe89SMichael S. Tsirkin if (unlikely(len % sizeof desc)) {
24753a4d5c94SMichael S. Tsirkin vq_err(vq, "Invalid length in indirect descriptor: "
24763a4d5c94SMichael S. Tsirkin "len 0x%llx not multiple of 0x%zx\n",
24773b1bbe89SMichael S. Tsirkin (unsigned long long)len,
24783a4d5c94SMichael S. Tsirkin sizeof desc);
24793a4d5c94SMichael S. Tsirkin return -EINVAL;
24803a4d5c94SMichael S. Tsirkin }
24813a4d5c94SMichael S. Tsirkin
24823b1bbe89SMichael S. Tsirkin ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect,
24836b1e6cc7SJason Wang UIO_MAXIOV, VHOST_ACCESS_RO);
24847b3384fcSMichael S. Tsirkin if (unlikely(ret < 0)) {
24856b1e6cc7SJason Wang if (ret != -EAGAIN)
24863a4d5c94SMichael S. Tsirkin vq_err(vq, "Translation failure %d in indirect.\n", ret);
24873a4d5c94SMichael S. Tsirkin return ret;
24883a4d5c94SMichael S. Tsirkin }
2489de4eda9dSAl Viro iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len);
24903b1bbe89SMichael S. Tsirkin count = len / sizeof desc;
24913a4d5c94SMichael S. Tsirkin /* Buffers are chained via a 16-bit next field, so
24923a4d5c94SMichael S. Tsirkin * we can have at most 2^16 of these. */
24937b3384fcSMichael S. Tsirkin if (unlikely(count > USHRT_MAX + 1)) {
24943a4d5c94SMichael S. Tsirkin vq_err(vq, "Indirect buffer length too big: %d\n",
24953a4d5c94SMichael S. Tsirkin indirect->len);
24963a4d5c94SMichael S. Tsirkin return -E2BIG;
24973a4d5c94SMichael S. Tsirkin }
24983a4d5c94SMichael S. Tsirkin
24993a4d5c94SMichael S. Tsirkin do {
25003a4d5c94SMichael S. Tsirkin unsigned iov_count = *in_num + *out_num;
25017b3384fcSMichael S. Tsirkin if (unlikely(++found > count)) {
25023a4d5c94SMichael S. Tsirkin vq_err(vq, "Loop detected: last one at %u "
25033a4d5c94SMichael S. Tsirkin "indirect size %u\n",
25043a4d5c94SMichael S. Tsirkin i, count);
25053a4d5c94SMichael S. Tsirkin return -EINVAL;
25063a4d5c94SMichael S. Tsirkin }
2507cbbd26b8SAl Viro if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) {
25083a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
25093b1bbe89SMichael S. Tsirkin i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
25103a4d5c94SMichael S. Tsirkin return -EINVAL;
25113a4d5c94SMichael S. Tsirkin }
25123b1bbe89SMichael S. Tsirkin if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) {
25133a4d5c94SMichael S. Tsirkin vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
25143b1bbe89SMichael S. Tsirkin i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc);
25153a4d5c94SMichael S. Tsirkin return -EINVAL;
25163a4d5c94SMichael S. Tsirkin }
25173a4d5c94SMichael S. Tsirkin
25186b1e6cc7SJason Wang if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
25196b1e6cc7SJason Wang access = VHOST_ACCESS_WO;
25206b1e6cc7SJason Wang else
25216b1e6cc7SJason Wang access = VHOST_ACCESS_RO;
25226b1e6cc7SJason Wang
25233b1bbe89SMichael S. Tsirkin ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
25243b1bbe89SMichael S. Tsirkin vhost32_to_cpu(vq, desc.len), iov + iov_count,
25256b1e6cc7SJason Wang iov_size - iov_count, access);
25267b3384fcSMichael S. Tsirkin if (unlikely(ret < 0)) {
25276b1e6cc7SJason Wang if (ret != -EAGAIN)
25283a4d5c94SMichael S. Tsirkin vq_err(vq, "Translation failure %d indirect idx %d\n",
25293a4d5c94SMichael S. Tsirkin ret, i);
25303a4d5c94SMichael S. Tsirkin return ret;
25313a4d5c94SMichael S. Tsirkin }
25323a4d5c94SMichael S. Tsirkin /* If this is an input descriptor, increment that count. */
25336b1e6cc7SJason Wang if (access == VHOST_ACCESS_WO) {
25343a4d5c94SMichael S. Tsirkin *in_num += ret;
2535060423bfSyongduan if (unlikely(log && ret)) {
25363b1bbe89SMichael S. Tsirkin log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
25373b1bbe89SMichael S. Tsirkin log[*log_num].len = vhost32_to_cpu(vq, desc.len);
25383a4d5c94SMichael S. Tsirkin ++*log_num;
25393a4d5c94SMichael S. Tsirkin }
25403a4d5c94SMichael S. Tsirkin } else {
25413a4d5c94SMichael S. Tsirkin /* If it's an output descriptor, they're all supposed
25423a4d5c94SMichael S. Tsirkin * to come before any input descriptors. */
25437b3384fcSMichael S. Tsirkin if (unlikely(*in_num)) {
25443a4d5c94SMichael S. Tsirkin vq_err(vq, "Indirect descriptor "
25453a4d5c94SMichael S. Tsirkin "has out after in: idx %d\n", i);
25463a4d5c94SMichael S. Tsirkin return -EINVAL;
25473a4d5c94SMichael S. Tsirkin }
25483a4d5c94SMichael S. Tsirkin *out_num += ret;
25493a4d5c94SMichael S. Tsirkin }
25503b1bbe89SMichael S. Tsirkin } while ((i = next_desc(vq, &desc)) != -1);
25513a4d5c94SMichael S. Tsirkin return 0;
25523a4d5c94SMichael S. Tsirkin }
25533a4d5c94SMichael S. Tsirkin
25543a4d5c94SMichael S. Tsirkin /* This looks in the virtqueue for the first available buffer and converts
25553a4d5c94SMichael S. Tsirkin * it to an iovec for convenient access. Since descriptors consist of some
25563a4d5c94SMichael S. Tsirkin * number of output then some number of input descriptors, it's actually two
25573a4d5c94SMichael S. Tsirkin * iovecs, but we pack them into one and note how many of each there were.
25583a4d5c94SMichael S. Tsirkin *
2559d5675bd2SMichael S. Tsirkin * This function returns the descriptor number found, or vq->num (which is
2560d5675bd2SMichael S. Tsirkin * never a valid descriptor number) if none was found. A negative code is
2561d5675bd2SMichael S. Tsirkin * returned on error. */
256247283befSMichael S. Tsirkin int vhost_get_vq_desc(struct vhost_virtqueue *vq,
25633a4d5c94SMichael S. Tsirkin struct iovec iov[], unsigned int iov_size,
25643a4d5c94SMichael S. Tsirkin unsigned int *out_num, unsigned int *in_num,
25653a4d5c94SMichael S. Tsirkin struct vhost_log *log, unsigned int *log_num)
25663a4d5c94SMichael S. Tsirkin {
25673a4d5c94SMichael S. Tsirkin struct vring_desc desc;
25683a4d5c94SMichael S. Tsirkin unsigned int i, head, found = 0;
25693a4d5c94SMichael S. Tsirkin u16 last_avail_idx;
25703b1bbe89SMichael S. Tsirkin __virtio16 avail_idx;
25713b1bbe89SMichael S. Tsirkin __virtio16 ring_head;
25726b1e6cc7SJason Wang int ret, access;
25733a4d5c94SMichael S. Tsirkin
25743a4d5c94SMichael S. Tsirkin /* Check it isn't doing very strange things with descriptor numbers. */
25753a4d5c94SMichael S. Tsirkin last_avail_idx = vq->last_avail_idx;
2576e3b56cddSJason Wang
2577e3b56cddSJason Wang if (vq->avail_idx == vq->last_avail_idx) {
25787b5d753eSJason Wang if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) {
25793a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to access avail idx at %p\n",
25803a4d5c94SMichael S. Tsirkin &vq->avail->idx);
2581d5675bd2SMichael S. Tsirkin return -EFAULT;
25823a4d5c94SMichael S. Tsirkin }
25833b1bbe89SMichael S. Tsirkin vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
25843a4d5c94SMichael S. Tsirkin
25857b3384fcSMichael S. Tsirkin if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
25863a4d5c94SMichael S. Tsirkin vq_err(vq, "Guest moved avail index from %u to %u",
25873a4d5c94SMichael S. Tsirkin last_avail_idx, vq->avail_idx);
2588d5675bd2SMichael S. Tsirkin return -EFAULT;
25893a4d5c94SMichael S. Tsirkin }
25903a4d5c94SMichael S. Tsirkin
2591e3b56cddSJason Wang /* If there's nothing new since last we looked, return
2592e3b56cddSJason Wang * invalid.
2593e3b56cddSJason Wang */
25943a4d5c94SMichael S. Tsirkin if (vq->avail_idx == last_avail_idx)
25953a4d5c94SMichael S. Tsirkin return vq->num;
25963a4d5c94SMichael S. Tsirkin
2597e3b56cddSJason Wang /* Only get avail ring entries after they have been
2598e3b56cddSJason Wang * exposed by guest.
2599e3b56cddSJason Wang */
26005659338cSMichael S. Tsirkin smp_rmb();
2601e3b56cddSJason Wang }
26023a4d5c94SMichael S. Tsirkin
26033a4d5c94SMichael S. Tsirkin /* Grab the next descriptor number they're advertising, and increment
26043a4d5c94SMichael S. Tsirkin * the index we've seen. */
26057b5d753eSJason Wang if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) {
26063a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to read head: idx %d address %p\n",
26073a4d5c94SMichael S. Tsirkin last_avail_idx,
26083a4d5c94SMichael S. Tsirkin &vq->avail->ring[last_avail_idx % vq->num]);
2609d5675bd2SMichael S. Tsirkin return -EFAULT;
26103a4d5c94SMichael S. Tsirkin }
26113a4d5c94SMichael S. Tsirkin
26123b1bbe89SMichael S. Tsirkin head = vhost16_to_cpu(vq, ring_head);
26133b1bbe89SMichael S. Tsirkin
26143a4d5c94SMichael S. Tsirkin /* If their number is silly, that's an error. */
26157b3384fcSMichael S. Tsirkin if (unlikely(head >= vq->num)) {
26163a4d5c94SMichael S. Tsirkin vq_err(vq, "Guest says index %u > %u is available",
26173a4d5c94SMichael S. Tsirkin head, vq->num);
2618d5675bd2SMichael S. Tsirkin return -EINVAL;
26193a4d5c94SMichael S. Tsirkin }
26203a4d5c94SMichael S. Tsirkin
26213a4d5c94SMichael S. Tsirkin /* When we start there are neither input nor output descriptors. */
26223a4d5c94SMichael S. Tsirkin *out_num = *in_num = 0;
26233a4d5c94SMichael S. Tsirkin if (unlikely(log))
26243a4d5c94SMichael S. Tsirkin *log_num = 0;
26253a4d5c94SMichael S. Tsirkin
26263a4d5c94SMichael S. Tsirkin i = head;
26273a4d5c94SMichael S. Tsirkin do {
26283a4d5c94SMichael S. Tsirkin unsigned iov_count = *in_num + *out_num;
26297b3384fcSMichael S. Tsirkin if (unlikely(i >= vq->num)) {
26303a4d5c94SMichael S. Tsirkin vq_err(vq, "Desc index is %u > %u, head = %u",
26313a4d5c94SMichael S. Tsirkin i, vq->num, head);
2632d5675bd2SMichael S. Tsirkin return -EINVAL;
26333a4d5c94SMichael S. Tsirkin }
26347b3384fcSMichael S. Tsirkin if (unlikely(++found > vq->num)) {
26353a4d5c94SMichael S. Tsirkin vq_err(vq, "Loop detected: last one at %u "
26363a4d5c94SMichael S. Tsirkin "vq size %u head %u\n",
26373a4d5c94SMichael S. Tsirkin i, vq->num, head);
2638d5675bd2SMichael S. Tsirkin return -EINVAL;
26393a4d5c94SMichael S. Tsirkin }
26407b5d753eSJason Wang ret = vhost_get_desc(vq, &desc, i);
26417b3384fcSMichael S. Tsirkin if (unlikely(ret)) {
26423a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
26433a4d5c94SMichael S. Tsirkin i, vq->desc + i);
2644d5675bd2SMichael S. Tsirkin return -EFAULT;
26453a4d5c94SMichael S. Tsirkin }
26463b1bbe89SMichael S. Tsirkin if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) {
264747283befSMichael S. Tsirkin ret = get_indirect(vq, iov, iov_size,
26483a4d5c94SMichael S. Tsirkin out_num, in_num,
26493a4d5c94SMichael S. Tsirkin log, log_num, &desc);
26507b3384fcSMichael S. Tsirkin if (unlikely(ret < 0)) {
26516b1e6cc7SJason Wang if (ret != -EAGAIN)
26523a4d5c94SMichael S. Tsirkin vq_err(vq, "Failure detected "
26533a4d5c94SMichael S. Tsirkin "in indirect descriptor at idx %d\n", i);
2654d5675bd2SMichael S. Tsirkin return ret;
26553a4d5c94SMichael S. Tsirkin }
26563a4d5c94SMichael S. Tsirkin continue;
26573a4d5c94SMichael S. Tsirkin }
26583a4d5c94SMichael S. Tsirkin
26596b1e6cc7SJason Wang if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE))
26606b1e6cc7SJason Wang access = VHOST_ACCESS_WO;
26616b1e6cc7SJason Wang else
26626b1e6cc7SJason Wang access = VHOST_ACCESS_RO;
26633b1bbe89SMichael S. Tsirkin ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr),
26643b1bbe89SMichael S. Tsirkin vhost32_to_cpu(vq, desc.len), iov + iov_count,
26656b1e6cc7SJason Wang iov_size - iov_count, access);
26667b3384fcSMichael S. Tsirkin if (unlikely(ret < 0)) {
26676b1e6cc7SJason Wang if (ret != -EAGAIN)
26683a4d5c94SMichael S. Tsirkin vq_err(vq, "Translation failure %d descriptor idx %d\n",
26693a4d5c94SMichael S. Tsirkin ret, i);
2670d5675bd2SMichael S. Tsirkin return ret;
26713a4d5c94SMichael S. Tsirkin }
26726b1e6cc7SJason Wang if (access == VHOST_ACCESS_WO) {
26733a4d5c94SMichael S. Tsirkin /* If this is an input descriptor,
26743a4d5c94SMichael S. Tsirkin * increment that count. */
26753a4d5c94SMichael S. Tsirkin *in_num += ret;
2676060423bfSyongduan if (unlikely(log && ret)) {
26773b1bbe89SMichael S. Tsirkin log[*log_num].addr = vhost64_to_cpu(vq, desc.addr);
26783b1bbe89SMichael S. Tsirkin log[*log_num].len = vhost32_to_cpu(vq, desc.len);
26793a4d5c94SMichael S. Tsirkin ++*log_num;
26803a4d5c94SMichael S. Tsirkin }
26813a4d5c94SMichael S. Tsirkin } else {
26823a4d5c94SMichael S. Tsirkin /* If it's an output descriptor, they're all supposed
26833a4d5c94SMichael S. Tsirkin * to come before any input descriptors. */
26847b3384fcSMichael S. Tsirkin if (unlikely(*in_num)) {
26853a4d5c94SMichael S. Tsirkin vq_err(vq, "Descriptor has out after in: "
26863a4d5c94SMichael S. Tsirkin "idx %d\n", i);
2687d5675bd2SMichael S. Tsirkin return -EINVAL;
26883a4d5c94SMichael S. Tsirkin }
26893a4d5c94SMichael S. Tsirkin *out_num += ret;
26903a4d5c94SMichael S. Tsirkin }
26913b1bbe89SMichael S. Tsirkin } while ((i = next_desc(vq, &desc)) != -1);
26923a4d5c94SMichael S. Tsirkin
26933a4d5c94SMichael S. Tsirkin /* On success, increment avail index. */
26943a4d5c94SMichael S. Tsirkin vq->last_avail_idx++;
26958ea8cf89SMichael S. Tsirkin
26968ea8cf89SMichael S. Tsirkin /* Assume notifications from guest are disabled at this point,
26978ea8cf89SMichael S. Tsirkin * if they aren't we would need to update avail_event index. */
26988ea8cf89SMichael S. Tsirkin BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
26993a4d5c94SMichael S. Tsirkin return head;
27003a4d5c94SMichael S. Tsirkin }
27016ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_get_vq_desc);
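
/*
 * Usage sketch (hypothetical worker, in the spirit of the in-tree
 * backends): drain the available ring, consume each buffer, and use the
 * disable/enable notify dance so a guest kick is never lost.  Assumes
 * the vq->iov scratch array from struct vhost_virtqueue; the zero `len'
 * passed to vhost_add_used_and_signal() models a tx-style buffer with
 * no bytes written back.
 */
static void example_handle_vq(struct vhost_dev *dev,
			      struct vhost_virtqueue *vq)
{
	unsigned int out, in;
	int head;

	mutex_lock(&vq->mutex);
	vhost_disable_notify(dev, vq);
	for (;;) {
		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0)
			break;			/* error (or IOTLB miss) */
		if (head == vq->num) {
			/* Ring empty: re-enable and re-check the race. */
			if (unlikely(vhost_enable_notify(dev, vq))) {
				vhost_disable_notify(dev, vq);
				continue;
			}
			break;
		}
		/* ... consume `out' then `in' iovecs for `head' here ... */
		vhost_add_used_and_signal(dev, vq, head, 0);
	}
	mutex_unlock(&vq->mutex);
}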
27023a4d5c94SMichael S. Tsirkin
27033a4d5c94SMichael S. Tsirkin /* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */
27048dd014adSDavid Stevens void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n)
27053a4d5c94SMichael S. Tsirkin {
27068dd014adSDavid Stevens vq->last_avail_idx -= n;
27073a4d5c94SMichael S. Tsirkin }
27086ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_discard_vq_desc);
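
/*
 * Sketch: a backend that pulled a buffer but hit a transient failure
 * (allocation failure, socket full, ...) can push it back and retry on
 * the next kick.  `n' counts buffers returned by vhost_get_vq_desc(),
 * not individual descriptors.
 */
static void example_requeue_one(struct vhost_virtqueue *vq)
{
	vhost_discard_vq_desc(vq, 1);	/* re-expose the last buffer */
}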
27093a4d5c94SMichael S. Tsirkin
27103a4d5c94SMichael S. Tsirkin /* After we've used one of their buffers, we tell them about it. We'll then
27113a4d5c94SMichael S. Tsirkin * want to notify the guest, using eventfd. */
27123a4d5c94SMichael S. Tsirkin int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
27133a4d5c94SMichael S. Tsirkin {
27143b1bbe89SMichael S. Tsirkin struct vring_used_elem heads = {
27153b1bbe89SMichael S. Tsirkin cpu_to_vhost32(vq, head),
27163b1bbe89SMichael S. Tsirkin cpu_to_vhost32(vq, len)
27173b1bbe89SMichael S. Tsirkin };
27183a4d5c94SMichael S. Tsirkin
2719c49e4e57SJason Wang return vhost_add_used_n(vq, &heads, 1);
27203a4d5c94SMichael S. Tsirkin }
27216ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used);
27223a4d5c94SMichael S. Tsirkin
27238dd014adSDavid Stevens static int __vhost_add_used_n(struct vhost_virtqueue *vq,
27248dd014adSDavid Stevens struct vring_used_elem *heads,
27258dd014adSDavid Stevens unsigned count)
27268dd014adSDavid Stevens {
2727a865e420SMichael S. Tsirkin vring_used_elem_t __user *used;
27288ea8cf89SMichael S. Tsirkin u16 old, new;
27298dd014adSDavid Stevens int start;
27308dd014adSDavid Stevens
27315fba13b5SMichael S. Tsirkin start = vq->last_used_idx & (vq->num - 1);
27328dd014adSDavid Stevens used = vq->used->ring + start;
27337b5d753eSJason Wang if (vhost_put_used(vq, heads, start, count)) {
27348dd014adSDavid Stevens vq_err(vq, "Failed to write used");
27358dd014adSDavid Stevens return -EFAULT;
27368dd014adSDavid Stevens }
27378dd014adSDavid Stevens if (unlikely(vq->log_used)) {
27388dd014adSDavid Stevens /* Make sure data is seen before log. */
27398dd014adSDavid Stevens smp_wmb();
27408dd014adSDavid Stevens /* Log used ring entry write. */
2741cc5e7107SJason Wang log_used(vq, ((void __user *)used - (void __user *)vq->used),
27428dd014adSDavid Stevens count * sizeof *used);
27438dd014adSDavid Stevens }
27448ea8cf89SMichael S. Tsirkin old = vq->last_used_idx;
27458ea8cf89SMichael S. Tsirkin new = (vq->last_used_idx += count);
27468ea8cf89SMichael S. Tsirkin /* If the driver never bothers to signal for a very long while,
27478ea8cf89SMichael S. Tsirkin * the used index might wrap around. If that happens, invalidate
27488ea8cf89SMichael S. Tsirkin * the signalled_used index we stored. TODO: make sure the driver
27498ea8cf89SMichael S. Tsirkin * signals at least once per 2^16 entries and remove this. */
27508ea8cf89SMichael S. Tsirkin if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
27518ea8cf89SMichael S. Tsirkin vq->signalled_used_valid = false;
27528dd014adSDavid Stevens return 0;
27538dd014adSDavid Stevens }
27548dd014adSDavid Stevens
27558dd014adSDavid Stevens /* After we've used one of their buffers, we tell them about it. We'll then
27568dd014adSDavid Stevens * want to notify the guest, using eventfd. */
27578dd014adSDavid Stevens int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
27588dd014adSDavid Stevens unsigned count)
27598dd014adSDavid Stevens {
27608dd014adSDavid Stevens int start, n, r;
27618dd014adSDavid Stevens
27625fba13b5SMichael S. Tsirkin start = vq->last_used_idx & (vq->num - 1);
27638dd014adSDavid Stevens n = vq->num - start;
27648dd014adSDavid Stevens if (n < count) {
27658dd014adSDavid Stevens r = __vhost_add_used_n(vq, heads, n);
27668dd014adSDavid Stevens if (r < 0)
27678dd014adSDavid Stevens return r;
27688dd014adSDavid Stevens heads += n;
27698dd014adSDavid Stevens count -= n;
27708dd014adSDavid Stevens }
27718dd014adSDavid Stevens r = __vhost_add_used_n(vq, heads, count);
27728dd014adSDavid Stevens
27738dd014adSDavid Stevens /* Make sure buffer is written before we update index. */
27748dd014adSDavid Stevens smp_wmb();
27757b5d753eSJason Wang if (vhost_put_used_idx(vq)) {
27768dd014adSDavid Stevens vq_err(vq, "Failed to increment used idx");
27778dd014adSDavid Stevens return -EFAULT;
27788dd014adSDavid Stevens }
27798dd014adSDavid Stevens if (unlikely(vq->log_used)) {
2780841df922SJason Wang /* Make sure used idx is seen before log. */
2781841df922SJason Wang smp_wmb();
27828dd014adSDavid Stevens /* Log used index update. */
2783cc5e7107SJason Wang log_used(vq, offsetof(struct vring_used, idx),
27848dd014adSDavid Stevens sizeof vq->used->idx);
27858dd014adSDavid Stevens if (vq->log_ctx)
27868dd014adSDavid Stevens eventfd_signal(vq->log_ctx, 1);
27878dd014adSDavid Stevens }
27888dd014adSDavid Stevens return r;
27898dd014adSDavid Stevens }
27906ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_n);
27918dd014adSDavid Stevens
27928ea8cf89SMichael S. Tsirkin static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
27933a4d5c94SMichael S. Tsirkin {
27943b1bbe89SMichael S. Tsirkin __u16 old, new;
27953b1bbe89SMichael S. Tsirkin __virtio16 event;
27968ea8cf89SMichael S. Tsirkin bool v;
27978d65843cSJason Wang /* Flush out used index updates. This is paired
27988d65843cSJason Wang * with the barrier that the Guest executes when enabling
27998d65843cSJason Wang * interrupts. */
28008d65843cSJason Wang smp_mb();
28010d499356SMichael S. Tsirkin
2802ea16c514SMichael S. Tsirkin if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) &&
28038ea8cf89SMichael S. Tsirkin unlikely(vq->avail_idx == vq->last_avail_idx))
28048ea8cf89SMichael S. Tsirkin return true;
28058ea8cf89SMichael S. Tsirkin
2806ea16c514SMichael S. Tsirkin if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
28073b1bbe89SMichael S. Tsirkin __virtio16 flags;
28087b5d753eSJason Wang if (vhost_get_avail_flags(vq, &flags)) {
28093a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to get flags");
28108ea8cf89SMichael S. Tsirkin return true;
28118ea8cf89SMichael S. Tsirkin }
28123b1bbe89SMichael S. Tsirkin return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT));
28138ea8cf89SMichael S. Tsirkin }
28148ea8cf89SMichael S. Tsirkin old = vq->signalled_used;
28158ea8cf89SMichael S. Tsirkin v = vq->signalled_used_valid;
28168ea8cf89SMichael S. Tsirkin new = vq->signalled_used = vq->last_used_idx;
28178ea8cf89SMichael S. Tsirkin vq->signalled_used_valid = true;
28188ea8cf89SMichael S. Tsirkin
28198ea8cf89SMichael S. Tsirkin if (unlikely(!v))
28208ea8cf89SMichael S. Tsirkin return true;
28218ea8cf89SMichael S. Tsirkin
28227b5d753eSJason Wang if (vhost_get_used_event(vq, &event)) {
28238ea8cf89SMichael S. Tsirkin vq_err(vq, "Failed to get used event idx");
28248ea8cf89SMichael S. Tsirkin return true;
28258ea8cf89SMichael S. Tsirkin }
28268d65843cSJason Wang return vring_need_event(vhost16_to_cpu(vq, event), new, old);
28273a4d5c94SMichael S. Tsirkin }
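
/*
 * Worked example of the event-idx test above (numbers illustrative):
 * with old = 5, new = 8 and used_event = 6, the guest asked to be
 * signalled once entry 6 was consumed.  vring_need_event(6, 8, 5)
 * evaluates (u16)(8 - 6 - 1) = 1 < (u16)(8 - 5) = 3, which is true, so
 * we kick.  With used_event = 9 it would be (u16)(8 - 9 - 1) = 0xfffe,
 * not less than 3, and the kick is suppressed until the guest's event
 * index is crossed.
 */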
28283a4d5c94SMichael S. Tsirkin
28298ea8cf89SMichael S. Tsirkin /* This actually signals the guest, using eventfd. */
28308ea8cf89SMichael S. Tsirkin void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
28318ea8cf89SMichael S. Tsirkin {
28323a4d5c94SMichael S. Tsirkin /* Signal the Guest to tell them we used something up. */
2833265a0ad8SZhu Lingshan if (vq->call_ctx.ctx && vhost_notify(dev, vq))
2834265a0ad8SZhu Lingshan eventfd_signal(vq->call_ctx.ctx, 1);
28353a4d5c94SMichael S. Tsirkin }
28366ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_signal);
28373a4d5c94SMichael S. Tsirkin
28383a4d5c94SMichael S. Tsirkin /* And here's the combo meal deal. Supersize me! */
28393a4d5c94SMichael S. Tsirkin void vhost_add_used_and_signal(struct vhost_dev *dev,
28403a4d5c94SMichael S. Tsirkin struct vhost_virtqueue *vq,
28413a4d5c94SMichael S. Tsirkin unsigned int head, int len)
28423a4d5c94SMichael S. Tsirkin {
28433a4d5c94SMichael S. Tsirkin vhost_add_used(vq, head, len);
28443a4d5c94SMichael S. Tsirkin vhost_signal(dev, vq);
28453a4d5c94SMichael S. Tsirkin }
28466ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_and_signal);
28473a4d5c94SMichael S. Tsirkin
28488dd014adSDavid Stevens /* multi-buffer version of vhost_add_used_and_signal */
28498dd014adSDavid Stevens void vhost_add_used_and_signal_n(struct vhost_dev *dev,
28508dd014adSDavid Stevens struct vhost_virtqueue *vq,
28518dd014adSDavid Stevens struct vring_used_elem *heads, unsigned count)
28528dd014adSDavid Stevens {
28538dd014adSDavid Stevens vhost_add_used_n(vq, heads, count);
28548dd014adSDavid Stevens vhost_signal(dev, vq);
28558dd014adSDavid Stevens }
28566ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n);
28578dd014adSDavid Stevens
2858d4a60603SJason Wang /* Return true if we're sure that the available ring is empty */
2859d4a60603SJason Wang bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
2860d4a60603SJason Wang {
2861d4a60603SJason Wang __virtio16 avail_idx;
2862d4a60603SJason Wang int r;
2863d4a60603SJason Wang
2864275bf960SJason Wang if (vq->avail_idx != vq->last_avail_idx)
2865d4a60603SJason Wang return false;
2866d4a60603SJason Wang
28677b5d753eSJason Wang r = vhost_get_avail_idx(vq, &avail_idx);
2868275bf960SJason Wang if (unlikely(r))
2869275bf960SJason Wang return false;
2870275bf960SJason Wang
28718a05b663SGavin Shan vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
28728a05b663SGavin Shan if (vq->avail_idx != vq->last_avail_idx) {
28738a05b663SGavin Shan /* Since we have updated avail_idx, the following
28748a05b663SGavin Shan * call to vhost_get_vq_desc() will read available
28758a05b663SGavin Shan * ring entries. Make sure that read happens after
28768a05b663SGavin Shan * the avail_idx read.
28778a05b663SGavin Shan */
28788a05b663SGavin Shan smp_rmb();
28798a05b663SGavin Shan return false;
28808a05b663SGavin Shan }
28818a05b663SGavin Shan
28828a05b663SGavin Shan return true;
2883d4a60603SJason Wang }
2884d4a60603SJason Wang EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
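
/*
 * Usage sketch (in the spirit of vhost-net's busy polling; names and
 * deadline handling are hypothetical): spin for a bounded time instead
 * of sleeping while the available ring stays empty.
 */
static bool example_busy_poll(struct vhost_dev *dev,
			      struct vhost_virtqueue *vq,
			      unsigned long deadline)
{
	while (vhost_vq_avail_empty(dev, vq)) {
		if (time_after(jiffies, deadline))
			return false;	/* gave up; go back to sleeping */
		cpu_relax();
	}
	return true;	/* the guest posted new buffers */
}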
2885d4a60603SJason Wang
28863a4d5c94SMichael S. Tsirkin /* OK, now we need to know about added descriptors. */
28878ea8cf89SMichael S. Tsirkin bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
28883a4d5c94SMichael S. Tsirkin {
28893b1bbe89SMichael S. Tsirkin __virtio16 avail_idx;
28903a4d5c94SMichael S. Tsirkin int r;
2891d47effe1SKrishna Kumar
28923a4d5c94SMichael S. Tsirkin if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
28933a4d5c94SMichael S. Tsirkin return false;
28943a4d5c94SMichael S. Tsirkin vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
2895ea16c514SMichael S. Tsirkin if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
28962723feaaSJason Wang r = vhost_update_used_flags(vq);
28973a4d5c94SMichael S. Tsirkin if (r) {
28983a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to enable notification at %p: %d\n",
28993a4d5c94SMichael S. Tsirkin &vq->used->flags, r);
29003a4d5c94SMichael S. Tsirkin return false;
29013a4d5c94SMichael S. Tsirkin }
29028ea8cf89SMichael S. Tsirkin } else {
29034c809363SStefano Garzarella r = vhost_update_avail_event(vq);
29048ea8cf89SMichael S. Tsirkin if (r) {
29058ea8cf89SMichael S. Tsirkin vq_err(vq, "Failed to update avail event index at %p: %d\n",
29068ea8cf89SMichael S. Tsirkin vhost_avail_event(vq), r);
29078ea8cf89SMichael S. Tsirkin return false;
29088ea8cf89SMichael S. Tsirkin }
29098ea8cf89SMichael S. Tsirkin }
29103a4d5c94SMichael S. Tsirkin /* They could have slipped one in as we were doing that: make
29113a4d5c94SMichael S. Tsirkin * sure it's written, then check again. */
29125659338cSMichael S. Tsirkin smp_mb();
29137b5d753eSJason Wang r = vhost_get_avail_idx(vq, &avail_idx);
29143a4d5c94SMichael S. Tsirkin if (r) {
29153a4d5c94SMichael S. Tsirkin vq_err(vq, "Failed to check avail idx at %p: %d\n",
29163a4d5c94SMichael S. Tsirkin &vq->avail->idx, r);
29173a4d5c94SMichael S. Tsirkin return false;
29183a4d5c94SMichael S. Tsirkin }
29193a4d5c94SMichael S. Tsirkin
2920d619651aSGavin Shan vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
2921d619651aSGavin Shan if (vq->avail_idx != vq->last_avail_idx) {
2922d619651aSGavin Shan /* Since we have updated avail_idx, the following
2923d619651aSGavin Shan * call to vhost_get_vq_desc() will read available
2924d619651aSGavin Shan * ring entries. Make sure that read happens after
2925d619651aSGavin Shan * the avail_idx read.
2926d619651aSGavin Shan */
2927d619651aSGavin Shan smp_rmb();
2928d619651aSGavin Shan return true;
2929d619651aSGavin Shan }
2930d619651aSGavin Shan
2931d619651aSGavin Shan return false;
29323a4d5c94SMichael S. Tsirkin }
29336ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_enable_notify);
29343a4d5c94SMichael S. Tsirkin
29353a4d5c94SMichael S. Tsirkin /* We don't need to be notified again. */
29368ea8cf89SMichael S. Tsirkin void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
29373a4d5c94SMichael S. Tsirkin {
29383a4d5c94SMichael S. Tsirkin int r;
2939d47effe1SKrishna Kumar
29403a4d5c94SMichael S. Tsirkin if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
29413a4d5c94SMichael S. Tsirkin return;
29423a4d5c94SMichael S. Tsirkin vq->used_flags |= VRING_USED_F_NO_NOTIFY;
2943ea16c514SMichael S. Tsirkin if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) {
29442723feaaSJason Wang r = vhost_update_used_flags(vq);
29453a4d5c94SMichael S. Tsirkin if (r)
2946ae6961deSYunsheng Lin vq_err(vq, "Failed to disable notification at %p: %d\n",
29473a4d5c94SMichael S. Tsirkin &vq->used->flags, r);
29483a4d5c94SMichael S. Tsirkin }
29498ea8cf89SMichael S. Tsirkin }
29506ac1afbfSAsias He EXPORT_SYMBOL_GPL(vhost_disable_notify);
29516ac1afbfSAsias He
29526b1e6cc7SJason Wang /* Create a new message. */
29536b1e6cc7SJason Wang struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
29546b1e6cc7SJason Wang {
29554d8df0f5SPrathu Baronia /* Make sure all padding within the structure is initialized. */
29564d8df0f5SPrathu Baronia struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
29576b1e6cc7SJason Wang if (!node)
29586b1e6cc7SJason Wang return NULL;
2959670ae9caSMichael S. Tsirkin
29606b1e6cc7SJason Wang node->vq = vq;
29616b1e6cc7SJason Wang node->msg.type = type;
29626b1e6cc7SJason Wang return node;
29636b1e6cc7SJason Wang }
29646b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_new_msg);
29656b1e6cc7SJason Wang
29666b1e6cc7SJason Wang void vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head,
29676b1e6cc7SJason Wang struct vhost_msg_node *node)
29686b1e6cc7SJason Wang {
29696b1e6cc7SJason Wang spin_lock(&dev->iotlb_lock);
29706b1e6cc7SJason Wang list_add_tail(&node->node, head);
29716b1e6cc7SJason Wang spin_unlock(&dev->iotlb_lock);
29726b1e6cc7SJason Wang
2973a9a08845SLinus Torvalds wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
29746b1e6cc7SJason Wang }
29756b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_enqueue_msg);
29766b1e6cc7SJason Wang
29776b1e6cc7SJason Wang struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
29786b1e6cc7SJason Wang struct list_head *head)
29796b1e6cc7SJason Wang {
29806b1e6cc7SJason Wang struct vhost_msg_node *node = NULL;
29816b1e6cc7SJason Wang
29826b1e6cc7SJason Wang spin_lock(&dev->iotlb_lock);
29836b1e6cc7SJason Wang if (!list_empty(head)) {
29846b1e6cc7SJason Wang node = list_first_entry(head, struct vhost_msg_node,
29856b1e6cc7SJason Wang node);
29866b1e6cc7SJason Wang list_del(&node->node);
29876b1e6cc7SJason Wang }
29886b1e6cc7SJason Wang spin_unlock(&dev->iotlb_lock);
29896b1e6cc7SJason Wang
29906b1e6cc7SJason Wang return node;
29916b1e6cc7SJason Wang }
29926b1e6cc7SJason Wang EXPORT_SYMBOL_GPL(vhost_dequeue_msg);
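
/*
 * Usage sketch for the message-node API above (the list head and
 * message type are the caller's; the IOTLB miss path, for instance,
 * posts nodes to the device's pending list this way): producers
 * allocate and enqueue, which wakes poll()ers on dev->wait, and a
 * reader later reaps with vhost_dequeue_msg().
 */
static int example_post_msg(struct vhost_dev *dev,
			    struct vhost_virtqueue *vq,
			    struct list_head *head, int type)
{
	struct vhost_msg_node *node = vhost_new_msg(vq, type);

	if (!node)
		return -ENOMEM;
	vhost_enqueue_msg(dev, head, node);
	return 0;
}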
29936b1e6cc7SJason Wang
2994460f7ce1SJason Wang void vhost_set_backend_features(struct vhost_dev *dev, u64 features)
2995460f7ce1SJason Wang {
2996460f7ce1SJason Wang struct vhost_virtqueue *vq;
2997460f7ce1SJason Wang int i;
2998460f7ce1SJason Wang
2999460f7ce1SJason Wang mutex_lock(&dev->mutex);
3000460f7ce1SJason Wang for (i = 0; i < dev->nvqs; ++i) {
3001460f7ce1SJason Wang vq = dev->vqs[i];
3002460f7ce1SJason Wang mutex_lock(&vq->mutex);
3003460f7ce1SJason Wang vq->acked_backend_features = features;
3004460f7ce1SJason Wang mutex_unlock(&vq->mutex);
3005460f7ce1SJason Wang }
3006460f7ce1SJason Wang mutex_unlock(&dev->mutex);
3007460f7ce1SJason Wang }
3008460f7ce1SJason Wang EXPORT_SYMBOL_GPL(vhost_set_backend_features);
30096b1e6cc7SJason Wang
30106ac1afbfSAsias He static int __init vhost_init(void)
30116ac1afbfSAsias He {
30126ac1afbfSAsias He return 0;
30136ac1afbfSAsias He }
30146ac1afbfSAsias He
30156ac1afbfSAsias He static void __exit vhost_exit(void)
30166ac1afbfSAsias He {
30176ac1afbfSAsias He }
30186ac1afbfSAsias He
30196ac1afbfSAsias He module_init(vhost_init);
30206ac1afbfSAsias He module_exit(vhost_exit);
30216ac1afbfSAsias He
30226ac1afbfSAsias He MODULE_VERSION("0.0.1");
30236ac1afbfSAsias He MODULE_LICENSE("GPL v2");
30246ac1afbfSAsias He MODULE_AUTHOR("Michael S. Tsirkin");
30256ac1afbfSAsias He MODULE_DESCRIPTION("Host kernel accelerator for virtio");