xref: /openbmc/linux/drivers/vhost/vdpa.c (revision 34d6f206a88c2651d216bd3487ac956a40b2ba8e)
14c8cf318STiwei Bie // SPDX-License-Identifier: GPL-2.0
24c8cf318STiwei Bie /*
34c8cf318STiwei Bie  * Copyright (C) 2018-2020 Intel Corporation.
44c8cf318STiwei Bie  * Copyright (C) 2020 Red Hat, Inc.
54c8cf318STiwei Bie  *
64c8cf318STiwei Bie  * Author: Tiwei Bie <tiwei.bie@intel.com>
74c8cf318STiwei Bie  *         Jason Wang <jasowang@redhat.com>
84c8cf318STiwei Bie  *
94c8cf318STiwei Bie  * Thanks Michael S. Tsirkin for the valuable comments and
104c8cf318STiwei Bie  * suggestions.  And thanks to Cunming Liang and Zhihong Wang for all
114c8cf318STiwei Bie  * their supports.
124c8cf318STiwei Bie  */
134c8cf318STiwei Bie 
144c8cf318STiwei Bie #include <linux/kernel.h>
154c8cf318STiwei Bie #include <linux/module.h>
164c8cf318STiwei Bie #include <linux/cdev.h>
174c8cf318STiwei Bie #include <linux/device.h>
18ddd89d0aSJason Wang #include <linux/mm.h>
199d6d97bfSXie Yongji #include <linux/slab.h>
204c8cf318STiwei Bie #include <linux/iommu.h>
214c8cf318STiwei Bie #include <linux/uuid.h>
224c8cf318STiwei Bie #include <linux/vdpa.h>
234c8cf318STiwei Bie #include <linux/nospec.h>
244c8cf318STiwei Bie #include <linux/vhost.h>
254c8cf318STiwei Bie 
264c8cf318STiwei Bie #include "vhost.h"
274c8cf318STiwei Bie 
28653055b9SJason Wang enum {
2925abc060SJason Wang 	VHOST_VDPA_BACKEND_FEATURES =
3025abc060SJason Wang 	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
31aaca8373SGautam Dawar 	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
32aaca8373SGautam Dawar 	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
33653055b9SJason Wang };
34653055b9SJason Wang 
/* One slot per value representable in MINORBITS bits. */
#define VHOST_VDPA_DEV_MAX		(1U << MINORBITS)

/* Bucket count of vhost_vdpa.as[]; an ASID lands in bucket (asid % count). */
#define VHOST_VDPA_IOTLB_BUCKETS	16
383d569879SGautam Dawar 
393d569879SGautam Dawar struct vhost_vdpa_as {
403d569879SGautam Dawar 	struct hlist_node hash_link;
413d569879SGautam Dawar 	struct vhost_iotlb iotlb;
423d569879SGautam Dawar 	u32 id;
433d569879SGautam Dawar };
443d569879SGautam Dawar 
454c8cf318STiwei Bie struct vhost_vdpa {
464c8cf318STiwei Bie 	struct vhost_dev vdev;
474c8cf318STiwei Bie 	struct iommu_domain *domain;
484c8cf318STiwei Bie 	struct vhost_virtqueue *vqs;
494c8cf318STiwei Bie 	struct completion completion;
504c8cf318STiwei Bie 	struct vdpa_device *vdpa;
513d569879SGautam Dawar 	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
524c8cf318STiwei Bie 	struct device dev;
534c8cf318STiwei Bie 	struct cdev cdev;
544c8cf318STiwei Bie 	atomic_t opened;
5581d46d69SLongpeng 	u32 nvqs;
564c8cf318STiwei Bie 	int virtio_id;
574c8cf318STiwei Bie 	int minor;
58776f3950SZhu Lingshan 	struct eventfd_ctx *config_ctx;
5925abc060SJason Wang 	int in_batch;
601b48dc03SJason Wang 	struct vdpa_iova_range range;
61aaca8373SGautam Dawar 	u32 batch_asid;
624c8cf318STiwei Bie };
634c8cf318STiwei Bie 
644c8cf318STiwei Bie static DEFINE_IDA(vhost_vdpa_ida);
654c8cf318STiwei Bie 
664c8cf318STiwei Bie static dev_t vhost_vdpa_major;
674c8cf318STiwei Bie 
68c070c191SStefano Garzarella static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
69e794070aSCindy Lu 				   struct vhost_iotlb *iotlb, u64 start,
70e794070aSCindy Lu 				   u64 last, u32 asid);
71c070c191SStefano Garzarella 
iotlb_to_asid(struct vhost_iotlb * iotlb)72aaca8373SGautam Dawar static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
73aaca8373SGautam Dawar {
74aaca8373SGautam Dawar 	struct vhost_vdpa_as *as = container_of(iotlb, struct
75aaca8373SGautam Dawar 						vhost_vdpa_as, iotlb);
76aaca8373SGautam Dawar 	return as->id;
77aaca8373SGautam Dawar }
78aaca8373SGautam Dawar 
/*
 * Look up the address space registered under @asid.
 *
 * Walks the hash bucket selected by (asid % VHOST_VDPA_IOTLB_BUCKETS) and
 * returns the first entry whose id equals @asid, or NULL if none matches.
 */
static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *pos;

	hlist_for_each_entry(pos, &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS],
			     hash_link) {
		if (pos->id == asid)
			return pos;
	}

	return NULL;
}
903d569879SGautam Dawar 
/*
 * Return the IOTLB of the address space registered under @asid, or NULL when
 * no such address space exists.
 */
static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *found = asid_to_as(v, asid);

	return found ? &found->iotlb : NULL;
}
100aaca8373SGautam Dawar 
/*
 * Create a new address space for @asid and insert it into the hash table.
 *
 * Returns the new entry, or NULL when @asid is already registered, when
 * @asid is not below v->vdpa->nas, or when the allocation fails.
 */
static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *new_as;

	/* Reject duplicates and out-of-range identifiers up front. */
	if (asid_to_as(v, asid) || asid >= v->vdpa->nas)
		return NULL;

	new_as = kmalloc(sizeof(*new_as), GFP_KERNEL);
	if (!new_as)
		return NULL;

	new_as->id = asid;
	vhost_iotlb_init(&new_as->iotlb, 0, 0);
	hlist_add_head(&new_as->hash_link,
		       &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS]);

	return new_as;
}
1223d569879SGautam Dawar 
/*
 * Return the address space for @asid, creating it if it does not exist yet.
 * Returns NULL when creation via vhost_vdpa_alloc_as() fails.
 */
static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
						      u32 asid)
{
	struct vhost_vdpa_as *existing = asid_to_as(v, asid);

	if (!existing)
		return vhost_vdpa_alloc_as(v, asid);

	return existing;
}
133aaca8373SGautam Dawar 
/*
 * Tear down the address space registered under @asid.
 *
 * The entry is unhashed first, then its whole IOTLB range [0, U64_MAX] is
 * unmapped, and finally the entry itself is freed.
 *
 * Returns 0 on success, or -EINVAL when @asid is not registered.
 */
static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{
	struct vhost_vdpa_as *victim = asid_to_as(v, asid);

	if (victim) {
		hlist_del(&victim->hash_link);
		vhost_vdpa_iotlb_unmap(v, &victim->iotlb, 0ULL, 0ULL - 1, asid);
		kfree(victim);
		return 0;
	}

	return -EINVAL;
}
1473d569879SGautam Dawar 
handle_vq_kick(struct vhost_work * work)1484c8cf318STiwei Bie static void handle_vq_kick(struct vhost_work *work)
1494c8cf318STiwei Bie {
1504c8cf318STiwei Bie 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
1514c8cf318STiwei Bie 						  poll.work);
1524c8cf318STiwei Bie 	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
1534c8cf318STiwei Bie 	const struct vdpa_config_ops *ops = v->vdpa->config;
1544c8cf318STiwei Bie 
1554c8cf318STiwei Bie 	ops->kick_vq(v->vdpa, vq - v->vqs);
1564c8cf318STiwei Bie }
1574c8cf318STiwei Bie 
vhost_vdpa_virtqueue_cb(void * private)1584c8cf318STiwei Bie static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
1594c8cf318STiwei Bie {
1604c8cf318STiwei Bie 	struct vhost_virtqueue *vq = private;
161265a0ad8SZhu Lingshan 	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
1624c8cf318STiwei Bie 
1634c8cf318STiwei Bie 	if (call_ctx)
1644c8cf318STiwei Bie 		eventfd_signal(call_ctx, 1);
1654c8cf318STiwei Bie 
1664c8cf318STiwei Bie 	return IRQ_HANDLED;
1674c8cf318STiwei Bie }
1684c8cf318STiwei Bie 
vhost_vdpa_config_cb(void * private)169776f3950SZhu Lingshan static irqreturn_t vhost_vdpa_config_cb(void *private)
170776f3950SZhu Lingshan {
171776f3950SZhu Lingshan 	struct vhost_vdpa *v = private;
172776f3950SZhu Lingshan 	struct eventfd_ctx *config_ctx = v->config_ctx;
173776f3950SZhu Lingshan 
174776f3950SZhu Lingshan 	if (config_ctx)
175776f3950SZhu Lingshan 		eventfd_signal(config_ctx, 1);
176776f3950SZhu Lingshan 
177776f3950SZhu Lingshan 	return IRQ_HANDLED;
178776f3950SZhu Lingshan }
179776f3950SZhu Lingshan 
/*
 * Register virtqueue @qid as an irq-bypass producer.
 *
 * Nothing is done when the device has no ->get_vq_irq() op, when that op
 * reports a negative irq, or when the queue has no call eventfd.  A failed
 * registration is only logged; the function has no return value.
 */
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_virtqueue *vq = &v->vqs[qid];
	int irq, err;

	if (!ops->get_vq_irq)
		return;

	/* The irq is queried before the eventfd check, as in the original. */
	irq = ops->get_vq_irq(vdpa, qid);
	if (irq < 0 || !vq->call_ctx.ctx)
		return;

	vq->call_ctx.producer.irq = irq;
	err = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(err)) {
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
			 qid, vq->call_ctx.producer.token, err);
	}
}
2032cf1ba9aSZhu Lingshan 
/* Unregister the irq-bypass producer of virtqueue @qid. */
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	irq_bypass_unregister_producer(&v->vqs[qid].call_ctx.producer);
}
2102cf1ba9aSZhu Lingshan 
vhost_vdpa_reset(struct vhost_vdpa * v)2117f05630dSXie Yongji static int vhost_vdpa_reset(struct vhost_vdpa *v)
2124c8cf318STiwei Bie {
2134c8cf318STiwei Bie 	struct vdpa_device *vdpa = v->vdpa;
2144c8cf318STiwei Bie 
21525abc060SJason Wang 	v->in_batch = 0;
2167f05630dSXie Yongji 
2177f05630dSXie Yongji 	return vdpa_reset(vdpa);
2184c8cf318STiwei Bie }
2194c8cf318STiwei Bie 
vhost_vdpa_bind_mm(struct vhost_vdpa * v)2209067de47SStefano Garzarella static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
2219067de47SStefano Garzarella {
2229067de47SStefano Garzarella 	struct vdpa_device *vdpa = v->vdpa;
2239067de47SStefano Garzarella 	const struct vdpa_config_ops *ops = vdpa->config;
2249067de47SStefano Garzarella 
2259067de47SStefano Garzarella 	if (!vdpa->use_va || !ops->bind_mm)
2269067de47SStefano Garzarella 		return 0;
2279067de47SStefano Garzarella 
2289067de47SStefano Garzarella 	return ops->bind_mm(vdpa, v->vdev.mm);
2299067de47SStefano Garzarella }
2309067de47SStefano Garzarella 
vhost_vdpa_unbind_mm(struct vhost_vdpa * v)2319067de47SStefano Garzarella static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
2329067de47SStefano Garzarella {
2339067de47SStefano Garzarella 	struct vdpa_device *vdpa = v->vdpa;
2349067de47SStefano Garzarella 	const struct vdpa_config_ops *ops = vdpa->config;
2359067de47SStefano Garzarella 
2369067de47SStefano Garzarella 	if (!vdpa->use_va || !ops->unbind_mm)
2379067de47SStefano Garzarella 		return;
2389067de47SStefano Garzarella 
2399067de47SStefano Garzarella 	ops->unbind_mm(vdpa);
2409067de47SStefano Garzarella }
2419067de47SStefano Garzarella 
/*
 * Copy the device id reported by ->get_device_id() to userspace.
 *
 * Four bytes (sizeof(u32)) are written to @argp.
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 device_id = vdpa->config->get_device_id(vdpa);

	return copy_to_user(argp, &device_id, sizeof(device_id)) ? -EFAULT : 0;
}
2554c8cf318STiwei Bie 
/*
 * Copy the status byte reported by ->get_status() to userspace.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u8 status = vdpa->config->get_status(vdpa);

	return copy_to_user(statusp, &status, sizeof(status)) ? -EFAULT : 0;
}
2694c8cf318STiwei Bie 
/*
 * Apply a new status byte supplied by userspace.
 *
 * A status of 0 resets the device through vdpa_reset(); any other value must
 * be a superset of the bits currently set and is written via
 * vdpa_set_status().  Irq-bypass producers of all virtqueues are unregistered
 * before DRIVER_OK is dropped and registered after DRIVER_OK is newly set.
 *
 * Returns 0 on success, -EFAULT when @statusp cannot be read, -EINVAL when
 * bits would be removed without a full reset, or the vdpa_reset() error.
 */
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 nvqs = v->nvqs;
	u8 new_status, old_status;
	bool was_ok, now_ok;
	u16 qid;
	int err;

	if (copy_from_user(&new_status, statusp, sizeof(new_status)))
		return -EFAULT;

	old_status = ops->get_status(vdpa);

	/* Bits may only be cleared by resetting the status to 0. */
	if (new_status != 0 && (old_status & ~new_status) != 0)
		return -EINVAL;

	was_ok = old_status & VIRTIO_CONFIG_S_DRIVER_OK;
	now_ok = new_status & VIRTIO_CONFIG_S_DRIVER_OK;

	if (was_ok && !now_ok) {
		for (qid = 0; qid < nvqs; qid++)
			vhost_vdpa_unsetup_vq_irq(v, qid);
	}

	if (new_status != 0) {
		vdpa_set_status(vdpa, new_status);
	} else {
		err = vdpa_reset(vdpa);
		if (err)
			return err;
	}

	if (now_ok && !was_ok) {
		for (qid = 0; qid < nvqs; qid++)
			vhost_vdpa_setup_vq_irq(v, qid);
	}

	return 0;
}
3084c8cf318STiwei Bie 
vhost_vdpa_config_validate(struct vhost_vdpa * v,struct vhost_vdpa_config * c)3094c8cf318STiwei Bie static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
3104c8cf318STiwei Bie 				      struct vhost_vdpa_config *c)
3114c8cf318STiwei Bie {
312d6d8bb92SStefano Garzarella 	struct vdpa_device *vdpa = v->vdpa;
313870aaff9SLaura Abbott 	size_t size = vdpa->config->get_config_size(vdpa);
3144c8cf318STiwei Bie 
3153ed21c14SDan Carpenter 	if (c->len == 0 || c->off > size)
3164c8cf318STiwei Bie 		return -EINVAL;
3174c8cf318STiwei Bie 
3184c8cf318STiwei Bie 	if (c->len > size - c->off)
3194c8cf318STiwei Bie 		return -E2BIG;
3204c8cf318STiwei Bie 
3214c8cf318STiwei Bie 	return 0;
3224c8cf318STiwei Bie }
3234c8cf318STiwei Bie 
/*
 * Read a window of the device config space into the user buffer @c->buf.
 *
 * Only the header of @c (everything before ->buf) is copied in; it supplies
 * the offset and length.  Any validation failure is reported as -EINVAL.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL on a bad
 * window, or -ENOMEM when the bounce buffer cannot be allocated.
 */
static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	unsigned long hdr_len = offsetof(struct vhost_vdpa_config, buf);
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	long ret = 0;
	u8 *bounce;

	if (copy_from_user(&config, c, hdr_len))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	bounce = kvzalloc(config.len, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, bounce, config.len);

	if (copy_to_user(c->buf, bounce, config.len))
		ret = -EFAULT;

	kvfree(bounce);
	return ret;
}
3504c8cf318STiwei Bie 
/*
 * Write a window of the device config space from the user buffer @c->buf.
 *
 * Only the header of @c (everything before ->buf) is copied in first; the
 * payload is then duplicated from userspace with vmemdup_user().  Any
 * validation failure is reported as -EINVAL.
 *
 * Returns 0 on success, -EFAULT when the header cannot be read, -EINVAL on a
 * bad window, or the error encoded by vmemdup_user().
 */
static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	unsigned long hdr_len = offsetof(struct vhost_vdpa_config, buf);
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	u8 *payload;

	if (copy_from_user(&config, c, hdr_len))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	payload = vmemdup_user(c->buf, config.len);
	if (IS_ERR(payload))
		return PTR_ERR(payload);

	vdpa_set_config(vdpa, config.off, payload, config.len);
	kvfree(payload);

	return 0;
}
3734c8cf318STiwei Bie 
vhost_vdpa_can_suspend(const struct vhost_vdpa * v)3740723f1dfSEugenio Pérez static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
3750723f1dfSEugenio Pérez {
3760723f1dfSEugenio Pérez 	struct vdpa_device *vdpa = v->vdpa;
3770723f1dfSEugenio Pérez 	const struct vdpa_config_ops *ops = vdpa->config;
3780723f1dfSEugenio Pérez 
3790723f1dfSEugenio Pérez 	return ops->suspend;
3800723f1dfSEugenio Pérez }
3810723f1dfSEugenio Pérez 
vhost_vdpa_can_resume(const struct vhost_vdpa * v)38269106b6fSSebastien Boeuf static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
38369106b6fSSebastien Boeuf {
38469106b6fSSebastien Boeuf 	struct vdpa_device *vdpa = v->vdpa;
38569106b6fSSebastien Boeuf 	const struct vdpa_config_ops *ops = vdpa->config;
38669106b6fSSebastien Boeuf 
38769106b6fSSebastien Boeuf 	return ops->resume;
38869106b6fSSebastien Boeuf }
38969106b6fSSebastien Boeuf 
/*
 * Copy the feature bits reported by ->get_device_features() to userspace.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	u64 features = vdpa->config->get_device_features(vdpa);

	return copy_to_user(featurep, &features, sizeof(features)) ? -EFAULT : 0;
}
4034c8cf318STiwei Bie 
vhost_vdpa_get_backend_features(const struct vhost_vdpa * v)404b63e5c70SEugenio Pérez static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
405b63e5c70SEugenio Pérez {
406b63e5c70SEugenio Pérez 	struct vdpa_device *vdpa = v->vdpa;
407b63e5c70SEugenio Pérez 	const struct vdpa_config_ops *ops = vdpa->config;
408b63e5c70SEugenio Pérez 
409b63e5c70SEugenio Pérez 	if (!ops->get_backend_features)
410b63e5c70SEugenio Pérez 		return 0;
411b63e5c70SEugenio Pérez 	else
412b63e5c70SEugenio Pérez 		return ops->get_backend_features(vdpa);
413b63e5c70SEugenio Pérez }
414b63e5c70SEugenio Pérez 
/*
 * Set the driver features requested by userspace.
 *
 * Refused with -EBUSY once FEATURES_OK is set in the device status.  After
 * the device accepts the request, the value read back through
 * ->get_driver_features() is stored in every virtqueue's acked_features
 * under that virtqueue's mutex.
 *
 * Returns 0 on success, -EBUSY, -EFAULT when @featurep cannot be read, or
 * -EINVAL when vdpa_set_features() fails.
 */
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_dev *d = &v->vdev;
	u64 requested, negotiated;
	int n;

	/* Features are frozen once negotiation has completed. */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&requested, featurep, sizeof(requested)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, requested))
		return -EINVAL;

	/* Propagate what the device actually recorded to each virtqueue. */
	negotiated = ops->get_driver_features(vdpa);
	for (n = 0; n < d->nvqs; n++) {
		struct vhost_virtqueue *vq = d->vqs[n];

		mutex_lock(&vq->mutex);
		vq->acked_features = negotiated;
		mutex_unlock(&vq->mutex);
	}

	return 0;
}
4494c8cf318STiwei Bie 
/*
 * Copy the value reported by ->get_vq_num_max() to userspace.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u16 num = vdpa->config->get_vq_num_max(vdpa);

	return copy_to_user(argp, &num, sizeof(num)) ? -EFAULT : 0;
}
4634c8cf318STiwei Bie 
vhost_vdpa_config_put(struct vhost_vdpa * v)464776f3950SZhu Lingshan static void vhost_vdpa_config_put(struct vhost_vdpa *v)
465776f3950SZhu Lingshan {
466f6bbf001SStefano Garzarella 	if (v->config_ctx) {
467776f3950SZhu Lingshan 		eventfd_ctx_put(v->config_ctx);
468f6bbf001SStefano Garzarella 		v->config_ctx = NULL;
469f6bbf001SStefano Garzarella 	}
470776f3950SZhu Lingshan }
471776f3950SZhu Lingshan 
/*
 * Install, replace or remove the eventfd used for config-change
 * notifications, then (re)register the config callback with the device.
 *
 * @argp points to an int file descriptor; VHOST_FILE_UNBIND removes the
 * current eventfd.  The previously installed eventfd, if any, is released
 * even when the new descriptor turns out to be invalid; in that case
 * v->config_ctx is left NULL and the device callback is not re-registered.
 *
 * Returns 0 on success, -EFAULT when @argp cannot be read, or the error
 * encoded by eventfd_ctx_fdget().
 */
static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	/*
	 * Use a designated initializer so that every member not named here
	 * (struct vdpa_callback also carries ->trigger) is zeroed.  Assigning
	 * the two members one by one would hand an indeterminate ->trigger
	 * to the driver's set_config_cb().
	 */
	struct vdpa_callback cb = {
		.callback = vhost_vdpa_config_cb,
		.private = v,
	};
	struct eventfd_ctx *ctx;
	int fd;

	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	/* After the swap, ctx holds the old eventfd and v->config_ctx the new. */
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		/* Never leave an ERR_PTR where the callback expects a ctx. */
		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}
5002cf1ba9aSZhu Lingshan 
/*
 * Copy the IOVA range cached in v->range to userspace as a
 * struct vhost_vdpa_iova_range.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range out = {
		.first = v->range.first,
		.last = v->range.last,
	};

	return copy_to_user(argp, &out, sizeof(out)) ? -EFAULT : 0;
}
5121b48dc03SJason Wang 
/*
 * Copy the value reported by ->get_config_size() to userspace as a u32.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	u32 size = vdpa->config->get_config_size(vdpa);

	return copy_to_user(argp, &size, sizeof(size)) ? -EFAULT : 0;
}
526a61280ddSLongpeng 
/*
 * Copy the vDPA device's nvqs field to userspace.
 *
 * Returns 0 on success, -EFAULT when the copy fails.
 */
static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;

	return copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)) ? -EFAULT : 0;
}
536b04d910aSLongpeng 
537f345a014SEugenio Pérez /* After a successful return of ioctl the device must not process more
538f345a014SEugenio Pérez  * virtqueue descriptors. The device can answer to read or writes of config
539f345a014SEugenio Pérez  * fields as if it were not suspended. In particular, writing to "queue_enable"
540f345a014SEugenio Pérez  * with a value of 1 will not make the device start processing buffers.
541f345a014SEugenio Pérez  */
vhost_vdpa_suspend(struct vhost_vdpa * v)542f345a014SEugenio Pérez static long vhost_vdpa_suspend(struct vhost_vdpa *v)
543f345a014SEugenio Pérez {
544f345a014SEugenio Pérez 	struct vdpa_device *vdpa = v->vdpa;
545f345a014SEugenio Pérez 	const struct vdpa_config_ops *ops = vdpa->config;
546f345a014SEugenio Pérez 
547f345a014SEugenio Pérez 	if (!ops->suspend)
548f345a014SEugenio Pérez 		return -EOPNOTSUPP;
549f345a014SEugenio Pérez 
550f345a014SEugenio Pérez 	return ops->suspend(vdpa);
551f345a014SEugenio Pérez }
552f345a014SEugenio Pérez 
5533b688d7aSSebastien Boeuf /* After a successful return of this ioctl the device resumes processing
5543b688d7aSSebastien Boeuf  * virtqueue descriptors. The device becomes fully operational the same way it
5553b688d7aSSebastien Boeuf  * was before it was suspended.
5563b688d7aSSebastien Boeuf  */
vhost_vdpa_resume(struct vhost_vdpa * v)5573b688d7aSSebastien Boeuf static long vhost_vdpa_resume(struct vhost_vdpa *v)
5583b688d7aSSebastien Boeuf {
5593b688d7aSSebastien Boeuf 	struct vdpa_device *vdpa = v->vdpa;
5603b688d7aSSebastien Boeuf 	const struct vdpa_config_ops *ops = vdpa->config;
5613b688d7aSSebastien Boeuf 
5623b688d7aSSebastien Boeuf 	if (!ops->resume)
5633b688d7aSSebastien Boeuf 		return -EOPNOTSUPP;
5643b688d7aSSebastien Boeuf 
5653b688d7aSSebastien Boeuf 	return ops->resume(vdpa);
5663b688d7aSSebastien Boeuf }
5673b688d7aSSebastien Boeuf 
vhost_vdpa_vring_ioctl(struct vhost_vdpa * v,unsigned int cmd,void __user * argp)5684c8cf318STiwei Bie static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
5694c8cf318STiwei Bie 				   void __user *argp)
5704c8cf318STiwei Bie {
5714c8cf318STiwei Bie 	struct vdpa_device *vdpa = v->vdpa;
5724c8cf318STiwei Bie 	const struct vdpa_config_ops *ops = vdpa->config;
573aac50c0bSEli Cohen 	struct vdpa_vq_state vq_state;
5744c8cf318STiwei Bie 	struct vdpa_callback cb;
5754c8cf318STiwei Bie 	struct vhost_virtqueue *vq;
5764c8cf318STiwei Bie 	struct vhost_vring_state s;
5774c8cf318STiwei Bie 	u32 idx;
5784c8cf318STiwei Bie 	long r;
5794c8cf318STiwei Bie 
5804c8cf318STiwei Bie 	r = get_user(idx, (u32 __user *)argp);
5814c8cf318STiwei Bie 	if (r < 0)
5824c8cf318STiwei Bie 		return r;
5834c8cf318STiwei Bie 
5844c8cf318STiwei Bie 	if (idx >= v->nvqs)
5854c8cf318STiwei Bie 		return -ENOBUFS;
5864c8cf318STiwei Bie 
5874c8cf318STiwei Bie 	idx = array_index_nospec(idx, v->nvqs);
5884c8cf318STiwei Bie 	vq = &v->vqs[idx];
5894c8cf318STiwei Bie 
590b0bd82bfSJason Wang 	switch (cmd) {
591b0bd82bfSJason Wang 	case VHOST_VDPA_SET_VRING_ENABLE:
5924c8cf318STiwei Bie 		if (copy_from_user(&s, argp, sizeof(s)))
5934c8cf318STiwei Bie 			return -EFAULT;
5944c8cf318STiwei Bie 		ops->set_vq_ready(vdpa, idx, s.num);
5954c8cf318STiwei Bie 		return 0;
5962d1fcb77SGautam Dawar 	case VHOST_VDPA_GET_VRING_GROUP:
59700d1f546SJason Wang 		if (!ops->get_vq_group)
59800d1f546SJason Wang 			return -EOPNOTSUPP;
5992d1fcb77SGautam Dawar 		s.index = idx;
6002d1fcb77SGautam Dawar 		s.num = ops->get_vq_group(vdpa, idx);
6012d1fcb77SGautam Dawar 		if (s.num >= vdpa->ngroups)
6022d1fcb77SGautam Dawar 			return -EIO;
6032d1fcb77SGautam Dawar 		else if (copy_to_user(argp, &s, sizeof(s)))
6042d1fcb77SGautam Dawar 			return -EFAULT;
6052d1fcb77SGautam Dawar 		return 0;
60684d7c8fdSGautam Dawar 	case VHOST_VDPA_SET_GROUP_ASID:
60784d7c8fdSGautam Dawar 		if (copy_from_user(&s, argp, sizeof(s)))
60884d7c8fdSGautam Dawar 			return -EFAULT;
60984d7c8fdSGautam Dawar 		if (s.num >= vdpa->nas)
61084d7c8fdSGautam Dawar 			return -EINVAL;
61184d7c8fdSGautam Dawar 		if (!ops->set_group_asid)
61284d7c8fdSGautam Dawar 			return -EOPNOTSUPP;
61384d7c8fdSGautam Dawar 		return ops->set_group_asid(vdpa, idx, s.num);
614b0bd82bfSJason Wang 	case VHOST_GET_VRING_BASE:
61523750e39SEli Cohen 		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
61623750e39SEli Cohen 		if (r)
61723750e39SEli Cohen 			return r;
61823750e39SEli Cohen 
619beee7fdbSShannon Nelson 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
620beee7fdbSShannon Nelson 			vq->last_avail_idx = vq_state.packed.last_avail_idx |
621beee7fdbSShannon Nelson 					     (vq_state.packed.last_avail_counter << 15);
622beee7fdbSShannon Nelson 			vq->last_used_idx = vq_state.packed.last_used_idx |
623beee7fdbSShannon Nelson 					    (vq_state.packed.last_used_counter << 15);
624beee7fdbSShannon Nelson 		} else {
625530a5678SJason Wang 			vq->last_avail_idx = vq_state.split.avail_index;
626beee7fdbSShannon Nelson 		}
627b0bd82bfSJason Wang 		break;
628*ca64edd7SJason Wang 	case VHOST_SET_VRING_CALL:
629*ca64edd7SJason Wang 		if (vq->call_ctx.ctx) {
630*ca64edd7SJason Wang 			if (ops->get_status(vdpa) &
631*ca64edd7SJason Wang 			    VIRTIO_CONFIG_S_DRIVER_OK)
632*ca64edd7SJason Wang 				vhost_vdpa_unsetup_vq_irq(v, idx);
633*ca64edd7SJason Wang 			vq->call_ctx.producer.token = NULL;
634*ca64edd7SJason Wang 		}
635*ca64edd7SJason Wang 		break;
636b0bd82bfSJason Wang 	}
6374c8cf318STiwei Bie 
6384c8cf318STiwei Bie 	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
6394c8cf318STiwei Bie 	if (r)
6404c8cf318STiwei Bie 		return r;
6414c8cf318STiwei Bie 
6424c8cf318STiwei Bie 	switch (cmd) {
6434c8cf318STiwei Bie 	case VHOST_SET_VRING_ADDR:
6444c8cf318STiwei Bie 		if (ops->set_vq_address(vdpa, idx,
6454c8cf318STiwei Bie 					(u64)(uintptr_t)vq->desc,
6464c8cf318STiwei Bie 					(u64)(uintptr_t)vq->avail,
6474c8cf318STiwei Bie 					(u64)(uintptr_t)vq->used))
6484c8cf318STiwei Bie 			r = -EINVAL;
6494c8cf318STiwei Bie 		break;
6504c8cf318STiwei Bie 
6514c8cf318STiwei Bie 	case VHOST_SET_VRING_BASE:
652beee7fdbSShannon Nelson 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
653beee7fdbSShannon Nelson 			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
654beee7fdbSShannon Nelson 			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
655beee7fdbSShannon Nelson 			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
656beee7fdbSShannon Nelson 			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
657beee7fdbSShannon Nelson 		} else {
658530a5678SJason Wang 			vq_state.split.avail_index = vq->last_avail_idx;
659beee7fdbSShannon Nelson 		}
660beee7fdbSShannon Nelson 		r = ops->set_vq_state(vdpa, idx, &vq_state);
6614c8cf318STiwei Bie 		break;
6624c8cf318STiwei Bie 
6634c8cf318STiwei Bie 	case VHOST_SET_VRING_CALL:
664265a0ad8SZhu Lingshan 		if (vq->call_ctx.ctx) {
6654c8cf318STiwei Bie 			cb.callback = vhost_vdpa_virtqueue_cb;
6664c8cf318STiwei Bie 			cb.private = vq;
6675e68470fSXie Yongji 			cb.trigger = vq->call_ctx.ctx;
668*ca64edd7SJason Wang 			vq->call_ctx.producer.token = vq->call_ctx.ctx;
669*ca64edd7SJason Wang 			if (ops->get_status(vdpa) &
670*ca64edd7SJason Wang 			    VIRTIO_CONFIG_S_DRIVER_OK)
671*ca64edd7SJason Wang 				vhost_vdpa_setup_vq_irq(v, idx);
6724c8cf318STiwei Bie 		} else {
6734c8cf318STiwei Bie 			cb.callback = NULL;
6744c8cf318STiwei Bie 			cb.private = NULL;
6755e68470fSXie Yongji 			cb.trigger = NULL;
6764c8cf318STiwei Bie 		}
6774c8cf318STiwei Bie 		ops->set_vq_cb(vdpa, idx, &cb);
6784c8cf318STiwei Bie 		break;
6794c8cf318STiwei Bie 
6804c8cf318STiwei Bie 	case VHOST_SET_VRING_NUM:
6814c8cf318STiwei Bie 		ops->set_vq_num(vdpa, idx, vq->num);
6824c8cf318STiwei Bie 		break;
6834c8cf318STiwei Bie 	}
6844c8cf318STiwei Bie 
6854c8cf318STiwei Bie 	return r;
6864c8cf318STiwei Bie }
6874c8cf318STiwei Bie 
vhost_vdpa_unlocked_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)6884c8cf318STiwei Bie static long vhost_vdpa_unlocked_ioctl(struct file *filep,
6894c8cf318STiwei Bie 				      unsigned int cmd, unsigned long arg)
6904c8cf318STiwei Bie {
6914c8cf318STiwei Bie 	struct vhost_vdpa *v = filep->private_data;
6924c8cf318STiwei Bie 	struct vhost_dev *d = &v->vdev;
6934c8cf318STiwei Bie 	void __user *argp = (void __user *)arg;
694a127c5bbSJason Wang 	u64 __user *featurep = argp;
695a127c5bbSJason Wang 	u64 features;
6967922460eSDan Carpenter 	long r = 0;
6974c8cf318STiwei Bie 
698a127c5bbSJason Wang 	if (cmd == VHOST_SET_BACKEND_FEATURES) {
6997922460eSDan Carpenter 		if (copy_from_user(&features, featurep, sizeof(features)))
7007922460eSDan Carpenter 			return -EFAULT;
7010723f1dfSEugenio Pérez 		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
70269106b6fSSebastien Boeuf 				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
7039f09fd61SEugenio Pérez 				 BIT_ULL(VHOST_BACKEND_F_RESUME) |
7049f09fd61SEugenio Pérez 				 BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
7050723f1dfSEugenio Pérez 			return -EOPNOTSUPP;
7060723f1dfSEugenio Pérez 		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
7070723f1dfSEugenio Pérez 		     !vhost_vdpa_can_suspend(v))
708a127c5bbSJason Wang 			return -EOPNOTSUPP;
70969106b6fSSebastien Boeuf 		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
71069106b6fSSebastien Boeuf 		     !vhost_vdpa_can_resume(v))
71169106b6fSSebastien Boeuf 			return -EOPNOTSUPP;
712a127c5bbSJason Wang 		vhost_set_backend_features(&v->vdev, features);
713a127c5bbSJason Wang 		return 0;
714a127c5bbSJason Wang 	}
715a127c5bbSJason Wang 
7164c8cf318STiwei Bie 	mutex_lock(&d->mutex);
7174c8cf318STiwei Bie 
7184c8cf318STiwei Bie 	switch (cmd) {
7194c8cf318STiwei Bie 	case VHOST_VDPA_GET_DEVICE_ID:
7204c8cf318STiwei Bie 		r = vhost_vdpa_get_device_id(v, argp);
7214c8cf318STiwei Bie 		break;
7224c8cf318STiwei Bie 	case VHOST_VDPA_GET_STATUS:
7234c8cf318STiwei Bie 		r = vhost_vdpa_get_status(v, argp);
7244c8cf318STiwei Bie 		break;
7254c8cf318STiwei Bie 	case VHOST_VDPA_SET_STATUS:
7264c8cf318STiwei Bie 		r = vhost_vdpa_set_status(v, argp);
7274c8cf318STiwei Bie 		break;
7284c8cf318STiwei Bie 	case VHOST_VDPA_GET_CONFIG:
7294c8cf318STiwei Bie 		r = vhost_vdpa_get_config(v, argp);
7304c8cf318STiwei Bie 		break;
7314c8cf318STiwei Bie 	case VHOST_VDPA_SET_CONFIG:
7324c8cf318STiwei Bie 		r = vhost_vdpa_set_config(v, argp);
7334c8cf318STiwei Bie 		break;
7344c8cf318STiwei Bie 	case VHOST_GET_FEATURES:
7354c8cf318STiwei Bie 		r = vhost_vdpa_get_features(v, argp);
7364c8cf318STiwei Bie 		break;
7374c8cf318STiwei Bie 	case VHOST_SET_FEATURES:
7384c8cf318STiwei Bie 		r = vhost_vdpa_set_features(v, argp);
7394c8cf318STiwei Bie 		break;
7404c8cf318STiwei Bie 	case VHOST_VDPA_GET_VRING_NUM:
7414c8cf318STiwei Bie 		r = vhost_vdpa_get_vring_num(v, argp);
7424c8cf318STiwei Bie 		break;
7433ace88bdSGautam Dawar 	case VHOST_VDPA_GET_GROUP_NUM:
744f4a8686eSDan Carpenter 		if (copy_to_user(argp, &v->vdpa->ngroups,
745f4a8686eSDan Carpenter 				 sizeof(v->vdpa->ngroups)))
746f4a8686eSDan Carpenter 			r = -EFAULT;
7473ace88bdSGautam Dawar 		break;
748a0c95f20SGautam Dawar 	case VHOST_VDPA_GET_AS_NUM:
749f4a8686eSDan Carpenter 		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
750f4a8686eSDan Carpenter 			r = -EFAULT;
751a0c95f20SGautam Dawar 		break;
7524c8cf318STiwei Bie 	case VHOST_SET_LOG_BASE:
7534c8cf318STiwei Bie 	case VHOST_SET_LOG_FD:
7544c8cf318STiwei Bie 		r = -ENOIOCTLCMD;
7554c8cf318STiwei Bie 		break;
756776f3950SZhu Lingshan 	case VHOST_VDPA_SET_CONFIG_CALL:
757776f3950SZhu Lingshan 		r = vhost_vdpa_set_config_call(v, argp);
758776f3950SZhu Lingshan 		break;
759a127c5bbSJason Wang 	case VHOST_GET_BACKEND_FEATURES:
760a127c5bbSJason Wang 		features = VHOST_VDPA_BACKEND_FEATURES;
7610723f1dfSEugenio Pérez 		if (vhost_vdpa_can_suspend(v))
7620723f1dfSEugenio Pérez 			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
76369106b6fSSebastien Boeuf 		if (vhost_vdpa_can_resume(v))
76469106b6fSSebastien Boeuf 			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
765b63e5c70SEugenio Pérez 		features |= vhost_vdpa_get_backend_features(v);
7667922460eSDan Carpenter 		if (copy_to_user(featurep, &features, sizeof(features)))
7677922460eSDan Carpenter 			r = -EFAULT;
768a127c5bbSJason Wang 		break;
7691b48dc03SJason Wang 	case VHOST_VDPA_GET_IOVA_RANGE:
7701b48dc03SJason Wang 		r = vhost_vdpa_get_iova_range(v, argp);
7711b48dc03SJason Wang 		break;
772a61280ddSLongpeng 	case VHOST_VDPA_GET_CONFIG_SIZE:
773a61280ddSLongpeng 		r = vhost_vdpa_get_config_size(v, argp);
774a61280ddSLongpeng 		break;
775b04d910aSLongpeng 	case VHOST_VDPA_GET_VQS_COUNT:
776b04d910aSLongpeng 		r = vhost_vdpa_get_vqs_count(v, argp);
777b04d910aSLongpeng 		break;
778f345a014SEugenio Pérez 	case VHOST_VDPA_SUSPEND:
779f345a014SEugenio Pérez 		r = vhost_vdpa_suspend(v);
780f345a014SEugenio Pérez 		break;
7813b688d7aSSebastien Boeuf 	case VHOST_VDPA_RESUME:
7823b688d7aSSebastien Boeuf 		r = vhost_vdpa_resume(v);
7833b688d7aSSebastien Boeuf 		break;
7844c8cf318STiwei Bie 	default:
7854c8cf318STiwei Bie 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
7864c8cf318STiwei Bie 		if (r == -ENOIOCTLCMD)
7874c8cf318STiwei Bie 			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
7884c8cf318STiwei Bie 		break;
7894c8cf318STiwei Bie 	}
7904c8cf318STiwei Bie 
7919067de47SStefano Garzarella 	if (r)
7929067de47SStefano Garzarella 		goto out;
7939067de47SStefano Garzarella 
7949067de47SStefano Garzarella 	switch (cmd) {
7959067de47SStefano Garzarella 	case VHOST_SET_OWNER:
7969067de47SStefano Garzarella 		r = vhost_vdpa_bind_mm(v);
7979067de47SStefano Garzarella 		if (r)
7989067de47SStefano Garzarella 			vhost_dev_reset_owner(d, NULL);
7999067de47SStefano Garzarella 		break;
8009067de47SStefano Garzarella 	}
8019067de47SStefano Garzarella out:
8024c8cf318STiwei Bie 	mutex_unlock(&d->mutex);
8034c8cf318STiwei Bie 	return r;
8044c8cf318STiwei Bie }
/*
 * Remove one IOTLB mapping from whichever translation backend is in use.
 *
 * Devices that provide dma_map are asked to drop the range through their
 * dma_unmap callback.  Devices that provide set_map are left alone here.
 * Everything else is unmapped from the IOMMU domain of @v.
 */
static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
				     struct vhost_iotlb_map *map, u32 asid)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *cfg = vdpa->config;

	if (cfg->dma_map) {
		cfg->dma_unmap(vdpa, asid, map->start, map->size);
		return;
	}

	if (cfg->set_map)
		return;

	iommu_unmap(v->domain, map->start, map->size);
}
8164c8cf318STiwei Bie 
/*
 * Tear down every mapping of @iotlb that intersects [@start, @last] for a
 * device working on pinned pages.
 *
 * For each mapping the backing pages are unpinned (and marked dirty first
 * when the mapping carries VHOST_ACCESS_WO), the page count is subtracted
 * from the owner mm's pinned_vm, the backend translation is removed and
 * the IOTLB entry is freed.
 */
static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb_map *map;

	for (;;) {
		unsigned long first_pfn, nr_pages, idx;

		map = vhost_iotlb_itree_first(iotlb, start, last);
		if (!map)
			break;

		first_pfn = PFN_DOWN(map->addr);
		nr_pages = PFN_DOWN(map->size);

		for (idx = 0; idx < nr_pages; idx++) {
			struct page *pg = pfn_to_page(first_pfn + idx);

			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(pg);
			unpin_user_page(pg);
		}

		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}
8394c8cf318STiwei Bie 
/*
 * Tear down every mapping of @iotlb that intersects [@start, @last] for a
 * device working on user virtual addresses.
 *
 * Each mapping's opaque pointer is a struct vdpa_map_file: its file
 * reference is dropped and the descriptor freed before the backend
 * translation and the IOTLB entry are removed.
 */
static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
				u64 start, u64 last, u32 asid)
{
	struct vhost_iotlb_map *map;

	for (;;) {
		struct vdpa_map_file *mf;

		map = vhost_iotlb_itree_first(iotlb, start, last);
		if (!map)
			break;

		mf = (struct vdpa_map_file *)map->opaque;
		fput(mf->file);
		kfree(mf);

		vhost_vdpa_general_unmap(v, map, asid);
		vhost_iotlb_map_free(iotlb, map);
	}
}
854d8945ec4SXie Yongji 
/*
 * Unmap [@start, @last] from @iotlb, choosing the virtual-address or the
 * pinned-page teardown according to the device's use_va flag.
 */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
				   struct vhost_iotlb *iotlb, u64 start,
				   u64 last, u32 asid)
{
	if (v->vdpa->use_va)
		vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
	else
		vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
}
86622af48cfSXie Yongji 
/*
 * perm_to_iommu_flags - translate a vhost access permission to IOMMU flags
 * @perm: one of VHOST_ACCESS_RO, VHOST_ACCESS_WO or VHOST_ACCESS_RW
 *
 * Any other value triggers a WARN and yields no read/write permission.
 *
 * Return: the matching IOMMU_READ/IOMMU_WRITE bits, always combined with
 * IOMMU_CACHE.
 */
static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		/* The permission is invalid; nothing is being invalidated. */
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}
8884c8cf318STiwei Bie 
/*
 * vhost_vdpa_map - record one translation and push it to the backend
 * @v: vhost-vdpa instance
 * @iotlb: IOTLB (address space) receiving the entry
 * @iova: start of the I/O virtual range
 * @size: length of the range in bytes
 * @pa: address stored in the entry and handed to the backend
 * @perm: VHOST_ACCESS_* permission
 * @opaque: caller context stored with the entry
 *
 * The entry is added to @iotlb first.  It is then programmed through the
 * device's dma_map callback, through set_map (deferred while a batch is
 * open), or into the IOMMU domain when the device offers neither.  If the
 * backend refuses, the IOTLB entry is removed again.  For devices that do
 * not use virtual addresses the mapped pages are added to the owner mm's
 * pinned_vm.
 *
 * Return: 0 on success, otherwise the error from the IOTLB or the backend.
 */
static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	u64 last = iova + size - 1;
	int err;

	err = vhost_iotlb_add_range_ctx(iotlb, iova, last, pa, perm, opaque);
	if (err)
		return err;

	if (ops->dma_map)
		err = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
	else if (!ops->set_map)
		err = iommu_map(v->domain, iova, pa, size,
				perm_to_iommu_flags(perm), GFP_KERNEL);
	else if (!v->in_batch)
		err = ops->set_map(vdpa, asid, iotlb);

	if (err) {
		/* Backend refused: drop the entry that was just added. */
		vhost_iotlb_del_range(iotlb, iova, last);
		return err;
	}

	if (!vdpa->use_va)
		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);

	return 0;
}
9224c8cf318STiwei Bie 
/*
 * Remove the translations covering [@iova, @iova + @size - 1] from @iotlb.
 *
 * Devices with a set_map callback are handed the updated IOTLB afterwards,
 * unless a batch is in progress.
 */
static void vhost_vdpa_unmap(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 asid = iotlb_to_asid(iotlb);
	u64 last = iova + size - 1;

	vhost_vdpa_iotlb_unmap(v, iotlb, iova, last, asid);

	if (ops->set_map && !v->in_batch)
		ops->set_map(vdpa, asid, iotlb);
}
9394c8cf318STiwei Bie 
/*
 * vhost_vdpa_va_map - map a user virtual range for a use_va device
 * @v: vhost-vdpa instance
 * @iotlb: IOTLB (address space) receiving the entries
 * @iova: start of the I/O virtual range
 * @size: length of the range in bytes
 * @uaddr: user virtual address backing @iova
 * @perm: VHOST_ACCESS_* permission
 *
 * Walks the VMAs behind [@uaddr, @uaddr + @size) under the mmap read lock.
 * Only shared, file-backed VMAs that are neither VM_IO nor VM_PFNMAP are
 * mapped; each gets a struct vdpa_map_file holding a file reference and
 * the file offset.  Other VMAs are skipped without error.
 *
 * On failure the part of the range walked so far is unmapped again.
 *
 * Return: 0 on success, -EINVAL if no VMA is found, -ENOMEM on allocation
 * failure, or the error returned by vhost_vdpa_map().
 */
static int vhost_vdpa_va_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	u64 offset, map_size, map_iova = iova;
	struct vdpa_map_file *map_file;
	struct vm_area_struct *vma;
	int ret = 0;

	mmap_read_lock(dev->mm);

	while (size) {
		/*
		 * NOTE(review): find_vma() may hand back a VMA that starts
		 * above uaddr; that case is not checked here - confirm
		 * whether a hole in the range can reach this point.
		 */
		vma = find_vma(dev->mm, uaddr);
		if (!vma) {
			ret = -EINVAL;
			break;
		}
		map_size = min(size, vma->vm_end - uaddr);
		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
			goto next;

		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
		if (!map_file) {
			ret = -ENOMEM;
			break;
		}
		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
		map_file->offset = offset;
		map_file->file = get_file(vma->vm_file);
		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
				     perm, map_file);
		if (ret) {
			/* The entry was not stored; release what it would own. */
			fput(map_file->file);
			kfree(map_file);
			break;
		}
next:
		size -= map_size;
		uaddr += map_size;
		map_iova += map_size;
	}

	/*
	 * Roll back what was mapped before the failure.  Skip the call when
	 * the very first step failed: vhost_vdpa_unmap() derives the last
	 * address as "iova + size - 1", so a zero length at iova == 0 would
	 * wrap to U64_MAX and tear down every mapping in @iotlb.
	 */
	if (ret && map_iova != iova)
		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);

	mmap_read_unlock(dev->mm);

	return ret;
}
990d8945ec4SXie Yongji 
/*
 * vhost_vdpa_pa_map - pin user pages and map them for a non-use_va device
 * @v: vhost-vdpa instance
 * @iotlb: IOTLB (address space) receiving the entries
 * @iova: start of the I/O virtual range
 * @size: length of the range in bytes
 * @uaddr: user virtual address backing @iova
 * @perm: VHOST_ACCESS_* permission
 *
 * The user range is pinned one page_list (a single page worth of struct
 * page pointers) at a time.  Physically contiguous runs of pinned pages
 * are merged and handed to vhost_vdpa_map() as one mapping each.
 *
 * Bookkeeping: pages with pfn in [map_pfn, last_pfn] are pinned but not
 * yet mapped.  @nchunks is non-zero whenever such pages exist, so the
 * error path knows it has to unpin them.  Pages that were mapped already
 * are released by vhost_vdpa_unmap().
 *
 * Return: 0 on success, -ENOMEM when memory or the RLIMIT_MEMLOCK budget
 * is exhausted or fewer pages than requested could be pinned, -EINVAL for
 * an empty range, or the error from pin_user_pages()/vhost_vdpa_map().
 */
static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
			     struct vhost_iotlb *iotlb,
			     u64 iova, u64 size, u64 uaddr, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 start = iova;
	long pinned;
	int ret = 0;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	/* Number of pages touched, including the leading partial page. */
	npages = PFN_UP(size + (iova & ~PAGE_MASK));
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				/* Short pin: give back what we did get. */
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map the contiguous run that just ended */
				csize = PFN_PHYS(last_pfn - map_pfn + 1);
				ret = vhost_vdpa_map(v, iotlb, iova, csize,
						     PFN_PHYS(map_pfn),
						     perm, NULL);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				/*
				 * The page at this_pfn (and any that follow
				 * it in the current page_list) is still
				 * pinned and unmapped, so the counter must
				 * stay non-zero.  Resetting it to 0 made a
				 * later failure skip the unpin below and
				 * leak those pins.
				 */
				nchunks = 1;
			}

			last_pfn = this_pfn;
		}

		cur_base += PFN_PHYS(pinned);
		npages -= pinned;
	}

	/* Map the final contiguous run */
	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
			     PFN_PHYS(map_pfn), perm, NULL);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			WARN_ON(!last_pfn);
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, iotlb, start, size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;

}
111522af48cfSXie Yongji 
vhost_vdpa_process_iotlb_update(struct vhost_vdpa * v,struct vhost_iotlb * iotlb,struct vhost_iotlb_msg * msg)111622af48cfSXie Yongji static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1117ae967246SGautam Dawar 					   struct vhost_iotlb *iotlb,
111822af48cfSXie Yongji 					   struct vhost_iotlb_msg *msg)
111922af48cfSXie Yongji {
1120d8945ec4SXie Yongji 	struct vdpa_device *vdpa = v->vdpa;
112122af48cfSXie Yongji 
112222af48cfSXie Yongji 	if (msg->iova < v->range.first || !msg->size ||
112322af48cfSXie Yongji 	    msg->iova > U64_MAX - msg->size + 1 ||
112422af48cfSXie Yongji 	    msg->iova + msg->size - 1 > v->range.last)
112522af48cfSXie Yongji 		return -EINVAL;
112622af48cfSXie Yongji 
112722af48cfSXie Yongji 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
112822af48cfSXie Yongji 				    msg->iova + msg->size - 1))
112922af48cfSXie Yongji 		return -EEXIST;
113022af48cfSXie Yongji 
1131d8945ec4SXie Yongji 	if (vdpa->use_va)
1132ae967246SGautam Dawar 		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1133d8945ec4SXie Yongji 					 msg->uaddr, msg->perm);
1134d8945ec4SXie Yongji 
1135ae967246SGautam Dawar 	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
113622af48cfSXie Yongji 				 msg->perm);
11374c8cf318STiwei Bie }
11384c8cf318STiwei Bie 
/*
 * vhost_vdpa_process_iotlb_msg - dispatch one IOTLB message
 * @dev: vhost device embedded in the struct vhost_vdpa
 * @asid: address space the message refers to
 * @msg: the message
 *
 * Runs with the vhost device mutex held.  UPDATE and BATCH_BEGIN look up
 * the address space and create it if needed; every other type requires it
 * to exist already.  While a batch is open, messages for a different
 * address space than the one that opened the batch are rejected.
 *
 * Return: 0 on success, -EINVAL for an unknown type, a missing address
 * space or a batch/ASID mismatch, or the error from the owner check or
 * the update handler.
 */
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_iotlb *iotlb = NULL;
	bool wrong_batch;
	int ret = 0;

	mutex_lock(&dev->mutex);

	ret = vhost_dev_check_owner(dev);
	if (ret)
		goto unlock;

	if (msg->type == VHOST_IOTLB_UPDATE ||
	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
		struct vhost_vdpa_as *as = vhost_vdpa_find_alloc_as(v, asid);

		if (!as) {
			dev_err(&v->dev, "can't find and alloc asid %d\n",
				asid);
			ret = -EINVAL;
			goto unlock;
		}
		iotlb = &as->iotlb;
	} else {
		iotlb = asid_to_iotlb(v, asid);
	}

	wrong_batch = v->in_batch && v->batch_asid != asid;
	if (wrong_batch)
		dev_info(&v->dev, "batch id %d asid %d\n",
			 v->batch_asid, asid);
	if (!iotlb)
		dev_err(&v->dev, "no iotlb for asid %d\n", asid);
	if (wrong_batch || !iotlb) {
		ret = -EINVAL;
		goto unlock;
	}

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		ret = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->batch_asid = asid;
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		/* Push the accumulated changes to the device in one go. */
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, asid, iotlb);
		v->in_batch = false;
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock:
	mutex_unlock(&dev->mutex);

	return ret;
}
12044c8cf318STiwei Bie 
vhost_vdpa_chr_write_iter(struct kiocb * iocb,struct iov_iter * from)12054c8cf318STiwei Bie static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
12064c8cf318STiwei Bie 					 struct iov_iter *from)
12074c8cf318STiwei Bie {
12084c8cf318STiwei Bie 	struct file *file = iocb->ki_filp;
12094c8cf318STiwei Bie 	struct vhost_vdpa *v = file->private_data;
12104c8cf318STiwei Bie 	struct vhost_dev *dev = &v->vdev;
12114c8cf318STiwei Bie 
12124c8cf318STiwei Bie 	return vhost_chr_write_iter(dev, from);
12134c8cf318STiwei Bie }
12144c8cf318STiwei Bie 
vhost_vdpa_alloc_domain(struct vhost_vdpa * v)12154c8cf318STiwei Bie static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
12164c8cf318STiwei Bie {
12174c8cf318STiwei Bie 	struct vdpa_device *vdpa = v->vdpa;
12184c8cf318STiwei Bie 	const struct vdpa_config_ops *ops = vdpa->config;
12194c8cf318STiwei Bie 	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
122094a11504SGreg Kroah-Hartman 	const struct bus_type *bus;
12214c8cf318STiwei Bie 	int ret;
12224c8cf318STiwei Bie 
12234c8cf318STiwei Bie 	/* Device want to do DMA by itself */
12244c8cf318STiwei Bie 	if (ops->set_map || ops->dma_map)
12254c8cf318STiwei Bie 		return 0;
12264c8cf318STiwei Bie 
12274c8cf318STiwei Bie 	bus = dma_dev->bus;
12284c8cf318STiwei Bie 	if (!bus)
12294c8cf318STiwei Bie 		return -EFAULT;
12304c8cf318STiwei Bie 
12316830a6abSAlvaro Karsz 	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
12326830a6abSAlvaro Karsz 		dev_warn_once(&v->dev,
12336830a6abSAlvaro Karsz 			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
12344c8cf318STiwei Bie 		return -ENOTSUPP;
12356830a6abSAlvaro Karsz 	}
12364c8cf318STiwei Bie 
12374c8cf318STiwei Bie 	v->domain = iommu_domain_alloc(bus);
12384c8cf318STiwei Bie 	if (!v->domain)
12394c8cf318STiwei Bie 		return -EIO;
12404c8cf318STiwei Bie 
12414c8cf318STiwei Bie 	ret = iommu_attach_device(v->domain, dma_dev);
12424c8cf318STiwei Bie 	if (ret)
12434c8cf318STiwei Bie 		goto err_attach;
12444c8cf318STiwei Bie 
12454c8cf318STiwei Bie 	return 0;
12464c8cf318STiwei Bie 
12474c8cf318STiwei Bie err_attach:
12484c8cf318STiwei Bie 	iommu_domain_free(v->domain);
12495a522150SGautam Dawar 	v->domain = NULL;
12504c8cf318STiwei Bie 	return ret;
12514c8cf318STiwei Bie }
12524c8cf318STiwei Bie 
vhost_vdpa_free_domain(struct vhost_vdpa * v)12534c8cf318STiwei Bie static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
12544c8cf318STiwei Bie {
12554c8cf318STiwei Bie 	struct vdpa_device *vdpa = v->vdpa;
12564c8cf318STiwei Bie 	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
12574c8cf318STiwei Bie 
12584c8cf318STiwei Bie 	if (v->domain) {
12594c8cf318STiwei Bie 		iommu_detach_device(v->domain, dma_dev);
12604c8cf318STiwei Bie 		iommu_domain_free(v->domain);
12614c8cf318STiwei Bie 	}
12624c8cf318STiwei Bie 
12634c8cf318STiwei Bie 	v->domain = NULL;
12644c8cf318STiwei Bie }
12654c8cf318STiwei Bie 
vhost_vdpa_set_iova_range(struct vhost_vdpa * v)12661b48dc03SJason Wang static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
12671b48dc03SJason Wang {
12681b48dc03SJason Wang 	struct vdpa_iova_range *range = &v->range;
12691b48dc03SJason Wang 	struct vdpa_device *vdpa = v->vdpa;
12701b48dc03SJason Wang 	const struct vdpa_config_ops *ops = vdpa->config;
12711b48dc03SJason Wang 
12721b48dc03SJason Wang 	if (ops->get_iova_range) {
12731b48dc03SJason Wang 		*range = ops->get_iova_range(vdpa);
1274bc9a05eeSChristoph Hellwig 	} else if (v->domain && v->domain->geometry.force_aperture) {
1275bc9a05eeSChristoph Hellwig 		range->first = v->domain->geometry.aperture_start;
1276bc9a05eeSChristoph Hellwig 		range->last = v->domain->geometry.aperture_end;
12771b48dc03SJason Wang 	} else {
12781b48dc03SJason Wang 		range->first = 0;
12791b48dc03SJason Wang 		range->last = ULLONG_MAX;
12801b48dc03SJason Wang 	}
12811b48dc03SJason Wang }
12821b48dc03SJason Wang 
vhost_vdpa_cleanup(struct vhost_vdpa * v)12833d569879SGautam Dawar static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
12843d569879SGautam Dawar {
1285aaca8373SGautam Dawar 	struct vhost_vdpa_as *as;
1286aaca8373SGautam Dawar 	u32 asid;
1287aaca8373SGautam Dawar 
1288aaca8373SGautam Dawar 	for (asid = 0; asid < v->vdpa->nas; asid++) {
1289aaca8373SGautam Dawar 		as = asid_to_as(v, asid);
1290aaca8373SGautam Dawar 		if (as)
1291aaca8373SGautam Dawar 			vhost_vdpa_remove_as(v, asid);
1292aaca8373SGautam Dawar 	}
1293c070c191SStefano Garzarella 
12945a522150SGautam Dawar 	vhost_vdpa_free_domain(v);
1295c070c191SStefano Garzarella 	vhost_dev_cleanup(&v->vdev);
1296c070c191SStefano Garzarella 	kfree(v->vdev.vqs);
12973d569879SGautam Dawar }
12983d569879SGautam Dawar 
vhost_vdpa_open(struct inode * inode,struct file * filep)12994c8cf318STiwei Bie static int vhost_vdpa_open(struct inode *inode, struct file *filep)
13004c8cf318STiwei Bie {
13014c8cf318STiwei Bie 	struct vhost_vdpa *v;
13024c8cf318STiwei Bie 	struct vhost_dev *dev;
13034c8cf318STiwei Bie 	struct vhost_virtqueue **vqs;
130481d46d69SLongpeng 	int r, opened;
130581d46d69SLongpeng 	u32 i, nvqs;
13064c8cf318STiwei Bie 
13074c8cf318STiwei Bie 	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
13084c8cf318STiwei Bie 
13094c8cf318STiwei Bie 	opened = atomic_cmpxchg(&v->opened, 0, 1);
13104c8cf318STiwei Bie 	if (opened)
13114c8cf318STiwei Bie 		return -EBUSY;
13124c8cf318STiwei Bie 
13134c8cf318STiwei Bie 	nvqs = v->nvqs;
13147f05630dSXie Yongji 	r = vhost_vdpa_reset(v);
13157f05630dSXie Yongji 	if (r)
13167f05630dSXie Yongji 		goto err;
13174c8cf318STiwei Bie 
13184c8cf318STiwei Bie 	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
13194c8cf318STiwei Bie 	if (!vqs) {
13204c8cf318STiwei Bie 		r = -ENOMEM;
13214c8cf318STiwei Bie 		goto err;
13224c8cf318STiwei Bie 	}
13234c8cf318STiwei Bie 
13244c8cf318STiwei Bie 	dev = &v->vdev;
13254c8cf318STiwei Bie 	for (i = 0; i < nvqs; i++) {
13264c8cf318STiwei Bie 		vqs[i] = &v->vqs[i];
13274c8cf318STiwei Bie 		vqs[i]->handle_kick = handle_vq_kick;
1328*ca64edd7SJason Wang 		vqs[i]->call_ctx.ctx = NULL;
13294c8cf318STiwei Bie 	}
133001fcb1cbSJason Wang 	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
13314c8cf318STiwei Bie 		       vhost_vdpa_process_iotlb_msg);
13324c8cf318STiwei Bie 
13334c8cf318STiwei Bie 	r = vhost_vdpa_alloc_domain(v);
13344c8cf318STiwei Bie 	if (r)
1335aaca8373SGautam Dawar 		goto err_alloc_domain;
13364c8cf318STiwei Bie 
13371b48dc03SJason Wang 	vhost_vdpa_set_iova_range(v);
13381b48dc03SJason Wang 
13394c8cf318STiwei Bie 	filep->private_data = v;
13404c8cf318STiwei Bie 
13414c8cf318STiwei Bie 	return 0;
13424c8cf318STiwei Bie 
1343aaca8373SGautam Dawar err_alloc_domain:
13443d569879SGautam Dawar 	vhost_vdpa_cleanup(v);
13454c8cf318STiwei Bie err:
13464c8cf318STiwei Bie 	atomic_dec(&v->opened);
13474c8cf318STiwei Bie 	return r;
13484c8cf318STiwei Bie }
13494c8cf318STiwei Bie 
vhost_vdpa_clean_irq(struct vhost_vdpa * v)13502cf1ba9aSZhu Lingshan static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
13512cf1ba9aSZhu Lingshan {
135281d46d69SLongpeng 	u32 i;
13532cf1ba9aSZhu Lingshan 
13544c050286SGautam Dawar 	for (i = 0; i < v->nvqs; i++)
13554c050286SGautam Dawar 		vhost_vdpa_unsetup_vq_irq(v, i);
13562cf1ba9aSZhu Lingshan }
13572cf1ba9aSZhu Lingshan 
vhost_vdpa_release(struct inode * inode,struct file * filep)13584c8cf318STiwei Bie static int vhost_vdpa_release(struct inode *inode, struct file *filep)
13594c8cf318STiwei Bie {
13604c8cf318STiwei Bie 	struct vhost_vdpa *v = filep->private_data;
13614c8cf318STiwei Bie 	struct vhost_dev *d = &v->vdev;
13624c8cf318STiwei Bie 
13634c8cf318STiwei Bie 	mutex_lock(&d->mutex);
13644c8cf318STiwei Bie 	filep->private_data = NULL;
1365ea8f17e4SWu Zongyong 	vhost_vdpa_clean_irq(v);
13664c8cf318STiwei Bie 	vhost_vdpa_reset(v);
13674c8cf318STiwei Bie 	vhost_dev_stop(&v->vdev);
13689067de47SStefano Garzarella 	vhost_vdpa_unbind_mm(v);
1369776f3950SZhu Lingshan 	vhost_vdpa_config_put(v);
1370037d4305SStefano Garzarella 	vhost_vdpa_cleanup(v);
13714c8cf318STiwei Bie 	mutex_unlock(&d->mutex);
13724c8cf318STiwei Bie 
13734c8cf318STiwei Bie 	atomic_dec(&v->opened);
13744c8cf318STiwei Bie 	complete(&v->completion);
13754c8cf318STiwei Bie 
13764c8cf318STiwei Bie 	return 0;
13774c8cf318STiwei Bie }
13784c8cf318STiwei Bie 
13794b4e4867SMichael S. Tsirkin #ifdef CONFIG_MMU
vhost_vdpa_fault(struct vm_fault * vmf)1380ddd89d0aSJason Wang static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1381ddd89d0aSJason Wang {
1382ddd89d0aSJason Wang 	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1383ddd89d0aSJason Wang 	struct vdpa_device *vdpa = v->vdpa;
1384ddd89d0aSJason Wang 	const struct vdpa_config_ops *ops = vdpa->config;
1385ddd89d0aSJason Wang 	struct vdpa_notification_area notify;
1386ddd89d0aSJason Wang 	struct vm_area_struct *vma = vmf->vma;
1387ddd89d0aSJason Wang 	u16 index = vma->vm_pgoff;
1388ddd89d0aSJason Wang 
1389ddd89d0aSJason Wang 	notify = ops->get_vq_notification(vdpa, index);
1390ddd89d0aSJason Wang 
13912ac6deb9SJason Wang 	return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr));
1392ddd89d0aSJason Wang }
1393ddd89d0aSJason Wang 
1394ddd89d0aSJason Wang static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1395ddd89d0aSJason Wang 	.fault = vhost_vdpa_fault,
1396ddd89d0aSJason Wang };
1397ddd89d0aSJason Wang 
vhost_vdpa_mmap(struct file * file,struct vm_area_struct * vma)1398ddd89d0aSJason Wang static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1399ddd89d0aSJason Wang {
1400ddd89d0aSJason Wang 	struct vhost_vdpa *v = vma->vm_file->private_data;
1401ddd89d0aSJason Wang 	struct vdpa_device *vdpa = v->vdpa;
1402ddd89d0aSJason Wang 	const struct vdpa_config_ops *ops = vdpa->config;
1403ddd89d0aSJason Wang 	struct vdpa_notification_area notify;
1404c09cc2c3SDan Carpenter 	unsigned long index = vma->vm_pgoff;
1405ddd89d0aSJason Wang 
1406ddd89d0aSJason Wang 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1407ddd89d0aSJason Wang 		return -EINVAL;
1408ddd89d0aSJason Wang 	if ((vma->vm_flags & VM_SHARED) == 0)
1409ddd89d0aSJason Wang 		return -EINVAL;
1410ddd89d0aSJason Wang 	if (vma->vm_flags & VM_READ)
1411ddd89d0aSJason Wang 		return -EINVAL;
1412ddd89d0aSJason Wang 	if (index > 65535)
1413ddd89d0aSJason Wang 		return -EINVAL;
1414ddd89d0aSJason Wang 	if (!ops->get_vq_notification)
1415ddd89d0aSJason Wang 		return -ENOTSUPP;
1416ddd89d0aSJason Wang 
1417ddd89d0aSJason Wang 	/* To be safe and easily modelled by userspace, We only
1418ddd89d0aSJason Wang 	 * support the doorbell which sits on the page boundary and
1419ddd89d0aSJason Wang 	 * does not share the page with other registers.
1420ddd89d0aSJason Wang 	 */
1421ddd89d0aSJason Wang 	notify = ops->get_vq_notification(vdpa, index);
1422ddd89d0aSJason Wang 	if (notify.addr & (PAGE_SIZE - 1))
1423ddd89d0aSJason Wang 		return -EINVAL;
1424ddd89d0aSJason Wang 	if (vma->vm_end - vma->vm_start != notify.size)
1425ddd89d0aSJason Wang 		return -ENOTSUPP;
1426ddd89d0aSJason Wang 
14271c71222eSSuren Baghdasaryan 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1428ddd89d0aSJason Wang 	vma->vm_ops = &vhost_vdpa_vm_ops;
1429ddd89d0aSJason Wang 	return 0;
1430ddd89d0aSJason Wang }
14314b4e4867SMichael S. Tsirkin #endif /* CONFIG_MMU */
1432ddd89d0aSJason Wang 
14334c8cf318STiwei Bie static const struct file_operations vhost_vdpa_fops = {
14344c8cf318STiwei Bie 	.owner		= THIS_MODULE,
14354c8cf318STiwei Bie 	.open		= vhost_vdpa_open,
14364c8cf318STiwei Bie 	.release	= vhost_vdpa_release,
14374c8cf318STiwei Bie 	.write_iter	= vhost_vdpa_chr_write_iter,
14384c8cf318STiwei Bie 	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
14394b4e4867SMichael S. Tsirkin #ifdef CONFIG_MMU
1440ddd89d0aSJason Wang 	.mmap		= vhost_vdpa_mmap,
14414b4e4867SMichael S. Tsirkin #endif /* CONFIG_MMU */
14424c8cf318STiwei Bie 	.compat_ioctl	= compat_ptr_ioctl,
14434c8cf318STiwei Bie };
14444c8cf318STiwei Bie 
vhost_vdpa_release_dev(struct device * device)14454c8cf318STiwei Bie static void vhost_vdpa_release_dev(struct device *device)
14464c8cf318STiwei Bie {
14474c8cf318STiwei Bie 	struct vhost_vdpa *v =
14484c8cf318STiwei Bie 	       container_of(device, struct vhost_vdpa, dev);
14494c8cf318STiwei Bie 
14504c8cf318STiwei Bie 	ida_simple_remove(&vhost_vdpa_ida, v->minor);
14514c8cf318STiwei Bie 	kfree(v->vqs);
14524c8cf318STiwei Bie 	kfree(v);
14534c8cf318STiwei Bie }
14544c8cf318STiwei Bie 
vhost_vdpa_probe(struct vdpa_device * vdpa)14554c8cf318STiwei Bie static int vhost_vdpa_probe(struct vdpa_device *vdpa)
14564c8cf318STiwei Bie {
14574c8cf318STiwei Bie 	const struct vdpa_config_ops *ops = vdpa->config;
14584c8cf318STiwei Bie 	struct vhost_vdpa *v;
1459a9974489SMax Gurtovoy 	int minor;
14603d569879SGautam Dawar 	int i, r;
14614c8cf318STiwei Bie 
1462aaca8373SGautam Dawar 	/* We can't support platform IOMMU device with more than 1
1463aaca8373SGautam Dawar 	 * group or as
1464aaca8373SGautam Dawar 	 */
1465aaca8373SGautam Dawar 	if (!ops->set_map && !ops->dma_map &&
1466aaca8373SGautam Dawar 	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1467db9adcbfSGautam Dawar 		return -EOPNOTSUPP;
1468db9adcbfSGautam Dawar 
14694c8cf318STiwei Bie 	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
14704c8cf318STiwei Bie 	if (!v)
14714c8cf318STiwei Bie 		return -ENOMEM;
14724c8cf318STiwei Bie 
14734c8cf318STiwei Bie 	minor = ida_simple_get(&vhost_vdpa_ida, 0,
14744c8cf318STiwei Bie 			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
14754c8cf318STiwei Bie 	if (minor < 0) {
14764c8cf318STiwei Bie 		kfree(v);
14774c8cf318STiwei Bie 		return minor;
14784c8cf318STiwei Bie 	}
14794c8cf318STiwei Bie 
14804c8cf318STiwei Bie 	atomic_set(&v->opened, 0);
14814c8cf318STiwei Bie 	v->minor = minor;
14824c8cf318STiwei Bie 	v->vdpa = vdpa;
1483a9974489SMax Gurtovoy 	v->nvqs = vdpa->nvqs;
14844c8cf318STiwei Bie 	v->virtio_id = ops->get_device_id(vdpa);
14854c8cf318STiwei Bie 
14864c8cf318STiwei Bie 	device_initialize(&v->dev);
14874c8cf318STiwei Bie 	v->dev.release = vhost_vdpa_release_dev;
14884c8cf318STiwei Bie 	v->dev.parent = &vdpa->dev;
14894c8cf318STiwei Bie 	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1490a9974489SMax Gurtovoy 	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
14914c8cf318STiwei Bie 			       GFP_KERNEL);
14924c8cf318STiwei Bie 	if (!v->vqs) {
14934c8cf318STiwei Bie 		r = -ENOMEM;
14944c8cf318STiwei Bie 		goto err;
14954c8cf318STiwei Bie 	}
14964c8cf318STiwei Bie 
14974c8cf318STiwei Bie 	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
14984c8cf318STiwei Bie 	if (r)
14994c8cf318STiwei Bie 		goto err;
15004c8cf318STiwei Bie 
15014c8cf318STiwei Bie 	cdev_init(&v->cdev, &vhost_vdpa_fops);
15024c8cf318STiwei Bie 	v->cdev.owner = THIS_MODULE;
15034c8cf318STiwei Bie 
15044c8cf318STiwei Bie 	r = cdev_device_add(&v->cdev, &v->dev);
15054c8cf318STiwei Bie 	if (r)
15064c8cf318STiwei Bie 		goto err;
15074c8cf318STiwei Bie 
15084c8cf318STiwei Bie 	init_completion(&v->completion);
15094c8cf318STiwei Bie 	vdpa_set_drvdata(vdpa, v);
15104c8cf318STiwei Bie 
15113d569879SGautam Dawar 	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
15123d569879SGautam Dawar 		INIT_HLIST_HEAD(&v->as[i]);
15133d569879SGautam Dawar 
15144c8cf318STiwei Bie 	return 0;
15154c8cf318STiwei Bie 
15164c8cf318STiwei Bie err:
15174c8cf318STiwei Bie 	put_device(&v->dev);
15184c8cf318STiwei Bie 	return r;
15194c8cf318STiwei Bie }
15204c8cf318STiwei Bie 
vhost_vdpa_remove(struct vdpa_device * vdpa)15214c8cf318STiwei Bie static void vhost_vdpa_remove(struct vdpa_device *vdpa)
15224c8cf318STiwei Bie {
15234c8cf318STiwei Bie 	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
15244c8cf318STiwei Bie 	int opened;
15254c8cf318STiwei Bie 
15264c8cf318STiwei Bie 	cdev_device_del(&v->cdev, &v->dev);
15274c8cf318STiwei Bie 
15284c8cf318STiwei Bie 	do {
15294c8cf318STiwei Bie 		opened = atomic_cmpxchg(&v->opened, 0, 1);
15304c8cf318STiwei Bie 		if (!opened)
15314c8cf318STiwei Bie 			break;
15324c8cf318STiwei Bie 		wait_for_completion(&v->completion);
15334c8cf318STiwei Bie 	} while (1);
15344c8cf318STiwei Bie 
15354c8cf318STiwei Bie 	put_device(&v->dev);
15364c8cf318STiwei Bie }
15374c8cf318STiwei Bie 
15384c8cf318STiwei Bie static struct vdpa_driver vhost_vdpa_driver = {
15394c8cf318STiwei Bie 	.driver = {
15404c8cf318STiwei Bie 		.name	= "vhost_vdpa",
15414c8cf318STiwei Bie 	},
15424c8cf318STiwei Bie 	.probe	= vhost_vdpa_probe,
15434c8cf318STiwei Bie 	.remove	= vhost_vdpa_remove,
15444c8cf318STiwei Bie };
15454c8cf318STiwei Bie 
vhost_vdpa_init(void)15464c8cf318STiwei Bie static int __init vhost_vdpa_init(void)
15474c8cf318STiwei Bie {
15484c8cf318STiwei Bie 	int r;
15494c8cf318STiwei Bie 
15504c8cf318STiwei Bie 	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
15514c8cf318STiwei Bie 				"vhost-vdpa");
15524c8cf318STiwei Bie 	if (r)
15534c8cf318STiwei Bie 		goto err_alloc_chrdev;
15544c8cf318STiwei Bie 
15554c8cf318STiwei Bie 	r = vdpa_register_driver(&vhost_vdpa_driver);
15564c8cf318STiwei Bie 	if (r)
15574c8cf318STiwei Bie 		goto err_vdpa_register_driver;
15584c8cf318STiwei Bie 
15594c8cf318STiwei Bie 	return 0;
15604c8cf318STiwei Bie 
15614c8cf318STiwei Bie err_vdpa_register_driver:
15624c8cf318STiwei Bie 	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
15634c8cf318STiwei Bie err_alloc_chrdev:
15644c8cf318STiwei Bie 	return r;
15654c8cf318STiwei Bie }
15664c8cf318STiwei Bie module_init(vhost_vdpa_init);
15674c8cf318STiwei Bie 
vhost_vdpa_exit(void)15684c8cf318STiwei Bie static void __exit vhost_vdpa_exit(void)
15694c8cf318STiwei Bie {
15704c8cf318STiwei Bie 	vdpa_unregister_driver(&vhost_vdpa_driver);
15714c8cf318STiwei Bie 	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
15724c8cf318STiwei Bie }
15734c8cf318STiwei Bie module_exit(vhost_vdpa_exit);
15744c8cf318STiwei Bie 
15754c8cf318STiwei Bie MODULE_VERSION("0.0.1");
15764c8cf318STiwei Bie MODULE_LICENSE("GPL v2");
15774c8cf318STiwei Bie MODULE_AUTHOR("Intel Corporation");
15784c8cf318STiwei Bie MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
1579