xref: /openbmc/linux/drivers/vhost/vdpa.c (revision 7583028d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2018-2020 Intel Corporation.
4  * Copyright (C) 2020 Red Hat, Inc.
5  *
6  * Author: Tiwei Bie <tiwei.bie@intel.com>
7  *         Jason Wang <jasowang@redhat.com>
8  *
9  * Thanks to Michael S. Tsirkin for the valuable comments and
10  * suggestions, and thanks to Cunming Liang and Zhihong Wang for
11  * all their support.
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/module.h>
16 #include <linux/cdev.h>
17 #include <linux/device.h>
18 #include <linux/mm.h>
19 #include <linux/slab.h>
20 #include <linux/iommu.h>
21 #include <linux/uuid.h>
22 #include <linux/vdpa.h>
23 #include <linux/nospec.h>
24 #include <linux/vhost.h>
25 
26 #include "vhost.h"
27 
28 enum {
29 	VHOST_VDPA_BACKEND_FEATURES =
30 	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
31 	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
32 	(1ULL << VHOST_BACKEND_F_IOTLB_ASID),
33 };
34 
35 #define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
36 
37 #define VHOST_VDPA_IOTLB_BUCKETS 16
38 
39 struct vhost_vdpa_as {
40 	struct hlist_node hash_link;
41 	struct vhost_iotlb iotlb;
42 	u32 id;
43 };
44 
45 struct vhost_vdpa {
46 	struct vhost_dev vdev;
47 	struct iommu_domain *domain;
48 	struct vhost_virtqueue *vqs;
49 	struct completion completion;
50 	struct vdpa_device *vdpa;
51 	struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS];
52 	struct device dev;
53 	struct cdev cdev;
54 	atomic_t opened;
55 	u32 nvqs;
56 	int virtio_id;
57 	int minor;
58 	struct eventfd_ctx *config_ctx;
59 	int in_batch;
60 	struct vdpa_iova_range range;
61 	u32 batch_asid;
62 };
63 
64 static DEFINE_IDA(vhost_vdpa_ida);
65 
66 static dev_t vhost_vdpa_major;
67 
68 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
69 				   struct vhost_iotlb *iotlb, u64 start,
70 				   u64 last, u32 asid);
71 
72 static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb)
73 {
74 	struct vhost_vdpa_as *as = container_of(iotlb, struct
75 						vhost_vdpa_as, iotlb);
76 	return as->id;
77 }
78 
79 static struct vhost_vdpa_as *asid_to_as(struct vhost_vdpa *v, u32 asid)
80 {
81 	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
82 	struct vhost_vdpa_as *as;
83 
84 	hlist_for_each_entry(as, head, hash_link)
85 		if (as->id == asid)
86 			return as;
87 
88 	return NULL;
89 }
90 
91 static struct vhost_iotlb *asid_to_iotlb(struct vhost_vdpa *v, u32 asid)
92 {
93 	struct vhost_vdpa_as *as = asid_to_as(v, asid);
94 
95 	if (!as)
96 		return NULL;
97 
98 	return &as->iotlb;
99 }
100 
101 static struct vhost_vdpa_as *vhost_vdpa_alloc_as(struct vhost_vdpa *v, u32 asid)
102 {
103 	struct hlist_head *head = &v->as[asid % VHOST_VDPA_IOTLB_BUCKETS];
104 	struct vhost_vdpa_as *as;
105 
106 	if (asid_to_as(v, asid))
107 		return NULL;
108 
109 	if (asid >= v->vdpa->nas)
110 		return NULL;
111 
112 	as = kmalloc(sizeof(*as), GFP_KERNEL);
113 	if (!as)
114 		return NULL;
115 
116 	vhost_iotlb_init(&as->iotlb, 0, 0);
117 	as->id = asid;
118 	hlist_add_head(&as->hash_link, head);
119 
120 	return as;
121 }
122 
123 static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
124 						      u32 asid)
125 {
126 	struct vhost_vdpa_as *as = asid_to_as(v, asid);
127 
128 	if (as)
129 		return as;
130 
131 	return vhost_vdpa_alloc_as(v, asid);
132 }
133 
134 static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
135 {
136 	struct vhost_vdpa_as *as = asid_to_as(v, asid);
137 
138 	if (!as)
139 		return -EINVAL;
140 
141 	hlist_del(&as->hash_link);
142 	vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
143 	kfree(as);
144 
145 	return 0;
146 }
147 
148 static void handle_vq_kick(struct vhost_work *work)
149 {
150 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
151 						  poll.work);
152 	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
153 	const struct vdpa_config_ops *ops = v->vdpa->config;
154 
155 	ops->kick_vq(v->vdpa, vq - v->vqs);
156 }
157 
158 static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
159 {
160 	struct vhost_virtqueue *vq = private;
161 	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
162 
163 	if (call_ctx)
164 		eventfd_signal(call_ctx, 1);
165 
166 	return IRQ_HANDLED;
167 }
168 
169 static irqreturn_t vhost_vdpa_config_cb(void *private)
170 {
171 	struct vhost_vdpa *v = private;
172 	struct eventfd_ctx *config_ctx = v->config_ctx;
173 
174 	if (config_ctx)
175 		eventfd_signal(config_ctx, 1);
176 
177 	return IRQ_HANDLED;
178 }
179 
180 static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
181 {
182 	struct vhost_virtqueue *vq = &v->vqs[qid];
183 	const struct vdpa_config_ops *ops = v->vdpa->config;
184 	struct vdpa_device *vdpa = v->vdpa;
185 	int ret, irq;
186 
187 	if (!ops->get_vq_irq)
188 		return;
189 
190 	irq = ops->get_vq_irq(vdpa, qid);
191 	if (irq < 0)
192 		return;
193 
194 	irq_bypass_unregister_producer(&vq->call_ctx.producer);
195 	if (!vq->call_ctx.ctx)
196 		return;
197 
198 	vq->call_ctx.producer.token = vq->call_ctx.ctx;
199 	vq->call_ctx.producer.irq = irq;
200 	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
201 	if (unlikely(ret))
202 		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
203 			 qid, vq->call_ctx.producer.token, ret);
204 }
205 
206 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
207 {
208 	struct vhost_virtqueue *vq = &v->vqs[qid];
209 
210 	irq_bypass_unregister_producer(&vq->call_ctx.producer);
211 }
212 
213 static int vhost_vdpa_reset(struct vhost_vdpa *v)
214 {
215 	struct vdpa_device *vdpa = v->vdpa;
216 
217 	v->in_batch = 0;
218 
219 	return vdpa_reset(vdpa);
220 }
221 
222 static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
223 {
224 	struct vdpa_device *vdpa = v->vdpa;
225 	const struct vdpa_config_ops *ops = vdpa->config;
226 
227 	if (!vdpa->use_va || !ops->bind_mm)
228 		return 0;
229 
230 	return ops->bind_mm(vdpa, v->vdev.mm);
231 }
232 
233 static void vhost_vdpa_unbind_mm(struct vhost_vdpa *v)
234 {
235 	struct vdpa_device *vdpa = v->vdpa;
236 	const struct vdpa_config_ops *ops = vdpa->config;
237 
238 	if (!vdpa->use_va || !ops->unbind_mm)
239 		return;
240 
241 	ops->unbind_mm(vdpa);
242 }
243 
244 static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
245 {
246 	struct vdpa_device *vdpa = v->vdpa;
247 	const struct vdpa_config_ops *ops = vdpa->config;
248 	u32 device_id;
249 
250 	device_id = ops->get_device_id(vdpa);
251 
252 	if (copy_to_user(argp, &device_id, sizeof(device_id)))
253 		return -EFAULT;
254 
255 	return 0;
256 }
257 
258 static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
259 {
260 	struct vdpa_device *vdpa = v->vdpa;
261 	const struct vdpa_config_ops *ops = vdpa->config;
262 	u8 status;
263 
264 	status = ops->get_status(vdpa);
265 
266 	if (copy_to_user(statusp, &status, sizeof(status)))
267 		return -EFAULT;
268 
269 	return 0;
270 }
271 
272 static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
273 {
274 	struct vdpa_device *vdpa = v->vdpa;
275 	const struct vdpa_config_ops *ops = vdpa->config;
276 	u8 status, status_old;
277 	u32 nvqs = v->nvqs;
278 	int ret;
279 	u16 i;
280 
281 	if (copy_from_user(&status, statusp, sizeof(status)))
282 		return -EFAULT;
283 
284 	status_old = ops->get_status(vdpa);
285 
286 	/*
287 	 * Userspace shouldn't remove status bits unless resetting the
288 	 * status to 0.
289 	 */
290 	if (status != 0 && (status_old & ~status) != 0)
291 		return -EINVAL;
292 
293 	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
294 		for (i = 0; i < nvqs; i++)
295 			vhost_vdpa_unsetup_vq_irq(v, i);
296 
297 	if (status == 0) {
298 		ret = vdpa_reset(vdpa);
299 		if (ret)
300 			return ret;
301 	} else
302 		vdpa_set_status(vdpa, status);
303 
304 	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
305 		for (i = 0; i < nvqs; i++)
306 			vhost_vdpa_setup_vq_irq(v, i);
307 
308 	return 0;
309 }
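
/*
 * Editor's sketch (not part of the driver): one way userspace might walk the
 * virtio status sequence through VHOST_VDPA_SET_STATUS, consistent with the
 * rule above that status bits can only be added unless the whole status is
 * reset to 0.  "vdpa_fd" is a hypothetical open /dev/vhost-vdpa-N descriptor.
 *
 *	#include <linux/vhost.h>
 *	#include <linux/virtio_config.h>
 *	#include <sys/ioctl.h>
 *
 *	static int bring_up(int vdpa_fd)
 *	{
 *		__u8 s = 0;
 *
 *		if (ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s))
 *			return -1;				-- start from reset
 *		s = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
 *		if (ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s))
 *			return -1;
 *		-- negotiate with VHOST_GET_FEATURES/VHOST_SET_FEATURES here
 *		s |= VIRTIO_CONFIG_S_FEATURES_OK;
 *		if (ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s))
 *			return -1;
 *		s |= VIRTIO_CONFIG_S_DRIVER_OK;		-- vq irqs are set up here
 *		return ioctl(vdpa_fd, VHOST_VDPA_SET_STATUS, &s);
 *	}
 */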
310 
311 static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
312 				      struct vhost_vdpa_config *c)
313 {
314 	struct vdpa_device *vdpa = v->vdpa;
315 	size_t size = vdpa->config->get_config_size(vdpa);
316 
317 	if (c->len == 0 || c->off > size)
318 		return -EINVAL;
319 
320 	if (c->len > size - c->off)
321 		return -E2BIG;
322 
323 	return 0;
324 }
325 
326 static long vhost_vdpa_get_config(struct vhost_vdpa *v,
327 				  struct vhost_vdpa_config __user *c)
328 {
329 	struct vdpa_device *vdpa = v->vdpa;
330 	struct vhost_vdpa_config config;
331 	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
332 	u8 *buf;
333 
334 	if (copy_from_user(&config, c, size))
335 		return -EFAULT;
336 	if (vhost_vdpa_config_validate(v, &config))
337 		return -EINVAL;
338 	buf = kvzalloc(config.len, GFP_KERNEL);
339 	if (!buf)
340 		return -ENOMEM;
341 
342 	vdpa_get_config(vdpa, config.off, buf, config.len);
343 
344 	if (copy_to_user(c->buf, buf, config.len)) {
345 		kvfree(buf);
346 		return -EFAULT;
347 	}
348 
349 	kvfree(buf);
350 	return 0;
351 }
352 
353 static long vhost_vdpa_set_config(struct vhost_vdpa *v,
354 				  struct vhost_vdpa_config __user *c)
355 {
356 	struct vdpa_device *vdpa = v->vdpa;
357 	struct vhost_vdpa_config config;
358 	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
359 	u8 *buf;
360 
361 	if (copy_from_user(&config, c, size))
362 		return -EFAULT;
363 	if (vhost_vdpa_config_validate(v, &config))
364 		return -EINVAL;
365 
366 	buf = vmemdup_user(c->buf, config.len);
367 	if (IS_ERR(buf))
368 		return PTR_ERR(buf);
369 
370 	vdpa_set_config(vdpa, config.off, buf, config.len);
371 
372 	kvfree(buf);
373 	return 0;
374 }
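
/*
 * Editor's sketch (not part of the driver): reading a slice of the device
 * config space from userspace with VHOST_VDPA_GET_CONFIG.  The ioctl takes a
 * struct vhost_vdpa_config header (off/len) followed by the payload buffer,
 * and the off/len pair must pass vhost_vdpa_config_validate() above.
 * "vdpa_fd" is a hypothetical descriptor.
 *
 *	#include <linux/vhost.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *
 *	static int read_config(int vdpa_fd, __u32 off, void *out, __u32 len)
 *	{
 *		struct vhost_vdpa_config *c;
 *		int r;
 *
 *		c = calloc(1, sizeof(*c) + len);
 *		if (!c)
 *			return -1;
 *		c->off = off;
 *		c->len = len;		-- must satisfy off + len <= config size
 *		r = ioctl(vdpa_fd, VHOST_VDPA_GET_CONFIG, c);
 *		if (!r)
 *			memcpy(out, c->buf, len);
 *		free(c);
 *		return r;
 *	}
 */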
375 
376 static bool vhost_vdpa_can_suspend(const struct vhost_vdpa *v)
377 {
378 	struct vdpa_device *vdpa = v->vdpa;
379 	const struct vdpa_config_ops *ops = vdpa->config;
380 
381 	return ops->suspend;
382 }
383 
384 static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
385 {
386 	struct vdpa_device *vdpa = v->vdpa;
387 	const struct vdpa_config_ops *ops = vdpa->config;
388 
389 	return ops->resume;
390 }
391 
392 static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
393 {
394 	struct vdpa_device *vdpa = v->vdpa;
395 	const struct vdpa_config_ops *ops = vdpa->config;
396 	u64 features;
397 
398 	features = ops->get_device_features(vdpa);
399 
400 	if (copy_to_user(featurep, &features, sizeof(features)))
401 		return -EFAULT;
402 
403 	return 0;
404 }
405 
406 static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
407 {
408 	struct vdpa_device *vdpa = v->vdpa;
409 	const struct vdpa_config_ops *ops = vdpa->config;
410 	struct vhost_dev *d = &v->vdev;
411 	u64 actual_features;
412 	u64 features;
413 	int i;
414 
415 	/*
416 	 * It's not allowed to change the features after they have
417 	 * been negotiated.
418 	 */
419 	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
420 		return -EBUSY;
421 
422 	if (copy_from_user(&features, featurep, sizeof(features)))
423 		return -EFAULT;
424 
425 	if (vdpa_set_features(vdpa, features))
426 		return -EINVAL;
427 
428 	/* let the vqs know what has been configured */
429 	actual_features = ops->get_driver_features(vdpa);
430 	for (i = 0; i < d->nvqs; ++i) {
431 		struct vhost_virtqueue *vq = d->vqs[i];
432 
433 		mutex_lock(&vq->mutex);
434 		vq->acked_features = actual_features;
435 		mutex_unlock(&vq->mutex);
436 	}
437 
438 	return 0;
439 }
440 
441 static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
442 {
443 	struct vdpa_device *vdpa = v->vdpa;
444 	const struct vdpa_config_ops *ops = vdpa->config;
445 	u16 num;
446 
447 	num = ops->get_vq_num_max(vdpa);
448 
449 	if (copy_to_user(argp, &num, sizeof(num)))
450 		return -EFAULT;
451 
452 	return 0;
453 }
454 
455 static void vhost_vdpa_config_put(struct vhost_vdpa *v)
456 {
457 	if (v->config_ctx) {
458 		eventfd_ctx_put(v->config_ctx);
459 		v->config_ctx = NULL;
460 	}
461 }
462 
463 static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
464 {
465 	struct vdpa_callback cb;
466 	int fd;
467 	struct eventfd_ctx *ctx;
468 
469 	cb.callback = vhost_vdpa_config_cb;
470 	cb.private = v;
471 	if (copy_from_user(&fd, argp, sizeof(fd)))
472 		return  -EFAULT;
473 
474 	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
475 	swap(ctx, v->config_ctx);
476 
477 	if (!IS_ERR_OR_NULL(ctx))
478 		eventfd_ctx_put(ctx);
479 
480 	if (IS_ERR(v->config_ctx)) {
481 		long ret = PTR_ERR(v->config_ctx);
482 
483 		v->config_ctx = NULL;
484 		return ret;
485 	}
486 
487 	v->vdpa->config->set_config_cb(v->vdpa, &cb);
488 
489 	return 0;
490 }
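
/*
 * Editor's sketch (not part of the driver): wiring up a config-change eventfd
 * with VHOST_VDPA_SET_CONFIG_CALL.  Passing VHOST_FILE_UNBIND later drops the
 * callback again, which is the NULL-context path handled above.  "vdpa_fd" is
 * a hypothetical descriptor.
 *
 *	#include <linux/vhost.h>
 *	#include <sys/eventfd.h>
 *	#include <sys/ioctl.h>
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	ioctl(vdpa_fd, VHOST_VDPA_SET_CONFIG_CALL, &efd);	-- config irqs -> efd
 *	...
 *	int unbind = VHOST_FILE_UNBIND;
 *	ioctl(vdpa_fd, VHOST_VDPA_SET_CONFIG_CALL, &unbind);	-- tear it down
 */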
491 
492 static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
493 {
494 	struct vhost_vdpa_iova_range range = {
495 		.first = v->range.first,
496 		.last = v->range.last,
497 	};
498 
499 	if (copy_to_user(argp, &range, sizeof(range)))
500 		return -EFAULT;
501 	return 0;
502 }
503 
504 static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
505 {
506 	struct vdpa_device *vdpa = v->vdpa;
507 	const struct vdpa_config_ops *ops = vdpa->config;
508 	u32 size;
509 
510 	size = ops->get_config_size(vdpa);
511 
512 	if (copy_to_user(argp, &size, sizeof(size)))
513 		return -EFAULT;
514 
515 	return 0;
516 }
517 
518 static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
519 {
520 	struct vdpa_device *vdpa = v->vdpa;
521 
522 	if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs)))
523 		return -EFAULT;
524 
525 	return 0;
526 }
527 
528 /* After a successful return of this ioctl the device must not process more
529  * virtqueue descriptors. The device can still answer reads or writes of config
530  * fields as if it were not suspended. In particular, writing to "queue_enable"
531  * with a value of 1 will not make the device start processing buffers.
532  */
533 static long vhost_vdpa_suspend(struct vhost_vdpa *v)
534 {
535 	struct vdpa_device *vdpa = v->vdpa;
536 	const struct vdpa_config_ops *ops = vdpa->config;
537 
538 	if (!ops->suspend)
539 		return -EOPNOTSUPP;
540 
541 	return ops->suspend(vdpa);
542 }
543 
544 /* After a successful return of this ioctl the device resumes processing
545  * virtqueue descriptors. The device becomes fully operational the same way it
546  * was before it was suspended.
547  */
548 static long vhost_vdpa_resume(struct vhost_vdpa *v)
549 {
550 	struct vdpa_device *vdpa = v->vdpa;
551 	const struct vdpa_config_ops *ops = vdpa->config;
552 
553 	if (!ops->resume)
554 		return -EOPNOTSUPP;
555 
556 	return ops->resume(vdpa);
557 }
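
/*
 * Editor's sketch (not part of the driver): checking the backend feature bits
 * before issuing the parameterless VHOST_VDPA_SUSPEND/VHOST_VDPA_RESUME
 * ioctls whose semantics are described in the comments above.  "vdpa_fd" is a
 * hypothetical descriptor.
 *
 *	__u64 features;
 *
 *	if (ioctl(vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features))
 *		return -1;
 *	if (!(features & (1ULL << VHOST_BACKEND_F_SUSPEND)))
 *		return -1;			-- device cannot be suspended
 *	if (ioctl(vdpa_fd, VHOST_VDPA_SUSPEND))
 *		return -1;			-- no more descriptors are processed
 *	-- save or migrate state here
 *	if (features & (1ULL << VHOST_BACKEND_F_RESUME))
 *		ioctl(vdpa_fd, VHOST_VDPA_RESUME);	-- fully operational again
 */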
558 
559 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
560 				   void __user *argp)
561 {
562 	struct vdpa_device *vdpa = v->vdpa;
563 	const struct vdpa_config_ops *ops = vdpa->config;
564 	struct vdpa_vq_state vq_state;
565 	struct vdpa_callback cb;
566 	struct vhost_virtqueue *vq;
567 	struct vhost_vring_state s;
568 	u32 idx;
569 	long r;
570 
571 	r = get_user(idx, (u32 __user *)argp);
572 	if (r < 0)
573 		return r;
574 
575 	if (idx >= v->nvqs)
576 		return -ENOBUFS;
577 
578 	idx = array_index_nospec(idx, v->nvqs);
579 	vq = &v->vqs[idx];
580 
581 	switch (cmd) {
582 	case VHOST_VDPA_SET_VRING_ENABLE:
583 		if (copy_from_user(&s, argp, sizeof(s)))
584 			return -EFAULT;
585 		ops->set_vq_ready(vdpa, idx, s.num);
586 		return 0;
587 	case VHOST_VDPA_GET_VRING_GROUP:
588 		if (!ops->get_vq_group)
589 			return -EOPNOTSUPP;
590 		s.index = idx;
591 		s.num = ops->get_vq_group(vdpa, idx);
592 		if (s.num >= vdpa->ngroups)
593 			return -EIO;
594 		else if (copy_to_user(argp, &s, sizeof(s)))
595 			return -EFAULT;
596 		return 0;
597 	case VHOST_VDPA_SET_GROUP_ASID:
598 		if (copy_from_user(&s, argp, sizeof(s)))
599 			return -EFAULT;
600 		if (s.num >= vdpa->nas)
601 			return -EINVAL;
602 		if (!ops->set_group_asid)
603 			return -EOPNOTSUPP;
604 		return ops->set_group_asid(vdpa, idx, s.num);
605 	case VHOST_GET_VRING_BASE:
606 		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
607 		if (r)
608 			return r;
609 
610 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
611 			vq->last_avail_idx = vq_state.packed.last_avail_idx |
612 					     (vq_state.packed.last_avail_counter << 15);
613 			vq->last_used_idx = vq_state.packed.last_used_idx |
614 					    (vq_state.packed.last_used_counter << 15);
615 		} else {
616 			vq->last_avail_idx = vq_state.split.avail_index;
617 		}
618 		break;
619 	}
620 
621 	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
622 	if (r)
623 		return r;
624 
625 	switch (cmd) {
626 	case VHOST_SET_VRING_ADDR:
627 		if (ops->set_vq_address(vdpa, idx,
628 					(u64)(uintptr_t)vq->desc,
629 					(u64)(uintptr_t)vq->avail,
630 					(u64)(uintptr_t)vq->used))
631 			r = -EINVAL;
632 		break;
633 
634 	case VHOST_SET_VRING_BASE:
635 		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
636 			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
637 			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
638 			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
639 			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
640 		} else {
641 			vq_state.split.avail_index = vq->last_avail_idx;
642 		}
643 		r = ops->set_vq_state(vdpa, idx, &vq_state);
644 		break;
645 
646 	case VHOST_SET_VRING_CALL:
647 		if (vq->call_ctx.ctx) {
648 			cb.callback = vhost_vdpa_virtqueue_cb;
649 			cb.private = vq;
650 			cb.trigger = vq->call_ctx.ctx;
651 		} else {
652 			cb.callback = NULL;
653 			cb.private = NULL;
654 			cb.trigger = NULL;
655 		}
656 		ops->set_vq_cb(vdpa, idx, &cb);
657 		vhost_vdpa_setup_vq_irq(v, idx);
658 		break;
659 
660 	case VHOST_SET_VRING_NUM:
661 		ops->set_vq_num(vdpa, idx, vq->num);
662 		break;
663 	}
664 
665 	return r;
666 }
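
/*
 * Editor's note (illustrative helpers, not part of the driver): for packed
 * virtqueues the vring base handling above packs a 15-bit ring index and the
 * wrap counter into 16 bits, so e.g. index 5 with the wrap counter set is
 * encoded as 0x8005.
 *
 *	static inline __u16 packed_idx_encode(__u16 idx, bool wrap)
 *	{
 *		return (idx & 0x7fff) | ((__u16)wrap << 15);
 *	}
 *
 *	static inline void packed_idx_decode(__u16 val, __u16 *idx, bool *wrap)
 *	{
 *		*idx = val & 0x7fff;		-- last_avail/last_used index
 *		*wrap = !!(val & 0x8000);	-- wrap counter in the top bit
 *	}
 */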
667 
668 static long vhost_vdpa_unlocked_ioctl(struct file *filep,
669 				      unsigned int cmd, unsigned long arg)
670 {
671 	struct vhost_vdpa *v = filep->private_data;
672 	struct vhost_dev *d = &v->vdev;
673 	void __user *argp = (void __user *)arg;
674 	u64 __user *featurep = argp;
675 	u64 features;
676 	long r = 0;
677 
678 	if (cmd == VHOST_SET_BACKEND_FEATURES) {
679 		if (copy_from_user(&features, featurep, sizeof(features)))
680 			return -EFAULT;
681 		if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
682 				 BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
683 				 BIT_ULL(VHOST_BACKEND_F_RESUME)))
684 			return -EOPNOTSUPP;
685 		if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) &&
686 		     !vhost_vdpa_can_suspend(v))
687 			return -EOPNOTSUPP;
688 		if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
689 		     !vhost_vdpa_can_resume(v))
690 			return -EOPNOTSUPP;
691 		vhost_set_backend_features(&v->vdev, features);
692 		return 0;
693 	}
694 
695 	mutex_lock(&d->mutex);
696 
697 	switch (cmd) {
698 	case VHOST_VDPA_GET_DEVICE_ID:
699 		r = vhost_vdpa_get_device_id(v, argp);
700 		break;
701 	case VHOST_VDPA_GET_STATUS:
702 		r = vhost_vdpa_get_status(v, argp);
703 		break;
704 	case VHOST_VDPA_SET_STATUS:
705 		r = vhost_vdpa_set_status(v, argp);
706 		break;
707 	case VHOST_VDPA_GET_CONFIG:
708 		r = vhost_vdpa_get_config(v, argp);
709 		break;
710 	case VHOST_VDPA_SET_CONFIG:
711 		r = vhost_vdpa_set_config(v, argp);
712 		break;
713 	case VHOST_GET_FEATURES:
714 		r = vhost_vdpa_get_features(v, argp);
715 		break;
716 	case VHOST_SET_FEATURES:
717 		r = vhost_vdpa_set_features(v, argp);
718 		break;
719 	case VHOST_VDPA_GET_VRING_NUM:
720 		r = vhost_vdpa_get_vring_num(v, argp);
721 		break;
722 	case VHOST_VDPA_GET_GROUP_NUM:
723 		if (copy_to_user(argp, &v->vdpa->ngroups,
724 				 sizeof(v->vdpa->ngroups)))
725 			r = -EFAULT;
726 		break;
727 	case VHOST_VDPA_GET_AS_NUM:
728 		if (copy_to_user(argp, &v->vdpa->nas, sizeof(v->vdpa->nas)))
729 			r = -EFAULT;
730 		break;
731 	case VHOST_SET_LOG_BASE:
732 	case VHOST_SET_LOG_FD:
733 		r = -ENOIOCTLCMD;
734 		break;
735 	case VHOST_VDPA_SET_CONFIG_CALL:
736 		r = vhost_vdpa_set_config_call(v, argp);
737 		break;
738 	case VHOST_GET_BACKEND_FEATURES:
739 		features = VHOST_VDPA_BACKEND_FEATURES;
740 		if (vhost_vdpa_can_suspend(v))
741 			features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
742 		if (vhost_vdpa_can_resume(v))
743 			features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
744 		if (copy_to_user(featurep, &features, sizeof(features)))
745 			r = -EFAULT;
746 		break;
747 	case VHOST_VDPA_GET_IOVA_RANGE:
748 		r = vhost_vdpa_get_iova_range(v, argp);
749 		break;
750 	case VHOST_VDPA_GET_CONFIG_SIZE:
751 		r = vhost_vdpa_get_config_size(v, argp);
752 		break;
753 	case VHOST_VDPA_GET_VQS_COUNT:
754 		r = vhost_vdpa_get_vqs_count(v, argp);
755 		break;
756 	case VHOST_VDPA_SUSPEND:
757 		r = vhost_vdpa_suspend(v);
758 		break;
759 	case VHOST_VDPA_RESUME:
760 		r = vhost_vdpa_resume(v);
761 		break;
762 	default:
763 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
764 		if (r == -ENOIOCTLCMD)
765 			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
766 		break;
767 	}
768 
769 	if (r)
770 		goto out;
771 
772 	switch (cmd) {
773 	case VHOST_SET_OWNER:
774 		r = vhost_vdpa_bind_mm(v);
775 		if (r)
776 			vhost_dev_reset_owner(d, NULL);
777 		break;
778 	}
779 out:
780 	mutex_unlock(&d->mutex);
781 	return r;
782 }
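
/*
 * Editor's sketch (not part of the driver): the backend-feature handshake
 * handled at the top of vhost_vdpa_unlocked_ioctl().  Userspace queries the
 * supported mask, keeps only the bits it understands and acks them back
 * before sending IOTLB messages.  "vdpa_fd" is a hypothetical descriptor.
 *
 *	__u64 features;
 *
 *	if (ioctl(vdpa_fd, VHOST_GET_BACKEND_FEATURES, &features))
 *		return -1;
 *	features &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *		    (1ULL << VHOST_BACKEND_F_IOTLB_BATCH) |
 *		    (1ULL << VHOST_BACKEND_F_IOTLB_ASID);
 *	if (ioctl(vdpa_fd, VHOST_SET_BACKEND_FEATURES, &features))
 *		return -1;		-- unknown bits would get -EOPNOTSUPP
 */
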
783 static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
784 				     struct vhost_iotlb_map *map, u32 asid)
785 {
786 	struct vdpa_device *vdpa = v->vdpa;
787 	const struct vdpa_config_ops *ops = vdpa->config;
788 	if (ops->dma_map) {
789 		ops->dma_unmap(vdpa, asid, map->start, map->size);
790 	} else if (ops->set_map == NULL) {
791 		iommu_unmap(v->domain, map->start, map->size);
792 	}
793 }
794 
795 static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
796 				u64 start, u64 last, u32 asid)
797 {
798 	struct vhost_dev *dev = &v->vdev;
799 	struct vhost_iotlb_map *map;
800 	struct page *page;
801 	unsigned long pfn, pinned;
802 
803 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
804 		pinned = PFN_DOWN(map->size);
805 		for (pfn = PFN_DOWN(map->addr);
806 		     pinned > 0; pfn++, pinned--) {
807 			page = pfn_to_page(pfn);
808 			if (map->perm & VHOST_ACCESS_WO)
809 				set_page_dirty_lock(page);
810 			unpin_user_page(page);
811 		}
812 		atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
813 		vhost_vdpa_general_unmap(v, map, asid);
814 		vhost_iotlb_map_free(iotlb, map);
815 	}
816 }
817 
818 static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
819 				u64 start, u64 last, u32 asid)
820 {
821 	struct vhost_iotlb_map *map;
822 	struct vdpa_map_file *map_file;
823 
824 	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
825 		map_file = (struct vdpa_map_file *)map->opaque;
826 		fput(map_file->file);
827 		kfree(map_file);
828 		vhost_vdpa_general_unmap(v, map, asid);
829 		vhost_iotlb_map_free(iotlb, map);
830 	}
831 }
832 
833 static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v,
834 				   struct vhost_iotlb *iotlb, u64 start,
835 				   u64 last, u32 asid)
836 {
837 	struct vdpa_device *vdpa = v->vdpa;
838 
839 	if (vdpa->use_va)
840 		return vhost_vdpa_va_unmap(v, iotlb, start, last, asid);
841 
842 	return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid);
843 }
844 
845 static int perm_to_iommu_flags(u32 perm)
846 {
847 	int flags = 0;
848 
849 	switch (perm) {
850 	case VHOST_ACCESS_WO:
851 		flags |= IOMMU_WRITE;
852 		break;
853 	case VHOST_ACCESS_RO:
854 		flags |= IOMMU_READ;
855 		break;
856 	case VHOST_ACCESS_RW:
857 		flags |= (IOMMU_WRITE | IOMMU_READ);
858 		break;
859 	default:
860 		WARN(1, "invalid vhost IOTLB permission\n");
861 		break;
862 	}
863 
864 	return flags | IOMMU_CACHE;
865 }
866 
867 static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
868 			  u64 iova, u64 size, u64 pa, u32 perm, void *opaque)
869 {
870 	struct vhost_dev *dev = &v->vdev;
871 	struct vdpa_device *vdpa = v->vdpa;
872 	const struct vdpa_config_ops *ops = vdpa->config;
873 	u32 asid = iotlb_to_asid(iotlb);
874 	int r = 0;
875 
876 	r = vhost_iotlb_add_range_ctx(iotlb, iova, iova + size - 1,
877 				      pa, perm, opaque);
878 	if (r)
879 		return r;
880 
881 	if (ops->dma_map) {
882 		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
883 	} else if (ops->set_map) {
884 		if (!v->in_batch)
885 			r = ops->set_map(vdpa, asid, iotlb);
886 	} else {
887 		r = iommu_map(v->domain, iova, pa, size,
888 			      perm_to_iommu_flags(perm), GFP_KERNEL);
889 	}
890 	if (r) {
891 		vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
892 		return r;
893 	}
894 
895 	if (!vdpa->use_va)
896 		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
897 
898 	return 0;
899 }
900 
901 static void vhost_vdpa_unmap(struct vhost_vdpa *v,
902 			     struct vhost_iotlb *iotlb,
903 			     u64 iova, u64 size)
904 {
905 	struct vdpa_device *vdpa = v->vdpa;
906 	const struct vdpa_config_ops *ops = vdpa->config;
907 	u32 asid = iotlb_to_asid(iotlb);
908 
909 	vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid);
910 
911 	if (ops->set_map) {
912 		if (!v->in_batch)
913 			ops->set_map(vdpa, asid, iotlb);
914 	}
915 
917 
918 static int vhost_vdpa_va_map(struct vhost_vdpa *v,
919 			     struct vhost_iotlb *iotlb,
920 			     u64 iova, u64 size, u64 uaddr, u32 perm)
921 {
922 	struct vhost_dev *dev = &v->vdev;
923 	u64 offset, map_size, map_iova = iova;
924 	struct vdpa_map_file *map_file;
925 	struct vm_area_struct *vma;
926 	int ret = 0;
927 
928 	mmap_read_lock(dev->mm);
929 
930 	while (size) {
931 		vma = find_vma(dev->mm, uaddr);
932 		if (!vma) {
933 			ret = -EINVAL;
934 			break;
935 		}
936 		map_size = min(size, vma->vm_end - uaddr);
937 		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
938 			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
939 			goto next;
940 
941 		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
942 		if (!map_file) {
943 			ret = -ENOMEM;
944 			break;
945 		}
946 		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
947 		map_file->offset = offset;
948 		map_file->file = get_file(vma->vm_file);
949 		ret = vhost_vdpa_map(v, iotlb, map_iova, map_size, uaddr,
950 				     perm, map_file);
951 		if (ret) {
952 			fput(map_file->file);
953 			kfree(map_file);
954 			break;
955 		}
956 next:
957 		size -= map_size;
958 		uaddr += map_size;
959 		map_iova += map_size;
960 	}
961 	if (ret)
962 		vhost_vdpa_unmap(v, iotlb, iova, map_iova - iova);
963 
964 	mmap_read_unlock(dev->mm);
965 
966 	return ret;
967 }
968 
969 static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
970 			     struct vhost_iotlb *iotlb,
971 			     u64 iova, u64 size, u64 uaddr, u32 perm)
972 {
973 	struct vhost_dev *dev = &v->vdev;
974 	struct page **page_list;
975 	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
976 	unsigned int gup_flags = FOLL_LONGTERM;
977 	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
978 	unsigned long lock_limit, sz2pin, nchunks, i;
979 	u64 start = iova;
980 	long pinned;
981 	int ret = 0;
982 
983 	/* Limit the use of memory for bookkeeping */
984 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
985 	if (!page_list)
986 		return -ENOMEM;
987 
988 	if (perm & VHOST_ACCESS_WO)
989 		gup_flags |= FOLL_WRITE;
990 
991 	npages = PFN_UP(size + (iova & ~PAGE_MASK));
992 	if (!npages) {
993 		ret = -EINVAL;
994 		goto free;
995 	}
996 
997 	mmap_read_lock(dev->mm);
998 
999 	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1000 	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
1001 		ret = -ENOMEM;
1002 		goto unlock;
1003 	}
1004 
1005 	cur_base = uaddr & PAGE_MASK;
1006 	iova &= PAGE_MASK;
1007 	nchunks = 0;
1008 
1009 	while (npages) {
1010 		sz2pin = min_t(unsigned long, npages, list_size);
1011 		pinned = pin_user_pages(cur_base, sz2pin,
1012 					gup_flags, page_list);
1013 		if (sz2pin != pinned) {
1014 			if (pinned < 0) {
1015 				ret = pinned;
1016 			} else {
1017 				unpin_user_pages(page_list, pinned);
1018 				ret = -ENOMEM;
1019 			}
1020 			goto out;
1021 		}
1022 		nchunks++;
1023 
1024 		if (!last_pfn)
1025 			map_pfn = page_to_pfn(page_list[0]);
1026 
1027 		for (i = 0; i < pinned; i++) {
1028 			unsigned long this_pfn = page_to_pfn(page_list[i]);
1029 			u64 csize;
1030 
1031 			if (last_pfn && (this_pfn != last_pfn + 1)) {
1032 				/* Map the contiguous chunk of pinned memory */
1033 				csize = PFN_PHYS(last_pfn - map_pfn + 1);
1034 				ret = vhost_vdpa_map(v, iotlb, iova, csize,
1035 						     PFN_PHYS(map_pfn),
1036 						     perm, NULL);
1037 				if (ret) {
1038 					/*
1039 					 * Unpin the pages that are left unmapped
1040 					 * from this point on in the current
1041 					 * page_list. The remaining outstanding
1042 					 * ones which may stride across several
1043 					 * chunks will be covered in the common
1044 					 * error path subsequently.
1045 					 */
1046 					unpin_user_pages(&page_list[i],
1047 							 pinned - i);
1048 					goto out;
1049 				}
1050 
1051 				map_pfn = this_pfn;
1052 				iova += csize;
1053 				nchunks = 0;
1054 			}
1055 
1056 			last_pfn = this_pfn;
1057 		}
1058 
1059 		cur_base += PFN_PHYS(pinned);
1060 		npages -= pinned;
1061 	}
1062 
1063 	/* Map the remaining chunk */
1064 	ret = vhost_vdpa_map(v, iotlb, iova, PFN_PHYS(last_pfn - map_pfn + 1),
1065 			     PFN_PHYS(map_pfn), perm, NULL);
1066 out:
1067 	if (ret) {
1068 		if (nchunks) {
1069 			unsigned long pfn;
1070 
1071 			 * Unpin the outstanding pages which have been pinned
1072 			 * but not yet mapped, due to a vdpa_map() or
1073 			 * pin_user_pages() failure.
1074 			 * pin_user_pages() failure.
1075 			 *
1076 			 * Mapped pages are accounted in vdpa_map(), hence
1077 			 * the corresponding unpinning will be handled by
1078 			 * vdpa_unmap().
1079 			 */
1080 			WARN_ON(!last_pfn);
1081 			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
1082 				unpin_user_page(pfn_to_page(pfn));
1083 		}
1084 		vhost_vdpa_unmap(v, iotlb, start, size);
1085 	}
1086 unlock:
1087 	mmap_read_unlock(dev->mm);
1088 free:
1089 	free_page((unsigned long)page_list);
1090 	return ret;
1092 }
1093 
1094 static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
1095 					   struct vhost_iotlb *iotlb,
1096 					   struct vhost_iotlb_msg *msg)
1097 {
1098 	struct vdpa_device *vdpa = v->vdpa;
1099 
1100 	if (msg->iova < v->range.first || !msg->size ||
1101 	    msg->iova > U64_MAX - msg->size + 1 ||
1102 	    msg->iova + msg->size - 1 > v->range.last)
1103 		return -EINVAL;
1104 
1105 	if (vhost_iotlb_itree_first(iotlb, msg->iova,
1106 				    msg->iova + msg->size - 1))
1107 		return -EEXIST;
1108 
1109 	if (vdpa->use_va)
1110 		return vhost_vdpa_va_map(v, iotlb, msg->iova, msg->size,
1111 					 msg->uaddr, msg->perm);
1112 
1113 	return vhost_vdpa_pa_map(v, iotlb, msg->iova, msg->size, msg->uaddr,
1114 				 msg->perm);
1115 }
1116 
1117 static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
1118 					struct vhost_iotlb_msg *msg)
1119 {
1120 	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
1121 	struct vdpa_device *vdpa = v->vdpa;
1122 	const struct vdpa_config_ops *ops = vdpa->config;
1123 	struct vhost_iotlb *iotlb = NULL;
1124 	struct vhost_vdpa_as *as = NULL;
1125 	int r = 0;
1126 
1127 	mutex_lock(&dev->mutex);
1128 
1129 	r = vhost_dev_check_owner(dev);
1130 	if (r)
1131 		goto unlock;
1132 
1133 	if (msg->type == VHOST_IOTLB_UPDATE ||
1134 	    msg->type == VHOST_IOTLB_BATCH_BEGIN) {
1135 		as = vhost_vdpa_find_alloc_as(v, asid);
1136 		if (!as) {
1137 			dev_err(&v->dev, "can't find and alloc asid %d\n",
1138 				asid);
1139 			r = -EINVAL;
1140 			goto unlock;
1141 		}
1142 		iotlb = &as->iotlb;
1143 	} else
1144 		iotlb = asid_to_iotlb(v, asid);
1145 
1146 	if ((v->in_batch && v->batch_asid != asid) || !iotlb) {
1147 		if (v->in_batch && v->batch_asid != asid) {
1148 			dev_info(&v->dev, "batch id %d asid %d\n",
1149 				 v->batch_asid, asid);
1150 		}
1151 		if (!iotlb)
1152 			dev_err(&v->dev, "no iotlb for asid %d\n", asid);
1153 		r = -EINVAL;
1154 		goto unlock;
1155 	}
1156 
1157 	switch (msg->type) {
1158 	case VHOST_IOTLB_UPDATE:
1159 		r = vhost_vdpa_process_iotlb_update(v, iotlb, msg);
1160 		break;
1161 	case VHOST_IOTLB_INVALIDATE:
1162 		vhost_vdpa_unmap(v, iotlb, msg->iova, msg->size);
1163 		break;
1164 	case VHOST_IOTLB_BATCH_BEGIN:
1165 		v->batch_asid = asid;
1166 		v->in_batch = true;
1167 		break;
1168 	case VHOST_IOTLB_BATCH_END:
1169 		if (v->in_batch && ops->set_map)
1170 			ops->set_map(vdpa, asid, iotlb);
1171 		v->in_batch = false;
1172 		break;
1173 	default:
1174 		r = -EINVAL;
1175 		break;
1176 	}
1177 unlock:
1178 	mutex_unlock(&dev->mutex);
1179 
1180 	return r;
1181 }
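
/*
 * Editor's sketch (not part of the driver): the message flow consumed above.
 * Userspace write()s struct vhost_msg_v2 records to the vhost-vdpa fd and can
 * bracket several VHOST_IOTLB_UPDATE entries for one ASID between
 * BATCH_BEGIN/BATCH_END so that set_map() is invoked only once.  "fd", "iova",
 * "len" and "buf" are hypothetical.
 *
 *	#include <linux/vhost.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int iotlb_send(int fd, __u32 asid, __u8 type,
 *			      __u64 iova, __u64 size, __u64 uaddr, __u8 perm)
 *	{
 *		struct vhost_msg_v2 msg;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.type = VHOST_IOTLB_MSG_V2;
 *		msg.asid = asid;
 *		msg.iotlb.type = type;
 *		msg.iotlb.iova = iova;
 *		msg.iotlb.size = size;
 *		msg.iotlb.uaddr = uaddr;
 *		msg.iotlb.perm = perm;
 *		return write(fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
 *	}
 *
 *	iotlb_send(fd, 0, VHOST_IOTLB_BATCH_BEGIN, 0, 0, 0, 0);
 *	iotlb_send(fd, 0, VHOST_IOTLB_UPDATE, iova, len,
 *		   (__u64)(uintptr_t)buf, VHOST_ACCESS_RW);
 *	iotlb_send(fd, 0, VHOST_IOTLB_BATCH_END, 0, 0, 0, 0);
 */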
1182 
1183 static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
1184 					 struct iov_iter *from)
1185 {
1186 	struct file *file = iocb->ki_filp;
1187 	struct vhost_vdpa *v = file->private_data;
1188 	struct vhost_dev *dev = &v->vdev;
1189 
1190 	return vhost_chr_write_iter(dev, from);
1191 }
1192 
1193 static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
1194 {
1195 	struct vdpa_device *vdpa = v->vdpa;
1196 	const struct vdpa_config_ops *ops = vdpa->config;
1197 	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1198 	const struct bus_type *bus;
1199 	int ret;
1200 
1201 	/* Device wants to do DMA by itself */
1202 	if (ops->set_map || ops->dma_map)
1203 		return 0;
1204 
1205 	bus = dma_dev->bus;
1206 	if (!bus)
1207 		return -EFAULT;
1208 
1209 	if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) {
1210 		dev_warn_once(&v->dev,
1211 			      "Failed to allocate domain, device is not IOMMU cache coherent capable\n");
1212 		return -ENOTSUPP;
1213 	}
1214 
1215 	v->domain = iommu_domain_alloc(bus);
1216 	if (!v->domain)
1217 		return -EIO;
1218 
1219 	ret = iommu_attach_device(v->domain, dma_dev);
1220 	if (ret)
1221 		goto err_attach;
1222 
1223 	return 0;
1224 
1225 err_attach:
1226 	iommu_domain_free(v->domain);
1227 	v->domain = NULL;
1228 	return ret;
1229 }
1230 
1231 static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
1232 {
1233 	struct vdpa_device *vdpa = v->vdpa;
1234 	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
1235 
1236 	if (v->domain) {
1237 		iommu_detach_device(v->domain, dma_dev);
1238 		iommu_domain_free(v->domain);
1239 	}
1240 
1241 	v->domain = NULL;
1242 }
1243 
1244 static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
1245 {
1246 	struct vdpa_iova_range *range = &v->range;
1247 	struct vdpa_device *vdpa = v->vdpa;
1248 	const struct vdpa_config_ops *ops = vdpa->config;
1249 
1250 	if (ops->get_iova_range) {
1251 		*range = ops->get_iova_range(vdpa);
1252 	} else if (v->domain && v->domain->geometry.force_aperture) {
1253 		range->first = v->domain->geometry.aperture_start;
1254 		range->last = v->domain->geometry.aperture_end;
1255 	} else {
1256 		range->first = 0;
1257 		range->last = ULLONG_MAX;
1258 	}
1259 }
1260 
1261 static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
1262 {
1263 	struct vhost_vdpa_as *as;
1264 	u32 asid;
1265 
1266 	for (asid = 0; asid < v->vdpa->nas; asid++) {
1267 		as = asid_to_as(v, asid);
1268 		if (as)
1269 			vhost_vdpa_remove_as(v, asid);
1270 	}
1271 
1272 	vhost_vdpa_free_domain(v);
1273 	vhost_dev_cleanup(&v->vdev);
1274 	kfree(v->vdev.vqs);
1275 }
1276 
1277 static int vhost_vdpa_open(struct inode *inode, struct file *filep)
1278 {
1279 	struct vhost_vdpa *v;
1280 	struct vhost_dev *dev;
1281 	struct vhost_virtqueue **vqs;
1282 	int r, opened;
1283 	u32 i, nvqs;
1284 
1285 	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
1286 
1287 	opened = atomic_cmpxchg(&v->opened, 0, 1);
1288 	if (opened)
1289 		return -EBUSY;
1290 
1291 	nvqs = v->nvqs;
1292 	r = vhost_vdpa_reset(v);
1293 	if (r)
1294 		goto err;
1295 
1296 	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
1297 	if (!vqs) {
1298 		r = -ENOMEM;
1299 		goto err;
1300 	}
1301 
1302 	dev = &v->vdev;
1303 	for (i = 0; i < nvqs; i++) {
1304 		vqs[i] = &v->vqs[i];
1305 		vqs[i]->handle_kick = handle_vq_kick;
1306 	}
1307 	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
1308 		       vhost_vdpa_process_iotlb_msg);
1309 
1310 	r = vhost_vdpa_alloc_domain(v);
1311 	if (r)
1312 		goto err_alloc_domain;
1313 
1314 	vhost_vdpa_set_iova_range(v);
1315 
1316 	filep->private_data = v;
1317 
1318 	return 0;
1319 
1320 err_alloc_domain:
1321 	vhost_vdpa_cleanup(v);
1322 err:
1323 	atomic_dec(&v->opened);
1324 	return r;
1325 }
1326 
1327 static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
1328 {
1329 	u32 i;
1330 
1331 	for (i = 0; i < v->nvqs; i++)
1332 		vhost_vdpa_unsetup_vq_irq(v, i);
1333 }
1334 
1335 static int vhost_vdpa_release(struct inode *inode, struct file *filep)
1336 {
1337 	struct vhost_vdpa *v = filep->private_data;
1338 	struct vhost_dev *d = &v->vdev;
1339 
1340 	mutex_lock(&d->mutex);
1341 	filep->private_data = NULL;
1342 	vhost_vdpa_clean_irq(v);
1343 	vhost_vdpa_reset(v);
1344 	vhost_dev_stop(&v->vdev);
1345 	vhost_vdpa_unbind_mm(v);
1346 	vhost_vdpa_config_put(v);
1347 	vhost_vdpa_cleanup(v);
1348 	mutex_unlock(&d->mutex);
1349 
1350 	atomic_dec(&v->opened);
1351 	complete(&v->completion);
1352 
1353 	return 0;
1354 }
1355 
1356 #ifdef CONFIG_MMU
1357 static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
1358 {
1359 	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
1360 	struct vdpa_device *vdpa = v->vdpa;
1361 	const struct vdpa_config_ops *ops = vdpa->config;
1362 	struct vdpa_notification_area notify;
1363 	struct vm_area_struct *vma = vmf->vma;
1364 	u16 index = vma->vm_pgoff;
1365 
1366 	notify = ops->get_vq_notification(vdpa, index);
1367 
1368 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1369 	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
1370 			    PFN_DOWN(notify.addr), PAGE_SIZE,
1371 			    vma->vm_page_prot))
1372 		return VM_FAULT_SIGBUS;
1373 
1374 	return VM_FAULT_NOPAGE;
1375 }
1376 
1377 static const struct vm_operations_struct vhost_vdpa_vm_ops = {
1378 	.fault = vhost_vdpa_fault,
1379 };
1380 
1381 static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
1382 {
1383 	struct vhost_vdpa *v = vma->vm_file->private_data;
1384 	struct vdpa_device *vdpa = v->vdpa;
1385 	const struct vdpa_config_ops *ops = vdpa->config;
1386 	struct vdpa_notification_area notify;
1387 	unsigned long index = vma->vm_pgoff;
1388 
1389 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1390 		return -EINVAL;
1391 	if ((vma->vm_flags & VM_SHARED) == 0)
1392 		return -EINVAL;
1393 	if (vma->vm_flags & VM_READ)
1394 		return -EINVAL;
1395 	if (index > 65535)
1396 		return -EINVAL;
1397 	if (!ops->get_vq_notification)
1398 		return -ENOTSUPP;
1399 
1400 	/* To be safe and easily modelled by userspace, we only
1401 	 * support a doorbell that sits on a page boundary and
1402 	 * does not share the page with other registers.
1403 	 */
1404 	notify = ops->get_vq_notification(vdpa, index);
1405 	if (notify.addr & (PAGE_SIZE - 1))
1406 		return -EINVAL;
1407 	if (vma->vm_end - vma->vm_start != notify.size)
1408 		return -ENOTSUPP;
1409 
1410 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1411 	vma->vm_ops = &vhost_vdpa_vm_ops;
1412 	return 0;
1413 }
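
/*
 * Editor's sketch (not part of the driver): mapping a virtqueue doorbell from
 * userspace, matching the checks above: exactly one page, write-only,
 * MAP_SHARED, with the page offset selecting the queue index.  What must be
 * written to the doorbell is device specific; writing the queue index (as a
 * virtio-pci style notify register expects) is only an assumption here.
 * "vdpa_fd" and "qindex" are hypothetical.
 *
 *	#include <sys/mman.h>
 *	#include <unistd.h>
 *
 *	long page = sysconf(_SC_PAGESIZE);
 *	void *db = mmap(NULL, page, PROT_WRITE, MAP_SHARED,
 *			vdpa_fd, (off_t)qindex * page);
 *	if (db != MAP_FAILED)
 *		*(volatile __u16 *)db = qindex;
 */
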
1414 #endif /* CONFIG_MMU */
1415 
1416 static const struct file_operations vhost_vdpa_fops = {
1417 	.owner		= THIS_MODULE,
1418 	.open		= vhost_vdpa_open,
1419 	.release	= vhost_vdpa_release,
1420 	.write_iter	= vhost_vdpa_chr_write_iter,
1421 	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
1422 #ifdef CONFIG_MMU
1423 	.mmap		= vhost_vdpa_mmap,
1424 #endif /* CONFIG_MMU */
1425 	.compat_ioctl	= compat_ptr_ioctl,
1426 };
1427 
1428 static void vhost_vdpa_release_dev(struct device *device)
1429 {
1430 	struct vhost_vdpa *v =
1431 	       container_of(device, struct vhost_vdpa, dev);
1432 
1433 	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1434 	kfree(v->vqs);
1435 	kfree(v);
1436 }
1437 
1438 static int vhost_vdpa_probe(struct vdpa_device *vdpa)
1439 {
1440 	const struct vdpa_config_ops *ops = vdpa->config;
1441 	struct vhost_vdpa *v;
1442 	int minor;
1443 	int i, r;
1444 
1445 	/* We can't support a platform-IOMMU device with more than one
1446 	 * group or address space.
1447 	 */
1448 	if (!ops->set_map && !ops->dma_map &&
1449 	    (vdpa->ngroups > 1 || vdpa->nas > 1))
1450 		return -EOPNOTSUPP;
1451 
1452 	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
1453 	if (!v)
1454 		return -ENOMEM;
1455 
1456 	minor = ida_simple_get(&vhost_vdpa_ida, 0,
1457 			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
1458 	if (minor < 0) {
1459 		kfree(v);
1460 		return minor;
1461 	}
1462 
1463 	atomic_set(&v->opened, 0);
1464 	v->minor = minor;
1465 	v->vdpa = vdpa;
1466 	v->nvqs = vdpa->nvqs;
1467 	v->virtio_id = ops->get_device_id(vdpa);
1468 
1469 	device_initialize(&v->dev);
1470 	v->dev.release = vhost_vdpa_release_dev;
1471 	v->dev.parent = &vdpa->dev;
1472 	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
1473 	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
1474 			       GFP_KERNEL);
1475 	if (!v->vqs) {
1476 		r = -ENOMEM;
1477 		goto err;
1478 	}
1479 
1480 	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1481 	if (r)
1482 		goto err;
1483 
1484 	cdev_init(&v->cdev, &vhost_vdpa_fops);
1485 	v->cdev.owner = THIS_MODULE;
1486 
1487 	r = cdev_device_add(&v->cdev, &v->dev);
1488 	if (r)
1489 		goto err;
1490 
1491 	init_completion(&v->completion);
1492 	vdpa_set_drvdata(vdpa, v);
1493 
1494 	for (i = 0; i < VHOST_VDPA_IOTLB_BUCKETS; i++)
1495 		INIT_HLIST_HEAD(&v->as[i]);
1496 
1497 	return 0;
1498 
1499 err:
1500 	put_device(&v->dev);
1501 	ida_simple_remove(&vhost_vdpa_ida, v->minor);
1502 	return r;
1503 }
1504 
1505 static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1506 {
1507 	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1508 	int opened;
1509 
1510 	cdev_device_del(&v->cdev, &v->dev);
1511 
1512 	do {
1513 		opened = atomic_cmpxchg(&v->opened, 0, 1);
1514 		if (!opened)
1515 			break;
1516 		wait_for_completion(&v->completion);
1517 	} while (1);
1518 
1519 	put_device(&v->dev);
1520 }
1521 
1522 static struct vdpa_driver vhost_vdpa_driver = {
1523 	.driver = {
1524 		.name	= "vhost_vdpa",
1525 	},
1526 	.probe	= vhost_vdpa_probe,
1527 	.remove	= vhost_vdpa_remove,
1528 };
1529 
1530 static int __init vhost_vdpa_init(void)
1531 {
1532 	int r;
1533 
1534 	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1535 				"vhost-vdpa");
1536 	if (r)
1537 		goto err_alloc_chrdev;
1538 
1539 	r = vdpa_register_driver(&vhost_vdpa_driver);
1540 	if (r)
1541 		goto err_vdpa_register_driver;
1542 
1543 	return 0;
1544 
1545 err_vdpa_register_driver:
1546 	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1547 err_alloc_chrdev:
1548 	return r;
1549 }
1550 module_init(vhost_vdpa_init);
1551 
1552 static void __exit vhost_vdpa_exit(void)
1553 {
1554 	vdpa_unregister_driver(&vhost_vdpa_driver);
1555 	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1556 }
1557 module_exit(vhost_vdpa_exit);
1558 
1559 MODULE_VERSION("0.0.1");
1560 MODULE_LICENSE("GPL v2");
1561 MODULE_AUTHOR("Intel Corporation");
1562 MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
1563