1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VDUSE: vDPA Device in Userspace
4  *
5  * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6  *
7  * Author: Xie Yongji <xieyongji@bytedance.com>
8  *
9  */
10 
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34 
35 #include "iova_domain.h"
36 
37 #define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
38 #define DRV_DESC     "vDPA Device in Userspace"
39 #define DRV_LICENSE  "GPL v2"
40 
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
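/*
 * Rough IOVA layout handed to userspace (see iova_domain.c): the first
 * bounce_size bytes starting at IOVA 0 form the bounce-buffer region,
 * backed by kernel bounce pages or, after a successful
 * VDUSE_IOTLB_REG_UMEM, by pages supplied by userspace; the remaining
 * space up to VDUSE_IOVA_SIZE is used for coherent allocations such as
 * the virtqueue rings.
 */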
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48 
49 #define IRQ_UNBOUND -1
50 
51 struct vduse_virtqueue {
52 	u16 index;
53 	u16 num_max;
54 	u32 num;
55 	u64 desc_addr;
56 	u64 driver_addr;
57 	u64 device_addr;
58 	struct vdpa_vq_state state;
59 	bool ready;
60 	bool kicked;
61 	spinlock_t kick_lock;
62 	spinlock_t irq_lock;
63 	struct eventfd_ctx *kickfd;
64 	struct vdpa_callback cb;
65 	struct work_struct inject;
66 	struct work_struct kick;
67 	int irq_effective_cpu;
68 	struct cpumask irq_affinity;
69 	struct kobject kobj;
70 };
71 
72 struct vduse_dev;
73 
74 struct vduse_vdpa {
75 	struct vdpa_device vdpa;
76 	struct vduse_dev *dev;
77 };
78 
79 struct vduse_umem {
80 	unsigned long iova;
81 	unsigned long npages;
82 	struct page **pages;
83 	struct mm_struct *mm;
84 };
85 
86 struct vduse_dev {
87 	struct vduse_vdpa *vdev;
88 	struct device *dev;
89 	struct vduse_virtqueue **vqs;
90 	struct vduse_iova_domain *domain;
91 	char *name;
92 	struct mutex lock;
93 	spinlock_t msg_lock;
94 	u64 msg_unique;
95 	u32 msg_timeout;
96 	wait_queue_head_t waitq;
97 	struct list_head send_list;
98 	struct list_head recv_list;
99 	struct vdpa_callback config_cb;
100 	struct work_struct inject;
101 	spinlock_t irq_lock;
102 	struct rw_semaphore rwsem;
103 	int minor;
104 	bool broken;
105 	bool connected;
106 	u64 api_version;
107 	u64 device_features;
108 	u64 driver_features;
109 	u32 device_id;
110 	u32 vendor_id;
111 	u32 generation;
112 	u32 config_size;
113 	void *config;
114 	u8 status;
115 	u32 vq_num;
116 	u32 vq_align;
117 	struct vduse_umem *umem;
118 	struct mutex mem_lock;
119 	unsigned int bounce_size;
120 	struct mutex domain_lock;
121 };
122 
123 struct vduse_dev_msg {
124 	struct vduse_dev_request req;
125 	struct vduse_dev_response resp;
126 	struct list_head list;
127 	wait_queue_head_t waitq;
128 	bool completed;
129 };
130 
131 struct vduse_control {
132 	u64 api_version;
133 };
134 
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137 
138 static dev_t vduse_major;
139 static struct class *vduse_class;
140 static struct cdev vduse_ctrl_cdev;
141 static struct cdev vduse_cdev;
142 static struct workqueue_struct *vduse_irq_wq;
143 static struct workqueue_struct *vduse_irq_bound_wq;
144 
145 static u32 allowed_device_id[] = {
146 	VIRTIO_ID_BLOCK,
147 };
148 
149 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
150 {
151 	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
152 
153 	return vdev->dev;
154 }
155 
156 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
157 {
158 	struct vdpa_device *vdpa = dev_to_vdpa(dev);
159 
160 	return vdpa_to_vduse(vdpa);
161 }
162 
163 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
164 					    uint32_t request_id)
165 {
166 	struct vduse_dev_msg *msg;
167 
168 	list_for_each_entry(msg, head, list) {
169 		if (msg->req.request_id == request_id) {
170 			list_del(&msg->list);
171 			return msg;
172 		}
173 	}
174 
175 	return NULL;
176 }
177 
178 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
179 {
180 	struct vduse_dev_msg *msg = NULL;
181 
182 	if (!list_empty(head)) {
183 		msg = list_first_entry(head, struct vduse_dev_msg, list);
184 		list_del(&msg->list);
185 	}
186 
187 	return msg;
188 }
189 
190 static void vduse_enqueue_msg(struct list_head *head,
191 			      struct vduse_dev_msg *msg)
192 {
193 	list_add_tail(&msg->list, head);
194 }
195 
196 static void vduse_dev_broken(struct vduse_dev *dev)
197 {
198 	struct vduse_dev_msg *msg, *tmp;
199 
200 	if (unlikely(dev->broken))
201 		return;
202 
203 	list_splice_init(&dev->recv_list, &dev->send_list);
204 	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205 		list_del(&msg->list);
206 		msg->completed = 1;
207 		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208 		wake_up(&msg->waitq);
209 	}
210 	dev->broken = true;
211 	wake_up(&dev->waitq);
212 }
213 
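/*
 * Send a control request to the userspace daemon and wait for its reply.
 *
 * The message is queued on dev->send_list, where vduse_dev_read_iter()
 * picks it up; once userspace writes back a response with the matching
 * request_id (see vduse_dev_write_iter()), the waiter is woken up.
 * A zero msg_timeout means wait forever; on timeout the device is
 * marked broken so that no further requests are issued.
 */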
214 static int vduse_dev_msg_sync(struct vduse_dev *dev,
215 			      struct vduse_dev_msg *msg)
216 {
217 	int ret;
218 
219 	if (unlikely(dev->broken))
220 		return -EIO;
221 
222 	init_waitqueue_head(&msg->waitq);
223 	spin_lock(&dev->msg_lock);
224 	if (unlikely(dev->broken)) {
225 		spin_unlock(&dev->msg_lock);
226 		return -EIO;
227 	}
228 	msg->req.request_id = dev->msg_unique++;
229 	vduse_enqueue_msg(&dev->send_list, msg);
230 	wake_up(&dev->waitq);
231 	spin_unlock(&dev->msg_lock);
232 	if (dev->msg_timeout)
233 		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
234 						  (long)dev->msg_timeout * HZ);
235 	else
236 		ret = wait_event_killable(msg->waitq, msg->completed);
237 
238 	spin_lock(&dev->msg_lock);
239 	if (!msg->completed) {
240 		list_del(&msg->list);
241 		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
242 		/* Mark the device as broken when there is a timeout */
243 		if (!ret)
244 			vduse_dev_broken(dev);
245 	}
246 	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
247 	spin_unlock(&dev->msg_lock);
248 
249 	return ret;
250 }
251 
252 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253 					 struct vduse_virtqueue *vq,
254 					 struct vdpa_vq_state_packed *packed)
255 {
256 	struct vduse_dev_msg msg = { 0 };
257 	int ret;
258 
259 	msg.req.type = VDUSE_GET_VQ_STATE;
260 	msg.req.vq_state.index = vq->index;
261 
262 	ret = vduse_dev_msg_sync(dev, &msg);
263 	if (ret)
264 		return ret;
265 
266 	packed->last_avail_counter =
267 			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268 	packed->last_avail_idx =
269 			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270 	packed->last_used_counter =
271 			msg.resp.vq_state.packed.last_used_counter & 0x0001;
272 	packed->last_used_idx =
273 			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
274 
275 	return 0;
276 }
277 
278 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279 					struct vduse_virtqueue *vq,
280 					struct vdpa_vq_state_split *split)
281 {
282 	struct vduse_dev_msg msg = { 0 };
283 	int ret;
284 
285 	msg.req.type = VDUSE_GET_VQ_STATE;
286 	msg.req.vq_state.index = vq->index;
287 
288 	ret = vduse_dev_msg_sync(dev, &msg);
289 	if (ret)
290 		return ret;
291 
292 	split->avail_index = msg.resp.vq_state.split.avail_index;
293 
294 	return 0;
295 }
296 
297 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
298 {
299 	struct vduse_dev_msg msg = { 0 };
300 
301 	msg.req.type = VDUSE_SET_STATUS;
302 	msg.req.s.status = status;
303 
304 	return vduse_dev_msg_sync(dev, &msg);
305 }
306 
307 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
308 				  u64 start, u64 last)
309 {
310 	struct vduse_dev_msg msg = { 0 };
311 
312 	if (last < start)
313 		return -EINVAL;
314 
315 	msg.req.type = VDUSE_UPDATE_IOTLB;
316 	msg.req.iova.start = start;
317 	msg.req.iova.last = last;
318 
319 	return vduse_dev_msg_sync(dev, &msg);
320 }
321 
322 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
323 {
324 	struct file *file = iocb->ki_filp;
325 	struct vduse_dev *dev = file->private_data;
326 	struct vduse_dev_msg *msg;
327 	int size = sizeof(struct vduse_dev_request);
328 	ssize_t ret;
329 
330 	if (iov_iter_count(to) < size)
331 		return -EINVAL;
332 
333 	spin_lock(&dev->msg_lock);
334 	while (1) {
335 		msg = vduse_dequeue_msg(&dev->send_list);
336 		if (msg)
337 			break;
338 
339 		ret = -EAGAIN;
340 		if (file->f_flags & O_NONBLOCK)
341 			goto unlock;
342 
343 		spin_unlock(&dev->msg_lock);
344 		ret = wait_event_interruptible_exclusive(dev->waitq,
345 					!list_empty(&dev->send_list));
346 		if (ret)
347 			return ret;
348 
349 		spin_lock(&dev->msg_lock);
350 	}
351 	spin_unlock(&dev->msg_lock);
352 	ret = copy_to_iter(&msg->req, size, to);
353 	spin_lock(&dev->msg_lock);
354 	if (ret != size) {
355 		ret = -EFAULT;
356 		vduse_enqueue_msg(&dev->send_list, msg);
357 		goto unlock;
358 	}
359 	vduse_enqueue_msg(&dev->recv_list, msg);
360 unlock:
361 	spin_unlock(&dev->msg_lock);
362 
363 	return ret;
364 }
365 
366 static bool is_mem_zero(const char *ptr, int size)
367 {
368 	int i;
369 
370 	for (i = 0; i < size; i++) {
371 		if (ptr[i])
372 			return false;
373 	}
374 	return true;
375 }
376 
377 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
378 {
379 	struct file *file = iocb->ki_filp;
380 	struct vduse_dev *dev = file->private_data;
381 	struct vduse_dev_response resp;
382 	struct vduse_dev_msg *msg;
383 	size_t ret;
384 
385 	ret = copy_from_iter(&resp, sizeof(resp), from);
386 	if (ret != sizeof(resp))
387 		return -EINVAL;
388 
389 	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
390 		return -EINVAL;
391 
392 	spin_lock(&dev->msg_lock);
393 	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
394 	if (!msg) {
395 		ret = -ENOENT;
396 		goto unlock;
397 	}
398 
399 	memcpy(&msg->resp, &resp, sizeof(resp));
400 	msg->completed = 1;
401 	wake_up(&msg->waitq);
402 unlock:
403 	spin_unlock(&dev->msg_lock);
404 
405 	return ret;
406 }
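/*
 * Userspace side (illustrative sketch, not part of this driver): a VDUSE
 * daemon services the messages above by read()ing a struct
 * vduse_dev_request from its /dev/vduse/<name> fd and write()ing back a
 * struct vduse_dev_response carrying the same request_id, e.g.:
 *
 *	struct vduse_dev_request req;
 *	struct vduse_dev_response resp = { 0 };
 *
 *	while (read(dev_fd, &req, sizeof(req)) == sizeof(req)) {
 *		resp.request_id = req.request_id;
 *		resp.result = VDUSE_REQ_RESULT_OK;
 *		// handle VDUSE_GET_VQ_STATE, VDUSE_SET_STATUS, VDUSE_UPDATE_IOTLB
 *		write(dev_fd, &resp, sizeof(resp));
 *	}
 *
 * dev_fd and the per-request handling are placeholders for the daemon's
 * own implementation.
 */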
407 
408 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
409 {
410 	struct vduse_dev *dev = file->private_data;
411 	__poll_t mask = 0;
412 
413 	poll_wait(file, &dev->waitq, wait);
414 
415 	spin_lock(&dev->msg_lock);
416 
417 	if (unlikely(dev->broken))
418 		mask |= EPOLLERR;
419 	if (!list_empty(&dev->send_list))
420 		mask |= EPOLLIN | EPOLLRDNORM;
421 	if (!list_empty(&dev->recv_list))
422 		mask |= EPOLLOUT | EPOLLWRNORM;
423 
424 	spin_unlock(&dev->msg_lock);
425 
426 	return mask;
427 }
428 
429 static void vduse_dev_reset(struct vduse_dev *dev)
430 {
431 	int i;
432 	struct vduse_iova_domain *domain = dev->domain;
433 
434 	/* The coherent mappings are handled in vduse_dev_free_coherent() */
435 	if (domain && domain->bounce_map)
436 		vduse_domain_reset_bounce_map(domain);
437 
438 	down_write(&dev->rwsem);
439 
440 	dev->status = 0;
441 	dev->driver_features = 0;
442 	dev->generation++;
443 	spin_lock(&dev->irq_lock);
444 	dev->config_cb.callback = NULL;
445 	dev->config_cb.private = NULL;
446 	spin_unlock(&dev->irq_lock);
447 	flush_work(&dev->inject);
448 
449 	for (i = 0; i < dev->vq_num; i++) {
450 		struct vduse_virtqueue *vq = dev->vqs[i];
451 
452 		vq->ready = false;
453 		vq->desc_addr = 0;
454 		vq->driver_addr = 0;
455 		vq->device_addr = 0;
456 		vq->num = 0;
457 		memset(&vq->state, 0, sizeof(vq->state));
458 
459 		spin_lock(&vq->kick_lock);
460 		vq->kicked = false;
461 		if (vq->kickfd)
462 			eventfd_ctx_put(vq->kickfd);
463 		vq->kickfd = NULL;
464 		spin_unlock(&vq->kick_lock);
465 
466 		spin_lock(&vq->irq_lock);
467 		vq->cb.callback = NULL;
468 		vq->cb.private = NULL;
469 		vq->cb.trigger = NULL;
470 		spin_unlock(&vq->irq_lock);
471 		flush_work(&vq->inject);
472 		flush_work(&vq->kick);
473 	}
474 
475 	up_write(&dev->rwsem);
476 }
477 
478 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
479 				u64 desc_area, u64 driver_area,
480 				u64 device_area)
481 {
482 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
483 	struct vduse_virtqueue *vq = dev->vqs[idx];
484 
485 	vq->desc_addr = desc_area;
486 	vq->driver_addr = driver_area;
487 	vq->device_addr = device_area;
488 
489 	return 0;
490 }
491 
492 static void vduse_vq_kick(struct vduse_virtqueue *vq)
493 {
494 	spin_lock(&vq->kick_lock);
495 	if (!vq->ready)
496 		goto unlock;
497 
498 	if (vq->kickfd)
499 		eventfd_signal(vq->kickfd, 1);
500 	else
501 		vq->kicked = true;
502 unlock:
503 	spin_unlock(&vq->kick_lock);
504 }
505 
506 static void vduse_vq_kick_work(struct work_struct *work)
507 {
508 	struct vduse_virtqueue *vq = container_of(work,
509 					struct vduse_virtqueue, kick);
510 
511 	vduse_vq_kick(vq);
512 }
513 
514 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
515 {
516 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
517 	struct vduse_virtqueue *vq = dev->vqs[idx];
518 
519 	if (!eventfd_signal_allowed()) {
520 		schedule_work(&vq->kick);
521 		return;
522 	}
523 	vduse_vq_kick(vq);
524 }
525 
526 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
527 			      struct vdpa_callback *cb)
528 {
529 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
530 	struct vduse_virtqueue *vq = dev->vqs[idx];
531 
532 	spin_lock(&vq->irq_lock);
533 	vq->cb.callback = cb->callback;
534 	vq->cb.private = cb->private;
535 	vq->cb.trigger = cb->trigger;
536 	spin_unlock(&vq->irq_lock);
537 }
538 
539 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
540 {
541 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
542 	struct vduse_virtqueue *vq = dev->vqs[idx];
543 
544 	vq->num = num;
545 }
546 
547 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
548 					u16 idx, bool ready)
549 {
550 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 	struct vduse_virtqueue *vq = dev->vqs[idx];
552 
553 	vq->ready = ready;
554 }
555 
556 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
557 {
558 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
559 	struct vduse_virtqueue *vq = dev->vqs[idx];
560 
561 	return vq->ready;
562 }
563 
564 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
565 				const struct vdpa_vq_state *state)
566 {
567 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
568 	struct vduse_virtqueue *vq = dev->vqs[idx];
569 
570 	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
571 		vq->state.packed.last_avail_counter =
572 				state->packed.last_avail_counter;
573 		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
574 		vq->state.packed.last_used_counter =
575 				state->packed.last_used_counter;
576 		vq->state.packed.last_used_idx = state->packed.last_used_idx;
577 	} else
578 		vq->state.split.avail_index = state->split.avail_index;
579 
580 	return 0;
581 }
582 
583 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
584 				struct vdpa_vq_state *state)
585 {
586 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
587 	struct vduse_virtqueue *vq = dev->vqs[idx];
588 
589 	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
590 		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
591 
592 	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
593 }
594 
595 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
596 {
597 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
598 
599 	return dev->vq_align;
600 }
601 
602 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
603 {
604 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
605 
606 	return dev->device_features;
607 }
608 
609 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
610 {
611 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
612 
613 	dev->driver_features = features;
614 	return 0;
615 }
616 
617 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
618 {
619 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
620 
621 	return dev->driver_features;
622 }
623 
624 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
625 				  struct vdpa_callback *cb)
626 {
627 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
628 
629 	spin_lock(&dev->irq_lock);
630 	dev->config_cb.callback = cb->callback;
631 	dev->config_cb.private = cb->private;
632 	spin_unlock(&dev->irq_lock);
633 }
634 
635 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
636 {
637 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
638 	u16 num_max = 0;
639 	int i;
640 
641 	for (i = 0; i < dev->vq_num; i++)
642 		if (num_max < dev->vqs[i]->num_max)
643 			num_max = dev->vqs[i]->num_max;
644 
645 	return num_max;
646 }
647 
648 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
649 {
650 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
651 
652 	return dev->device_id;
653 }
654 
655 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
656 {
657 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
658 
659 	return dev->vendor_id;
660 }
661 
662 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
663 {
664 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
665 
666 	return dev->status;
667 }
668 
669 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
670 {
671 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
672 
673 	if (vduse_dev_set_status(dev, status))
674 		return;
675 
676 	dev->status = status;
677 }
678 
679 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
680 {
681 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
682 
683 	return dev->config_size;
684 }
685 
686 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
687 				  void *buf, unsigned int len)
688 {
689 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
690 
691 	/* Initialize the buffer in case of partial copy. */
692 	memset(buf, 0, len);
693 
694 	if (offset > dev->config_size)
695 		return;
696 
697 	if (len > dev->config_size - offset)
698 		len = dev->config_size - offset;
699 
700 	memcpy(buf, dev->config + offset, len);
701 }
702 
703 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
704 			const void *buf, unsigned int len)
705 {
706 	/* Now we only support read-only configuration space */
707 }
708 
709 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
710 {
711 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
712 	int ret = vduse_dev_set_status(dev, 0);
713 
714 	vduse_dev_reset(dev);
715 
716 	return ret;
717 }
718 
719 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
720 {
721 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
722 
723 	return dev->generation;
724 }
725 
726 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
727 				      const struct cpumask *cpu_mask)
728 {
729 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
730 
731 	if (cpu_mask)
732 		cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
733 	else
734 		cpumask_setall(&dev->vqs[idx]->irq_affinity);
735 
736 	return 0;
737 }
738 
739 static const struct cpumask *
740 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
741 {
742 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
743 
744 	return &dev->vqs[idx]->irq_affinity;
745 }
746 
747 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
748 				unsigned int asid,
749 				struct vhost_iotlb *iotlb)
750 {
751 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
752 	int ret;
753 
754 	ret = vduse_domain_set_map(dev->domain, iotlb);
755 	if (ret)
756 		return ret;
757 
758 	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
759 	if (ret) {
760 		vduse_domain_clear_map(dev->domain, iotlb);
761 		return ret;
762 	}
763 
764 	return 0;
765 }
766 
767 static void vduse_vdpa_free(struct vdpa_device *vdpa)
768 {
769 	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
770 
771 	dev->vdev = NULL;
772 }
773 
774 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
775 	.set_vq_address		= vduse_vdpa_set_vq_address,
776 	.kick_vq		= vduse_vdpa_kick_vq,
777 	.set_vq_cb		= vduse_vdpa_set_vq_cb,
778 	.set_vq_num             = vduse_vdpa_set_vq_num,
779 	.set_vq_ready		= vduse_vdpa_set_vq_ready,
780 	.get_vq_ready		= vduse_vdpa_get_vq_ready,
781 	.set_vq_state		= vduse_vdpa_set_vq_state,
782 	.get_vq_state		= vduse_vdpa_get_vq_state,
783 	.get_vq_align		= vduse_vdpa_get_vq_align,
784 	.get_device_features	= vduse_vdpa_get_device_features,
785 	.set_driver_features	= vduse_vdpa_set_driver_features,
786 	.get_driver_features	= vduse_vdpa_get_driver_features,
787 	.set_config_cb		= vduse_vdpa_set_config_cb,
788 	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
789 	.get_device_id		= vduse_vdpa_get_device_id,
790 	.get_vendor_id		= vduse_vdpa_get_vendor_id,
791 	.get_status		= vduse_vdpa_get_status,
792 	.set_status		= vduse_vdpa_set_status,
793 	.get_config_size	= vduse_vdpa_get_config_size,
794 	.get_config		= vduse_vdpa_get_config,
795 	.set_config		= vduse_vdpa_set_config,
796 	.get_generation		= vduse_vdpa_get_generation,
797 	.set_vq_affinity	= vduse_vdpa_set_vq_affinity,
798 	.get_vq_affinity	= vduse_vdpa_get_vq_affinity,
799 	.reset			= vduse_vdpa_reset,
800 	.set_map		= vduse_vdpa_set_map,
801 	.free			= vduse_vdpa_free,
802 };
803 
804 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
805 				     unsigned long offset, size_t size,
806 				     enum dma_data_direction dir,
807 				     unsigned long attrs)
808 {
809 	struct vduse_dev *vdev = dev_to_vduse(dev);
810 	struct vduse_iova_domain *domain = vdev->domain;
811 
812 	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
813 }
814 
815 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
816 				size_t size, enum dma_data_direction dir,
817 				unsigned long attrs)
818 {
819 	struct vduse_dev *vdev = dev_to_vduse(dev);
820 	struct vduse_iova_domain *domain = vdev->domain;
821 
822 	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
823 }
824 
825 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
826 					dma_addr_t *dma_addr, gfp_t flag,
827 					unsigned long attrs)
828 {
829 	struct vduse_dev *vdev = dev_to_vduse(dev);
830 	struct vduse_iova_domain *domain = vdev->domain;
831 	unsigned long iova;
832 	void *addr;
833 
834 	*dma_addr = DMA_MAPPING_ERROR;
835 	addr = vduse_domain_alloc_coherent(domain, size,
836 				(dma_addr_t *)&iova, flag, attrs);
837 	if (!addr)
838 		return NULL;
839 
840 	*dma_addr = (dma_addr_t)iova;
841 
842 	return addr;
843 }
844 
845 static void vduse_dev_free_coherent(struct device *dev, size_t size,
846 					void *vaddr, dma_addr_t dma_addr,
847 					unsigned long attrs)
848 {
849 	struct vduse_dev *vdev = dev_to_vduse(dev);
850 	struct vduse_iova_domain *domain = vdev->domain;
851 
852 	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
853 }
854 
855 static size_t vduse_dev_max_mapping_size(struct device *dev)
856 {
857 	struct vduse_dev *vdev = dev_to_vduse(dev);
858 	struct vduse_iova_domain *domain = vdev->domain;
859 
860 	return domain->bounce_size;
861 }
862 
863 static const struct dma_map_ops vduse_dev_dma_ops = {
864 	.map_page = vduse_dev_map_page,
865 	.unmap_page = vduse_dev_unmap_page,
866 	.alloc = vduse_dev_alloc_coherent,
867 	.free = vduse_dev_free_coherent,
868 	.max_mapping_size = vduse_dev_max_mapping_size,
869 };
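/*
 * These DMA ops are installed on the vDPA device in vduse_dev_init_vdpa()
 * so that ring and buffer allocations made by the virtio-vdpa transport
 * are redirected into the VDUSE IOVA domain, where userspace can reach
 * them through the VDUSE_IOTLB_GET_FD/mmap() path.
 */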
870 
871 static unsigned int perm_to_file_flags(u8 perm)
872 {
873 	unsigned int flags = 0;
874 
875 	switch (perm) {
876 	case VDUSE_ACCESS_WO:
877 		flags |= O_WRONLY;
878 		break;
879 	case VDUSE_ACCESS_RO:
880 		flags |= O_RDONLY;
881 		break;
882 	case VDUSE_ACCESS_RW:
883 		flags |= O_RDWR;
884 		break;
885 	default:
886 		WARN(1, "invalidate vhost IOTLB permission\n");
887 		break;
888 	}
889 
890 	return flags;
891 }
892 
893 static int vduse_kickfd_setup(struct vduse_dev *dev,
894 			struct vduse_vq_eventfd *eventfd)
895 {
896 	struct eventfd_ctx *ctx = NULL;
897 	struct vduse_virtqueue *vq;
898 	u32 index;
899 
900 	if (eventfd->index >= dev->vq_num)
901 		return -EINVAL;
902 
903 	index = array_index_nospec(eventfd->index, dev->vq_num);
904 	vq = dev->vqs[index];
905 	if (eventfd->fd >= 0) {
906 		ctx = eventfd_ctx_fdget(eventfd->fd);
907 		if (IS_ERR(ctx))
908 			return PTR_ERR(ctx);
909 	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
910 		return 0;
911 
912 	spin_lock(&vq->kick_lock);
913 	if (vq->kickfd)
914 		eventfd_ctx_put(vq->kickfd);
915 	vq->kickfd = ctx;
916 	if (vq->ready && vq->kicked && vq->kickfd) {
917 		eventfd_signal(vq->kickfd, 1);
918 		vq->kicked = false;
919 	}
920 	spin_unlock(&vq->kick_lock);
921 
922 	return 0;
923 }
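/*
 * Userspace side (illustrative sketch): the daemon typically creates one
 * eventfd per virtqueue and registers it with VDUSE_VQ_SETUP_KICKFD so
 * that vduse_vq_kick() has something to signal, e.g.:
 *
 *	struct vduse_vq_eventfd efd = {
 *		.index = vq_index,
 *		.fd = eventfd(0, EFD_CLOEXEC),
 *	};
 *
 *	if (ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &efd) < 0)
 *		return -1;
 *
 * Passing VDUSE_EVENTFD_DEASSIGN as the fd detaches a previously
 * registered eventfd; vq_index and dev_fd above are placeholders.
 */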
924 
925 static bool vduse_dev_is_ready(struct vduse_dev *dev)
926 {
927 	int i;
928 
929 	for (i = 0; i < dev->vq_num; i++)
930 		if (!dev->vqs[i]->num_max)
931 			return false;
932 
933 	return true;
934 }
935 
936 static void vduse_dev_irq_inject(struct work_struct *work)
937 {
938 	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
939 
940 	spin_lock_bh(&dev->irq_lock);
941 	if (dev->config_cb.callback)
942 		dev->config_cb.callback(dev->config_cb.private);
943 	spin_unlock_bh(&dev->irq_lock);
944 }
945 
946 static void vduse_vq_irq_inject(struct work_struct *work)
947 {
948 	struct vduse_virtqueue *vq = container_of(work,
949 					struct vduse_virtqueue, inject);
950 
951 	spin_lock_bh(&vq->irq_lock);
952 	if (vq->ready && vq->cb.callback)
953 		vq->cb.callback(vq->cb.private);
954 	spin_unlock_bh(&vq->irq_lock);
955 }
956 
957 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
958 {
959 	bool signal = false;
960 
961 	if (!vq->cb.trigger)
962 		return false;
963 
964 	spin_lock_irq(&vq->irq_lock);
965 	if (vq->ready && vq->cb.trigger) {
966 		eventfd_signal(vq->cb.trigger, 1);
967 		signal = true;
968 	}
969 	spin_unlock_irq(&vq->irq_lock);
970 
971 	return signal;
972 }
973 
974 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
975 				    struct work_struct *irq_work,
976 				    int irq_effective_cpu)
977 {
978 	int ret = -EINVAL;
979 
980 	down_read(&dev->rwsem);
981 	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
982 		goto unlock;
983 
984 	ret = 0;
985 	if (irq_effective_cpu == IRQ_UNBOUND)
986 		queue_work(vduse_irq_wq, irq_work);
987 	else
988 		queue_work_on(irq_effective_cpu,
989 			      vduse_irq_bound_wq, irq_work);
990 unlock:
991 	up_read(&dev->rwsem);
992 
993 	return ret;
994 }
995 
996 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
997 				u64 iova, u64 size)
998 {
999 	int ret;
1000 
1001 	mutex_lock(&dev->mem_lock);
1002 	ret = -ENOENT;
1003 	if (!dev->umem)
1004 		goto unlock;
1005 
1006 	ret = -EINVAL;
1007 	if (!dev->domain)
1008 		goto unlock;
1009 
1010 	if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1011 		goto unlock;
1012 
1013 	vduse_domain_remove_user_bounce_pages(dev->domain);
1014 	unpin_user_pages_dirty_lock(dev->umem->pages,
1015 				    dev->umem->npages, true);
1016 	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1017 	mmdrop(dev->umem->mm);
1018 	vfree(dev->umem->pages);
1019 	kfree(dev->umem);
1020 	dev->umem = NULL;
1021 	ret = 0;
1022 unlock:
1023 	mutex_unlock(&dev->mem_lock);
1024 	return ret;
1025 }
1026 
1027 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1028 			      u64 iova, u64 uaddr, u64 size)
1029 {
1030 	struct page **page_list = NULL;
1031 	struct vduse_umem *umem = NULL;
1032 	long pinned = 0;
1033 	unsigned long npages, lock_limit;
1034 	int ret;
1035 
1036 	if (!dev->domain || !dev->domain->bounce_map ||
1037 	    size != dev->domain->bounce_size ||
1038 	    iova != 0 || uaddr & ~PAGE_MASK)
1039 		return -EINVAL;
1040 
1041 	mutex_lock(&dev->mem_lock);
1042 	ret = -EEXIST;
1043 	if (dev->umem)
1044 		goto unlock;
1045 
1046 	ret = -ENOMEM;
1047 	npages = size >> PAGE_SHIFT;
1048 	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1049 			      GFP_KERNEL_ACCOUNT);
1050 	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1051 	if (!page_list || !umem)
1052 		goto unlock;
1053 
1054 	mmap_read_lock(current->mm);
1055 
1056 	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1057 	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1058 		goto out;
1059 
1060 	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1061 				page_list);
1062 	if (pinned != npages) {
1063 		ret = pinned < 0 ? pinned : -ENOMEM;
1064 		goto out;
1065 	}
1066 
1067 	ret = vduse_domain_add_user_bounce_pages(dev->domain,
1068 						 page_list, pinned);
1069 	if (ret)
1070 		goto out;
1071 
1072 	atomic64_add(npages, &current->mm->pinned_vm);
1073 
1074 	umem->pages = page_list;
1075 	umem->npages = pinned;
1076 	umem->iova = iova;
1077 	umem->mm = current->mm;
1078 	mmgrab(current->mm);
1079 
1080 	dev->umem = umem;
1081 out:
1082 	if (ret && pinned > 0)
1083 		unpin_user_pages(page_list, pinned);
1084 
1085 	mmap_read_unlock(current->mm);
1086 unlock:
1087 	if (ret) {
1088 		vfree(page_list);
1089 		kfree(umem);
1090 	}
1091 	mutex_unlock(&dev->mem_lock);
1092 	return ret;
1093 }
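/*
 * Userspace side (illustrative sketch): the daemon may supply its own
 * pages to back the bounce-buffer region, which must start at IOVA 0 and
 * cover exactly bounce_size bytes (advertised via VDUSE_IOTLB_GET_INFO
 * with VDUSE_IOVA_CAP_UMEM set), e.g.:
 *
 *	struct vduse_iova_umem umem = {
 *		.uaddr = (uintptr_t)buf,	// page-aligned buffer
 *		.iova = 0,
 *		.size = bounce_size,
 *	};
 *
 *	ioctl(dev_fd, VDUSE_IOTLB_REG_UMEM, &umem);
 *
 * buf, bounce_size and dev_fd are placeholders for the daemon's state.
 */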
1094 
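/*
 * Advance irq_effective_cpu to the next online CPU in the virtqueue's
 * affinity mask, round-robin, wrapping around at the end of the mask.
 * The chosen CPU is used by vduse_dev_queue_irq_work() to queue the
 * interrupt injection on vduse_irq_bound_wq.
 */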
1095 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1096 {
1097 	int curr_cpu = vq->irq_effective_cpu;
1098 
1099 	while (true) {
1100 		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1101 		if (cpu_online(curr_cpu))
1102 			break;
1103 
1104 		if (curr_cpu >= nr_cpu_ids)
1105 			curr_cpu = IRQ_UNBOUND;
1106 	}
1107 
1108 	vq->irq_effective_cpu = curr_cpu;
1109 }
1110 
1111 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1112 			    unsigned long arg)
1113 {
1114 	struct vduse_dev *dev = file->private_data;
1115 	void __user *argp = (void __user *)arg;
1116 	int ret;
1117 
1118 	if (unlikely(dev->broken))
1119 		return -EPERM;
1120 
1121 	switch (cmd) {
1122 	case VDUSE_IOTLB_GET_FD: {
1123 		struct vduse_iotlb_entry entry;
1124 		struct vhost_iotlb_map *map;
1125 		struct vdpa_map_file *map_file;
1126 		struct file *f = NULL;
1127 
1128 		ret = -EFAULT;
1129 		if (copy_from_user(&entry, argp, sizeof(entry)))
1130 			break;
1131 
1132 		ret = -EINVAL;
1133 		if (entry.start > entry.last)
1134 			break;
1135 
1136 		mutex_lock(&dev->domain_lock);
1137 		if (!dev->domain) {
1138 			mutex_unlock(&dev->domain_lock);
1139 			break;
1140 		}
1141 		spin_lock(&dev->domain->iotlb_lock);
1142 		map = vhost_iotlb_itree_first(dev->domain->iotlb,
1143 					      entry.start, entry.last);
1144 		if (map) {
1145 			map_file = (struct vdpa_map_file *)map->opaque;
1146 			f = get_file(map_file->file);
1147 			entry.offset = map_file->offset;
1148 			entry.start = map->start;
1149 			entry.last = map->last;
1150 			entry.perm = map->perm;
1151 		}
1152 		spin_unlock(&dev->domain->iotlb_lock);
1153 		mutex_unlock(&dev->domain_lock);
1154 		ret = -EINVAL;
1155 		if (!f)
1156 			break;
1157 
1158 		ret = -EFAULT;
1159 		if (copy_to_user(argp, &entry, sizeof(entry))) {
1160 			fput(f);
1161 			break;
1162 		}
1163 		ret = receive_fd(f, perm_to_file_flags(entry.perm));
1164 		fput(f);
1165 		break;
1166 	}
1167 	case VDUSE_DEV_GET_FEATURES:
1168 		/*
1169 		 * Just mirror what the driver wrote here.
1170 		 * The driver is expected to check FEATURES_OK later.
1171 		 */
1172 		ret = put_user(dev->driver_features, (u64 __user *)argp);
1173 		break;
1174 	case VDUSE_DEV_SET_CONFIG: {
1175 		struct vduse_config_data config;
1176 		unsigned long size = offsetof(struct vduse_config_data,
1177 					      buffer);
1178 
1179 		ret = -EFAULT;
1180 		if (copy_from_user(&config, argp, size))
1181 			break;
1182 
1183 		ret = -EINVAL;
1184 		if (config.offset > dev->config_size ||
1185 		    config.length == 0 ||
1186 		    config.length > dev->config_size - config.offset)
1187 			break;
1188 
1189 		ret = -EFAULT;
1190 		if (copy_from_user(dev->config + config.offset, argp + size,
1191 				   config.length))
1192 			break;
1193 
1194 		ret = 0;
1195 		break;
1196 	}
1197 	case VDUSE_DEV_INJECT_CONFIG_IRQ:
1198 		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1199 		break;
1200 	case VDUSE_VQ_SETUP: {
1201 		struct vduse_vq_config config;
1202 		u32 index;
1203 
1204 		ret = -EFAULT;
1205 		if (copy_from_user(&config, argp, sizeof(config)))
1206 			break;
1207 
1208 		ret = -EINVAL;
1209 		if (config.index >= dev->vq_num)
1210 			break;
1211 
1212 		if (!is_mem_zero((const char *)config.reserved,
1213 				 sizeof(config.reserved)))
1214 			break;
1215 
1216 		index = array_index_nospec(config.index, dev->vq_num);
1217 		dev->vqs[index]->num_max = config.max_size;
1218 		ret = 0;
1219 		break;
1220 	}
1221 	case VDUSE_VQ_GET_INFO: {
1222 		struct vduse_vq_info vq_info;
1223 		struct vduse_virtqueue *vq;
1224 		u32 index;
1225 
1226 		ret = -EFAULT;
1227 		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1228 			break;
1229 
1230 		ret = -EINVAL;
1231 		if (vq_info.index >= dev->vq_num)
1232 			break;
1233 
1234 		index = array_index_nospec(vq_info.index, dev->vq_num);
1235 		vq = dev->vqs[index];
1236 		vq_info.desc_addr = vq->desc_addr;
1237 		vq_info.driver_addr = vq->driver_addr;
1238 		vq_info.device_addr = vq->device_addr;
1239 		vq_info.num = vq->num;
1240 
1241 		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1242 			vq_info.packed.last_avail_counter =
1243 				vq->state.packed.last_avail_counter;
1244 			vq_info.packed.last_avail_idx =
1245 				vq->state.packed.last_avail_idx;
1246 			vq_info.packed.last_used_counter =
1247 				vq->state.packed.last_used_counter;
1248 			vq_info.packed.last_used_idx =
1249 				vq->state.packed.last_used_idx;
1250 		} else
1251 			vq_info.split.avail_index =
1252 				vq->state.split.avail_index;
1253 
1254 		vq_info.ready = vq->ready;
1255 
1256 		ret = -EFAULT;
1257 		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1258 			break;
1259 
1260 		ret = 0;
1261 		break;
1262 	}
1263 	case VDUSE_VQ_SETUP_KICKFD: {
1264 		struct vduse_vq_eventfd eventfd;
1265 
1266 		ret = -EFAULT;
1267 		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1268 			break;
1269 
1270 		ret = vduse_kickfd_setup(dev, &eventfd);
1271 		break;
1272 	}
1273 	case VDUSE_VQ_INJECT_IRQ: {
1274 		u32 index;
1275 
1276 		ret = -EFAULT;
1277 		if (get_user(index, (u32 __user *)argp))
1278 			break;
1279 
1280 		ret = -EINVAL;
1281 		if (index >= dev->vq_num)
1282 			break;
1283 
1284 		ret = 0;
1285 		index = array_index_nospec(index, dev->vq_num);
1286 		if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1287 			vduse_vq_update_effective_cpu(dev->vqs[index]);
1288 			ret = vduse_dev_queue_irq_work(dev,
1289 						&dev->vqs[index]->inject,
1290 						dev->vqs[index]->irq_effective_cpu);
1291 		}
1292 		break;
1293 	}
1294 	case VDUSE_IOTLB_REG_UMEM: {
1295 		struct vduse_iova_umem umem;
1296 
1297 		ret = -EFAULT;
1298 		if (copy_from_user(&umem, argp, sizeof(umem)))
1299 			break;
1300 
1301 		ret = -EINVAL;
1302 		if (!is_mem_zero((const char *)umem.reserved,
1303 				 sizeof(umem.reserved)))
1304 			break;
1305 
1306 		mutex_lock(&dev->domain_lock);
1307 		ret = vduse_dev_reg_umem(dev, umem.iova,
1308 					 umem.uaddr, umem.size);
1309 		mutex_unlock(&dev->domain_lock);
1310 		break;
1311 	}
1312 	case VDUSE_IOTLB_DEREG_UMEM: {
1313 		struct vduse_iova_umem umem;
1314 
1315 		ret = -EFAULT;
1316 		if (copy_from_user(&umem, argp, sizeof(umem)))
1317 			break;
1318 
1319 		ret = -EINVAL;
1320 		if (!is_mem_zero((const char *)umem.reserved,
1321 				 sizeof(umem.reserved)))
1322 			break;
1323 		mutex_lock(&dev->domain_lock);
1324 		ret = vduse_dev_dereg_umem(dev, umem.iova,
1325 					   umem.size);
1326 		mutex_unlock(&dev->domain_lock);
1327 		break;
1328 	}
1329 	case VDUSE_IOTLB_GET_INFO: {
1330 		struct vduse_iova_info info;
1331 		struct vhost_iotlb_map *map;
1332 
1333 		ret = -EFAULT;
1334 		if (copy_from_user(&info, argp, sizeof(info)))
1335 			break;
1336 
1337 		ret = -EINVAL;
1338 		if (info.start > info.last)
1339 			break;
1340 
1341 		if (!is_mem_zero((const char *)info.reserved,
1342 				 sizeof(info.reserved)))
1343 			break;
1344 
1345 		mutex_lock(&dev->domain_lock);
1346 		if (!dev->domain) {
1347 			mutex_unlock(&dev->domain_lock);
1348 			break;
1349 		}
1350 		spin_lock(&dev->domain->iotlb_lock);
1351 		map = vhost_iotlb_itree_first(dev->domain->iotlb,
1352 					      info.start, info.last);
1353 		if (map) {
1354 			info.start = map->start;
1355 			info.last = map->last;
1356 			info.capability = 0;
1357 			if (dev->domain->bounce_map && map->start == 0 &&
1358 			    map->last == dev->domain->bounce_size - 1)
1359 				info.capability |= VDUSE_IOVA_CAP_UMEM;
1360 		}
1361 		spin_unlock(&dev->domain->iotlb_lock);
1362 		mutex_unlock(&dev->domain_lock);
1363 		if (!map)
1364 			break;
1365 
1366 		ret = -EFAULT;
1367 		if (copy_to_user(argp, &info, sizeof(info)))
1368 			break;
1369 
1370 		ret = 0;
1371 		break;
1372 	}
1373 	default:
1374 		ret = -ENOIOCTLCMD;
1375 		break;
1376 	}
1377 
1378 	return ret;
1379 }
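/*
 * Illustrative flow for the IOTLB ioctls above (userspace side, not part
 * of this driver): after receiving a VDUSE_UPDATE_IOTLB request, the
 * daemon re-resolves the affected range with VDUSE_IOTLB_GET_FD and
 * mmap()s the returned file descriptor to regain access to the I/O
 * memory, roughly:
 *
 *	struct vduse_iotlb_entry entry = { .start = iova, .last = iova };
 *	int fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
 *
 *	if (fd >= 0)
 *		base = mmap(NULL, entry.last - entry.start + 1,
 *			    prot_for(entry.perm), MAP_SHARED, fd,
 *			    entry.offset);
 *
 * iova, dev_fd and prot_for() are placeholders; see
 * Documentation/userspace-api/vduse.rst for the full description.
 */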
1380 
1381 static int vduse_dev_release(struct inode *inode, struct file *file)
1382 {
1383 	struct vduse_dev *dev = file->private_data;
1384 
1385 	mutex_lock(&dev->domain_lock);
1386 	if (dev->domain)
1387 		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1388 	mutex_unlock(&dev->domain_lock);
1389 	spin_lock(&dev->msg_lock);
1390 	/* Make sure the in-flight messages can be processed after reconnection */
1391 	list_splice_init(&dev->recv_list, &dev->send_list);
1392 	spin_unlock(&dev->msg_lock);
1393 	dev->connected = false;
1394 
1395 	return 0;
1396 }
1397 
1398 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1399 {
1400 	struct vduse_dev *dev;
1401 
1402 	mutex_lock(&vduse_lock);
1403 	dev = idr_find(&vduse_idr, minor);
1404 	mutex_unlock(&vduse_lock);
1405 
1406 	return dev;
1407 }
1408 
1409 static int vduse_dev_open(struct inode *inode, struct file *file)
1410 {
1411 	int ret;
1412 	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1413 
1414 	if (!dev)
1415 		return -ENODEV;
1416 
1417 	ret = -EBUSY;
1418 	mutex_lock(&dev->lock);
1419 	if (dev->connected)
1420 		goto unlock;
1421 
1422 	ret = 0;
1423 	dev->connected = true;
1424 	file->private_data = dev;
1425 unlock:
1426 	mutex_unlock(&dev->lock);
1427 
1428 	return ret;
1429 }
1430 
1431 static const struct file_operations vduse_dev_fops = {
1432 	.owner		= THIS_MODULE,
1433 	.open		= vduse_dev_open,
1434 	.release	= vduse_dev_release,
1435 	.read_iter	= vduse_dev_read_iter,
1436 	.write_iter	= vduse_dev_write_iter,
1437 	.poll		= vduse_dev_poll,
1438 	.unlocked_ioctl	= vduse_dev_ioctl,
1439 	.compat_ioctl	= compat_ptr_ioctl,
1440 	.llseek		= noop_llseek,
1441 };
1442 
1443 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1444 {
1445 	return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1446 }
1447 
1448 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1449 				     const char *buf, size_t count)
1450 {
1451 	cpumask_var_t new_value;
1452 	int ret;
1453 
1454 	if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1455 		return -ENOMEM;
1456 
1457 	ret = cpumask_parse(buf, new_value);
1458 	if (ret)
1459 		goto free_mask;
1460 
1461 	ret = -EINVAL;
1462 	if (!cpumask_intersects(new_value, cpu_online_mask))
1463 		goto free_mask;
1464 
1465 	cpumask_copy(&vq->irq_affinity, new_value);
1466 	ret = count;
1467 free_mask:
1468 	free_cpumask_var(new_value);
1469 	return ret;
1470 }
1471 
1472 struct vq_sysfs_entry {
1473 	struct attribute attr;
1474 	ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1475 	ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1476 			 size_t count);
1477 };
1478 
1479 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1480 
1481 static struct attribute *vq_attrs[] = {
1482 	&irq_cb_affinity_attr.attr,
1483 	NULL,
1484 };
1485 ATTRIBUTE_GROUPS(vq);
1486 
1487 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1488 			    char *buf)
1489 {
1490 	struct vduse_virtqueue *vq = container_of(kobj,
1491 					struct vduse_virtqueue, kobj);
1492 	struct vq_sysfs_entry *entry = container_of(attr,
1493 					struct vq_sysfs_entry, attr);
1494 
1495 	if (!entry->show)
1496 		return -EIO;
1497 
1498 	return entry->show(vq, buf);
1499 }
1500 
1501 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1502 			     const char *buf, size_t count)
1503 {
1504 	struct vduse_virtqueue *vq = container_of(kobj,
1505 					struct vduse_virtqueue, kobj);
1506 	struct vq_sysfs_entry *entry = container_of(attr,
1507 					struct vq_sysfs_entry, attr);
1508 
1509 	if (!entry->store)
1510 		return -EIO;
1511 
1512 	return entry->store(vq, buf, count);
1513 }
1514 
1515 static const struct sysfs_ops vq_sysfs_ops = {
1516 	.show = vq_attr_show,
1517 	.store = vq_attr_store,
1518 };
1519 
1520 static void vq_release(struct kobject *kobj)
1521 {
1522 	struct vduse_virtqueue *vq = container_of(kobj,
1523 					struct vduse_virtqueue, kobj);
1524 	kfree(vq);
1525 }
1526 
1527 static const struct kobj_type vq_type = {
1528 	.release	= vq_release,
1529 	.sysfs_ops	= &vq_sysfs_ops,
1530 	.default_groups	= vq_groups,
1531 };
1532 
1533 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1534 {
1535 	int i;
1536 
1537 	if (!dev->vqs)
1538 		return;
1539 
1540 	for (i = 0; i < dev->vq_num; i++)
1541 		kobject_put(&dev->vqs[i]->kobj);
1542 	kfree(dev->vqs);
1543 }
1544 
1545 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1546 {
1547 	int ret, i;
1548 
1549 	dev->vq_align = vq_align;
1550 	dev->vq_num = vq_num;
1551 	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1552 	if (!dev->vqs)
1553 		return -ENOMEM;
1554 
1555 	for (i = 0; i < vq_num; i++) {
1556 		dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1557 		if (!dev->vqs[i]) {
1558 			ret = -ENOMEM;
1559 			goto err;
1560 		}
1561 
1562 		dev->vqs[i]->index = i;
1563 		dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1564 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1565 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1566 		spin_lock_init(&dev->vqs[i]->kick_lock);
1567 		spin_lock_init(&dev->vqs[i]->irq_lock);
1568 		cpumask_setall(&dev->vqs[i]->irq_affinity);
1569 
1570 		kobject_init(&dev->vqs[i]->kobj, &vq_type);
1571 		ret = kobject_add(&dev->vqs[i]->kobj,
1572 				  &dev->dev->kobj, "vq%d", i);
1573 		if (ret) {
1574 			kfree(dev->vqs[i]);
1575 			goto err;
1576 		}
1577 	}
1578 
1579 	return 0;
1580 err:
1581 	while (i--)
1582 		kobject_put(&dev->vqs[i]->kobj);
1583 	kfree(dev->vqs);
1584 	dev->vqs = NULL;
1585 	return ret;
1586 }
1587 
1588 static struct vduse_dev *vduse_dev_create(void)
1589 {
1590 	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1591 
1592 	if (!dev)
1593 		return NULL;
1594 
1595 	mutex_init(&dev->lock);
1596 	mutex_init(&dev->mem_lock);
1597 	mutex_init(&dev->domain_lock);
1598 	spin_lock_init(&dev->msg_lock);
1599 	INIT_LIST_HEAD(&dev->send_list);
1600 	INIT_LIST_HEAD(&dev->recv_list);
1601 	spin_lock_init(&dev->irq_lock);
1602 	init_rwsem(&dev->rwsem);
1603 
1604 	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1605 	init_waitqueue_head(&dev->waitq);
1606 
1607 	return dev;
1608 }
1609 
1610 static void vduse_dev_destroy(struct vduse_dev *dev)
1611 {
1612 	kfree(dev);
1613 }
1614 
1615 static struct vduse_dev *vduse_find_dev(const char *name)
1616 {
1617 	struct vduse_dev *dev;
1618 	int id;
1619 
1620 	idr_for_each_entry(&vduse_idr, dev, id)
1621 		if (!strcmp(dev->name, name))
1622 			return dev;
1623 
1624 	return NULL;
1625 }
1626 
1627 static int vduse_destroy_dev(char *name)
1628 {
1629 	struct vduse_dev *dev = vduse_find_dev(name);
1630 
1631 	if (!dev)
1632 		return -EINVAL;
1633 
1634 	mutex_lock(&dev->lock);
1635 	if (dev->vdev || dev->connected) {
1636 		mutex_unlock(&dev->lock);
1637 		return -EBUSY;
1638 	}
1639 	dev->connected = true;
1640 	mutex_unlock(&dev->lock);
1641 
1642 	vduse_dev_reset(dev);
1643 	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1644 	idr_remove(&vduse_idr, dev->minor);
1645 	kvfree(dev->config);
1646 	vduse_dev_deinit_vqs(dev);
1647 	if (dev->domain)
1648 		vduse_domain_destroy(dev->domain);
1649 	kfree(dev->name);
1650 	vduse_dev_destroy(dev);
1651 	module_put(THIS_MODULE);
1652 
1653 	return 0;
1654 }
1655 
1656 static bool device_is_allowed(u32 device_id)
1657 {
1658 	int i;
1659 
1660 	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1661 		if (allowed_device_id[i] == device_id)
1662 			return true;
1663 
1664 	return false;
1665 }
1666 
1667 static bool features_is_valid(struct vduse_dev_config *config)
1668 {
1669 	if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1670 		return false;
1671 
1672 	/* Now we only support read-only configuration space */
1673 	if ((config->device_id == VIRTIO_ID_BLOCK) &&
1674 			(config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1675 		return false;
1676 	else if ((config->device_id == VIRTIO_ID_NET) &&
1677 			(config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1678 		return false;
1679 
1680 	return true;
1681 }
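/*
 * Example of a feature set that passes the check above for a virtio-blk
 * device (illustrative only; the exact bits are chosen by userspace):
 *
 *	config->features = BIT_ULL(VIRTIO_F_VERSION_1) |
 *			   BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |
 *			   BIT_ULL(VIRTIO_BLK_F_SEG_MAX);
 *
 * VIRTIO_F_ACCESS_PLATFORM is mandatory because all DMA goes through the
 * VDUSE IOVA domain, while VIRTIO_BLK_F_CONFIG_WCE (and, for net devices,
 * VIRTIO_NET_F_CTRL_VQ) is filtered out by the checks above.
 */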
1682 
1683 static bool vduse_validate_config(struct vduse_dev_config *config)
1684 {
1685 	if (!is_mem_zero((const char *)config->reserved,
1686 			 sizeof(config->reserved)))
1687 		return false;
1688 
1689 	if (config->vq_align > PAGE_SIZE)
1690 		return false;
1691 
1692 	if (config->config_size > PAGE_SIZE)
1693 		return false;
1694 
1695 	if (config->vq_num > 0xffff)
1696 		return false;
1697 
1698 	if (!config->name[0])
1699 		return false;
1700 
1701 	if (!device_is_allowed(config->device_id))
1702 		return false;
1703 
1704 	if (!features_is_valid(config))
1705 		return false;
1706 
1707 	return true;
1708 }
1709 
1710 static ssize_t msg_timeout_show(struct device *device,
1711 				struct device_attribute *attr, char *buf)
1712 {
1713 	struct vduse_dev *dev = dev_get_drvdata(device);
1714 
1715 	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1716 }
1717 
1718 static ssize_t msg_timeout_store(struct device *device,
1719 				 struct device_attribute *attr,
1720 				 const char *buf, size_t count)
1721 {
1722 	struct vduse_dev *dev = dev_get_drvdata(device);
1723 	int ret;
1724 
1725 	ret = kstrtouint(buf, 10, &dev->msg_timeout);
1726 	if (ret < 0)
1727 		return ret;
1728 
1729 	return count;
1730 }
1731 
1732 static DEVICE_ATTR_RW(msg_timeout);
1733 
1734 static ssize_t bounce_size_show(struct device *device,
1735 				struct device_attribute *attr, char *buf)
1736 {
1737 	struct vduse_dev *dev = dev_get_drvdata(device);
1738 
1739 	return sysfs_emit(buf, "%u\n", dev->bounce_size);
1740 }
1741 
1742 static ssize_t bounce_size_store(struct device *device,
1743 				 struct device_attribute *attr,
1744 				 const char *buf, size_t count)
1745 {
1746 	struct vduse_dev *dev = dev_get_drvdata(device);
1747 	unsigned int bounce_size;
1748 	int ret;
1749 
1750 	ret = -EPERM;
1751 	mutex_lock(&dev->domain_lock);
1752 	if (dev->domain)
1753 		goto unlock;
1754 
1755 	ret = kstrtouint(buf, 10, &bounce_size);
1756 	if (ret < 0)
1757 		goto unlock;
1758 
1759 	ret = -EINVAL;
1760 	if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1761 	    bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1762 		goto unlock;
1763 
1764 	dev->bounce_size = bounce_size & PAGE_MASK;
1765 	ret = count;
1766 unlock:
1767 	mutex_unlock(&dev->domain_lock);
1768 	return ret;
1769 }
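/*
 * The bounce buffer size can only be tuned before the device is attached
 * to a vDPA bus (i.e. before the IOVA domain exists), typically via sysfs
 * (path shown for illustration):
 *
 *	echo 16777216 > /sys/class/vduse/<name>/bounce_size
 *
 * The value must lie in [VDUSE_MIN_BOUNCE_SIZE, VDUSE_MAX_BOUNCE_SIZE]
 * and is rounded down to a page boundary.
 */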
1770 
1771 static DEVICE_ATTR_RW(bounce_size);
1772 
1773 static struct attribute *vduse_dev_attrs[] = {
1774 	&dev_attr_msg_timeout.attr,
1775 	&dev_attr_bounce_size.attr,
1776 	NULL
1777 };
1778 
1779 ATTRIBUTE_GROUPS(vduse_dev);
1780 
1781 static int vduse_create_dev(struct vduse_dev_config *config,
1782 			    void *config_buf, u64 api_version)
1783 {
1784 	int ret;
1785 	struct vduse_dev *dev;
1786 
1787 	ret = -EEXIST;
1788 	if (vduse_find_dev(config->name))
1789 		goto err;
1790 
1791 	ret = -ENOMEM;
1792 	dev = vduse_dev_create();
1793 	if (!dev)
1794 		goto err;
1795 
1796 	dev->api_version = api_version;
1797 	dev->device_features = config->features;
1798 	dev->device_id = config->device_id;
1799 	dev->vendor_id = config->vendor_id;
1800 	dev->name = kstrdup(config->name, GFP_KERNEL);
1801 	if (!dev->name)
1802 		goto err_str;
1803 
1804 	dev->bounce_size = VDUSE_BOUNCE_SIZE;
1805 	dev->config = config_buf;
1806 	dev->config_size = config->config_size;
1807 
1808 	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1809 	if (ret < 0)
1810 		goto err_idr;
1811 
1812 	dev->minor = ret;
1813 	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1814 	dev->dev = device_create_with_groups(vduse_class, NULL,
1815 				MKDEV(MAJOR(vduse_major), dev->minor),
1816 				dev, vduse_dev_groups, "%s", config->name);
1817 	if (IS_ERR(dev->dev)) {
1818 		ret = PTR_ERR(dev->dev);
1819 		goto err_dev;
1820 	}
1821 
1822 	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1823 	if (ret)
1824 		goto err_vqs;
1825 
1826 	__module_get(THIS_MODULE);
1827 
1828 	return 0;
1829 err_vqs:
1830 	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1831 err_dev:
1832 	idr_remove(&vduse_idr, dev->minor);
1833 err_idr:
1834 	kfree(dev->name);
1835 err_str:
1836 	vduse_dev_destroy(dev);
1837 err:
1838 	return ret;
1839 }
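/*
 * Userspace reaches this function through the control device (see
 * vduse_ioctl() below). A minimal creation sequence looks roughly like:
 *
 *	int ctrl_fd = open("/dev/vduse/control", O_RDWR);
 *	uint64_t version = VDUSE_API_VERSION;
 *
 *	ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version);
 *	ioctl(ctrl_fd, VDUSE_CREATE_DEV, dev_config);
 *	int dev_fd = open("/dev/vduse/<name>", O_RDWR);
 *
 * where dev_config points to a struct vduse_dev_config followed by
 * config_size bytes of virtio config space; error handling is omitted.
 */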
1840 
1841 static long vduse_ioctl(struct file *file, unsigned int cmd,
1842 			unsigned long arg)
1843 {
1844 	int ret;
1845 	void __user *argp = (void __user *)arg;
1846 	struct vduse_control *control = file->private_data;
1847 
1848 	mutex_lock(&vduse_lock);
1849 	switch (cmd) {
1850 	case VDUSE_GET_API_VERSION:
1851 		ret = put_user(control->api_version, (u64 __user *)argp);
1852 		break;
1853 	case VDUSE_SET_API_VERSION: {
1854 		u64 api_version;
1855 
1856 		ret = -EFAULT;
1857 		if (get_user(api_version, (u64 __user *)argp))
1858 			break;
1859 
1860 		ret = -EINVAL;
1861 		if (api_version > VDUSE_API_VERSION)
1862 			break;
1863 
1864 		ret = 0;
1865 		control->api_version = api_version;
1866 		break;
1867 	}
1868 	case VDUSE_CREATE_DEV: {
1869 		struct vduse_dev_config config;
1870 		unsigned long size = offsetof(struct vduse_dev_config, config);
1871 		void *buf;
1872 
1873 		ret = -EFAULT;
1874 		if (copy_from_user(&config, argp, size))
1875 			break;
1876 
1877 		ret = -EINVAL;
1878 		if (!vduse_validate_config(&config))
1879 			break;
1880 
1881 		buf = vmemdup_user(argp + size, config.config_size);
1882 		if (IS_ERR(buf)) {
1883 			ret = PTR_ERR(buf);
1884 			break;
1885 		}
1886 		config.name[VDUSE_NAME_MAX - 1] = '\0';
1887 		ret = vduse_create_dev(&config, buf, control->api_version);
1888 		if (ret)
1889 			kvfree(buf);
1890 		break;
1891 	}
1892 	case VDUSE_DESTROY_DEV: {
1893 		char name[VDUSE_NAME_MAX];
1894 
1895 		ret = -EFAULT;
1896 		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1897 			break;
1898 
1899 		name[VDUSE_NAME_MAX - 1] = '\0';
1900 		ret = vduse_destroy_dev(name);
1901 		break;
1902 	}
1903 	default:
1904 		ret = -EINVAL;
1905 		break;
1906 	}
1907 	mutex_unlock(&vduse_lock);
1908 
1909 	return ret;
1910 }
1911 
1912 static int vduse_release(struct inode *inode, struct file *file)
1913 {
1914 	struct vduse_control *control = file->private_data;
1915 
1916 	kfree(control);
1917 	return 0;
1918 }
1919 
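/*
 * Each open of /dev/vduse/control gets its own vduse_control, starting at
 * the latest supported API version until userspace changes it with
 * VDUSE_SET_API_VERSION.
 */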
static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

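/*
 * Illustrative userspace flow for the control node (not part of this
 * driver; error handling omitted):
 *
 *	int fd = open("/dev/vduse/control", O_RDWR);
 *	uint64_t api = VDUSE_API_VERSION;
 *
 *	ioctl(fd, VDUSE_SET_API_VERSION, &api);
 *	// fill a struct vduse_dev_config followed by config_size bytes
 *	// of device config space, then:
 *	ioctl(fd, VDUSE_CREATE_DEV, dev_config);
 *	...
 *	ioctl(fd, VDUSE_DESTROY_DEV, dev_config->name);
 */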
static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

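/* Place the control node and the per-device nodes under /dev/vduse/ */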
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

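/* Management device through which VDUSE devices are attached to the vdpa bus */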
struct vduse_mgmt_dev {
	struct vdpa_mgmt_dev mgmt_dev;
	struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

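/*
 * Allocate the vdpa device backing a VDUSE device and install the VDUSE
 * DMA ops so that DMA mapping requests are serviced by its IOVA domain.
 */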
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, 1, 1, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

	return 0;
}

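/*
 * vdpa management op: attach an existing VDUSE device that userspace has
 * finished configuring to the vdpa bus, creating its IOVA domain on first use.
 */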
static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			const struct vdpa_dev_set_config *config)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	mutex_lock(&dev->domain_lock);
	if (!dev->domain)
		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
						  dev->bounce_size);
	mutex_unlock(&dev->domain_lock);
	if (!dev->domain) {
		put_device(&dev->vdev->vdpa.dev);
		return -ENOMEM;
	}

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		mutex_lock(&dev->domain_lock);
		vduse_domain_destroy(dev->domain);
		dev->domain = NULL;
		mutex_unlock(&dev->domain_lock);
		return ret;
	}

	return 0;
}

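/* vdpa management op: detach a VDUSE device from the vdpa bus */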
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
	struct vduse_mgmt_dev *mgmt_dev;

	mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
	kfree(mgmt_dev);
}

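/* Register the "vduse" management device with the vdpa subsystem */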
static int vduse_mgmtdev_init(void)
{
	int ret;

	vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
	if (!vduse_mgmt)
		return -ENOMEM;

	ret = dev_set_name(&vduse_mgmt->dev, "vduse");
	if (ret) {
		kfree(vduse_mgmt);
		return ret;
	}

	vduse_mgmt->dev.release = vduse_mgmtdev_release;

	ret = device_register(&vduse_mgmt->dev);
	if (ret)
		goto dev_reg_err;

	vduse_mgmt->mgmt_dev.id_table = id_table;
	vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
	vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
	ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
	if (ret)
		device_unregister(&vduse_mgmt->dev);

	return ret;

dev_reg_err:
	put_device(&vduse_mgmt->dev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
	device_unregister(&vduse_mgmt->dev);
}

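/*
 * Module init: create the vduse class, the control and per-device char
 * devices, the interrupt injection workqueues and the IOVA domain support,
 * then register the vdpa management device.
 */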
static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create("vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	ret = -ENOMEM;
	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq)
		goto err_wq;

	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
	if (!vduse_irq_bound_wq)
		goto err_bound_wq;

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_bound_wq);
err_bound_wq:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

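/* Module exit: tear down everything set up in vduse_init() in reverse order */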
static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_bound_wq);
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);