1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VDUSE: vDPA Device in Userspace
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include "linux/virtio_net.h"
12 #include <linux/init.h>
13 #include <linux/module.h>
14 #include <linux/cdev.h>
15 #include <linux/device.h>
16 #include <linux/eventfd.h>
17 #include <linux/slab.h>
18 #include <linux/wait.h>
19 #include <linux/dma-map-ops.h>
20 #include <linux/poll.h>
21 #include <linux/file.h>
22 #include <linux/uio.h>
23 #include <linux/vdpa.h>
24 #include <linux/nospec.h>
25 #include <linux/vmalloc.h>
26 #include <linux/sched/mm.h>
27 #include <uapi/linux/vduse.h>
28 #include <uapi/linux/vdpa.h>
29 #include <uapi/linux/virtio_config.h>
30 #include <uapi/linux/virtio_ids.h>
31 #include <uapi/linux/virtio_blk.h>
32 #include <uapi/linux/virtio_ring.h>
33 #include <linux/mod_devicetable.h>
34
35 #include "iova_domain.h"
36
37 #define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
38 #define DRV_DESC "vDPA Device in Userspace"
39 #define DRV_LICENSE "GPL v2"
40
41 #define VDUSE_DEV_MAX (1U << MINORBITS)
42 #define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
43 #define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
44 #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
45 /* 128 MB reserved for virtqueue creation */
46 #define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
47 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
48
49 #define IRQ_UNBOUND -1
50
51 struct vduse_virtqueue {
52 u16 index;
53 u16 num_max;
54 u32 num;
55 u64 desc_addr;
56 u64 driver_addr;
57 u64 device_addr;
58 struct vdpa_vq_state state;
59 bool ready;
60 bool kicked;
61 spinlock_t kick_lock;
62 spinlock_t irq_lock;
63 struct eventfd_ctx *kickfd;
64 struct vdpa_callback cb;
65 struct work_struct inject;
66 struct work_struct kick;
67 int irq_effective_cpu;
68 struct cpumask irq_affinity;
69 struct kobject kobj;
70 };
71
72 struct vduse_dev;
73
74 struct vduse_vdpa {
75 struct vdpa_device vdpa;
76 struct vduse_dev *dev;
77 };
78
79 struct vduse_umem {
80 unsigned long iova;
81 unsigned long npages;
82 struct page **pages;
83 struct mm_struct *mm;
84 };
85
86 struct vduse_dev {
87 struct vduse_vdpa *vdev;
88 struct device *dev;
89 struct vduse_virtqueue **vqs;
90 struct vduse_iova_domain *domain;
91 char *name;
92 struct mutex lock;
93 spinlock_t msg_lock;
94 u64 msg_unique;
95 u32 msg_timeout;
96 wait_queue_head_t waitq;
97 struct list_head send_list;
98 struct list_head recv_list;
99 struct vdpa_callback config_cb;
100 struct work_struct inject;
101 spinlock_t irq_lock;
102 struct rw_semaphore rwsem;
103 int minor;
104 bool broken;
105 bool connected;
106 u64 api_version;
107 u64 device_features;
108 u64 driver_features;
109 u32 device_id;
110 u32 vendor_id;
111 u32 generation;
112 u32 config_size;
113 void *config;
114 u8 status;
115 u32 vq_num;
116 u32 vq_align;
117 struct vduse_umem *umem;
118 struct mutex mem_lock;
119 unsigned int bounce_size;
120 struct mutex domain_lock;
121 };
122
123 struct vduse_dev_msg {
124 struct vduse_dev_request req;
125 struct vduse_dev_response resp;
126 struct list_head list;
127 wait_queue_head_t waitq;
128 bool completed;
129 };
130
131 struct vduse_control {
132 u64 api_version;
133 };
134
135 static DEFINE_MUTEX(vduse_lock);
136 static DEFINE_IDR(vduse_idr);
137
138 static dev_t vduse_major;
139 static struct class *vduse_class;
140 static struct cdev vduse_ctrl_cdev;
141 static struct cdev vduse_cdev;
142 static struct workqueue_struct *vduse_irq_wq;
143 static struct workqueue_struct *vduse_irq_bound_wq;
144
145 static u32 allowed_device_id[] = {
146 VIRTIO_ID_BLOCK,
147 };
148
149 static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
150 {
151 struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
152
153 return vdev->dev;
154 }
155
156 static inline struct vduse_dev *dev_to_vduse(struct device *dev)
157 {
158 struct vdpa_device *vdpa = dev_to_vdpa(dev);
159
160 return vdpa_to_vduse(vdpa);
161 }
162
163 static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
164 uint32_t request_id)
165 {
166 struct vduse_dev_msg *msg;
167
168 list_for_each_entry(msg, head, list) {
169 if (msg->req.request_id == request_id) {
170 list_del(&msg->list);
171 return msg;
172 }
173 }
174
175 return NULL;
176 }
177
178 static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
179 {
180 struct vduse_dev_msg *msg = NULL;
181
182 if (!list_empty(head)) {
183 msg = list_first_entry(head, struct vduse_dev_msg, list);
184 list_del(&msg->list);
185 }
186
187 return msg;
188 }
189
190 static void vduse_enqueue_msg(struct list_head *head,
191 struct vduse_dev_msg *msg)
192 {
193 list_add_tail(&msg->list, head);
194 }
195
196 static void vduse_dev_broken(struct vduse_dev *dev)
197 {
198 struct vduse_dev_msg *msg, *tmp;
199
200 if (unlikely(dev->broken))
201 return;
202
203 list_splice_init(&dev->recv_list, &dev->send_list);
204 list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205 list_del(&msg->list);
206 msg->completed = 1;
207 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208 wake_up(&msg->waitq);
209 }
210 dev->broken = true;
211 wake_up(&dev->waitq);
212 }
213
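/*
 * Send a request to the userspace server via send_list and wait (killably)
 * for the matching response on recv_list. On a timeout the device is marked
 * broken so that further requests fail fast with -EIO.
 */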
214 static int vduse_dev_msg_sync(struct vduse_dev *dev,
215 struct vduse_dev_msg *msg)
216 {
217 int ret;
218
219 if (unlikely(dev->broken))
220 return -EIO;
221
222 init_waitqueue_head(&msg->waitq);
223 spin_lock(&dev->msg_lock);
224 if (unlikely(dev->broken)) {
225 spin_unlock(&dev->msg_lock);
226 return -EIO;
227 }
228 msg->req.request_id = dev->msg_unique++;
229 vduse_enqueue_msg(&dev->send_list, msg);
230 wake_up(&dev->waitq);
231 spin_unlock(&dev->msg_lock);
232 if (dev->msg_timeout)
233 ret = wait_event_killable_timeout(msg->waitq, msg->completed,
234 (long)dev->msg_timeout * HZ);
235 else
236 ret = wait_event_killable(msg->waitq, msg->completed);
237
238 spin_lock(&dev->msg_lock);
239 if (!msg->completed) {
240 list_del(&msg->list);
241 msg->resp.result = VDUSE_REQ_RESULT_FAILED;
242 /* Mark the device as malfunctioning when there is a timeout */
243 if (!ret)
244 vduse_dev_broken(dev);
245 }
246 ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
247 spin_unlock(&dev->msg_lock);
248
249 return ret;
250 }
251
252 static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253 struct vduse_virtqueue *vq,
254 struct vdpa_vq_state_packed *packed)
255 {
256 struct vduse_dev_msg msg = { 0 };
257 int ret;
258
259 msg.req.type = VDUSE_GET_VQ_STATE;
260 msg.req.vq_state.index = vq->index;
261
262 ret = vduse_dev_msg_sync(dev, &msg);
263 if (ret)
264 return ret;
265
266 packed->last_avail_counter =
267 msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268 packed->last_avail_idx =
269 msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270 packed->last_used_counter =
271 msg.resp.vq_state.packed.last_used_counter & 0x0001;
272 packed->last_used_idx =
273 msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
274
275 return 0;
276 }
277
278 static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279 struct vduse_virtqueue *vq,
280 struct vdpa_vq_state_split *split)
281 {
282 struct vduse_dev_msg msg = { 0 };
283 int ret;
284
285 msg.req.type = VDUSE_GET_VQ_STATE;
286 msg.req.vq_state.index = vq->index;
287
288 ret = vduse_dev_msg_sync(dev, &msg);
289 if (ret)
290 return ret;
291
292 split->avail_index = msg.resp.vq_state.split.avail_index;
293
294 return 0;
295 }
296
297 static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
298 {
299 struct vduse_dev_msg msg = { 0 };
300
301 msg.req.type = VDUSE_SET_STATUS;
302 msg.req.s.status = status;
303
304 return vduse_dev_msg_sync(dev, &msg);
305 }
306
307 static int vduse_dev_update_iotlb(struct vduse_dev *dev,
308 u64 start, u64 last)
309 {
310 struct vduse_dev_msg msg = { 0 };
311
312 if (last < start)
313 return -EINVAL;
314
315 msg.req.type = VDUSE_UPDATE_IOTLB;
316 msg.req.iova.start = start;
317 msg.req.iova.last = last;
318
319 return vduse_dev_msg_sync(dev, &msg);
320 }
321
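/*
 * read() path for the userspace server: dequeue the next pending request
 * from send_list and copy it out. Blocks unless O_NONBLOCK is set; the
 * message is parked on recv_list until the reply arrives.
 */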
322 static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
323 {
324 struct file *file = iocb->ki_filp;
325 struct vduse_dev *dev = file->private_data;
326 struct vduse_dev_msg *msg;
327 int size = sizeof(struct vduse_dev_request);
328 ssize_t ret;
329
330 if (iov_iter_count(to) < size)
331 return -EINVAL;
332
333 spin_lock(&dev->msg_lock);
334 while (1) {
335 msg = vduse_dequeue_msg(&dev->send_list);
336 if (msg)
337 break;
338
339 ret = -EAGAIN;
340 if (file->f_flags & O_NONBLOCK)
341 goto unlock;
342
343 spin_unlock(&dev->msg_lock);
344 ret = wait_event_interruptible_exclusive(dev->waitq,
345 !list_empty(&dev->send_list));
346 if (ret)
347 return ret;
348
349 spin_lock(&dev->msg_lock);
350 }
351 spin_unlock(&dev->msg_lock);
352 ret = copy_to_iter(&msg->req, size, to);
353 spin_lock(&dev->msg_lock);
354 if (ret != size) {
355 ret = -EFAULT;
356 vduse_enqueue_msg(&dev->send_list, msg);
357 goto unlock;
358 }
359 vduse_enqueue_msg(&dev->recv_list, msg);
360 unlock:
361 spin_unlock(&dev->msg_lock);
362
363 return ret;
364 }
365
366 static bool is_mem_zero(const char *ptr, int size)
367 {
368 int i;
369
370 for (i = 0; i < size; i++) {
371 if (ptr[i])
372 return false;
373 }
374 return true;
375 }
376
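/*
 * write() path for the userspace server: a response is matched against an
 * in-flight request on recv_list by request_id and its waiter is woken.
 */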
377 static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
378 {
379 struct file *file = iocb->ki_filp;
380 struct vduse_dev *dev = file->private_data;
381 struct vduse_dev_response resp;
382 struct vduse_dev_msg *msg;
383 size_t ret;
384
385 ret = copy_from_iter(&resp, sizeof(resp), from);
386 if (ret != sizeof(resp))
387 return -EINVAL;
388
389 if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
390 return -EINVAL;
391
392 spin_lock(&dev->msg_lock);
393 msg = vduse_find_msg(&dev->recv_list, resp.request_id);
394 if (!msg) {
395 ret = -ENOENT;
396 goto unlock;
397 }
398
399 memcpy(&msg->resp, &resp, sizeof(resp));
400 msg->completed = 1;
401 wake_up(&msg->waitq);
402 unlock:
403 spin_unlock(&dev->msg_lock);
404
405 return ret;
406 }
407
408 static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
409 {
410 struct vduse_dev *dev = file->private_data;
411 __poll_t mask = 0;
412
413 poll_wait(file, &dev->waitq, wait);
414
415 spin_lock(&dev->msg_lock);
416
417 if (unlikely(dev->broken))
418 mask |= EPOLLERR;
419 if (!list_empty(&dev->send_list))
420 mask |= EPOLLIN | EPOLLRDNORM;
421 if (!list_empty(&dev->recv_list))
422 mask |= EPOLLOUT | EPOLLWRNORM;
423
424 spin_unlock(&dev->msg_lock);
425
426 return mask;
427 }
428
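/*
 * Reset device state (status, features, per-vq addresses and callbacks)
 * under the write side of dev->rwsem so that no new interrupt work can be
 * queued while the reset is in progress.
 */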
429 static void vduse_dev_reset(struct vduse_dev *dev)
430 {
431 int i;
432 struct vduse_iova_domain *domain = dev->domain;
433
434 /* The coherent mappings are handled in vduse_dev_free_coherent() */
435 if (domain && domain->bounce_map)
436 vduse_domain_reset_bounce_map(domain);
437
438 down_write(&dev->rwsem);
439
440 dev->status = 0;
441 dev->driver_features = 0;
442 dev->generation++;
443 spin_lock(&dev->irq_lock);
444 dev->config_cb.callback = NULL;
445 dev->config_cb.private = NULL;
446 spin_unlock(&dev->irq_lock);
447 flush_work(&dev->inject);
448
449 for (i = 0; i < dev->vq_num; i++) {
450 struct vduse_virtqueue *vq = dev->vqs[i];
451
452 vq->ready = false;
453 vq->desc_addr = 0;
454 vq->driver_addr = 0;
455 vq->device_addr = 0;
456 vq->num = 0;
457 memset(&vq->state, 0, sizeof(vq->state));
458
459 spin_lock(&vq->kick_lock);
460 vq->kicked = false;
461 if (vq->kickfd)
462 eventfd_ctx_put(vq->kickfd);
463 vq->kickfd = NULL;
464 spin_unlock(&vq->kick_lock);
465
466 spin_lock(&vq->irq_lock);
467 vq->cb.callback = NULL;
468 vq->cb.private = NULL;
469 vq->cb.trigger = NULL;
470 spin_unlock(&vq->irq_lock);
471 flush_work(&vq->inject);
472 flush_work(&vq->kick);
473 }
474
475 up_write(&dev->rwsem);
476 }
477
478 static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
479 u64 desc_area, u64 driver_area,
480 u64 device_area)
481 {
482 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
483 struct vduse_virtqueue *vq = dev->vqs[idx];
484
485 vq->desc_addr = desc_area;
486 vq->driver_addr = driver_area;
487 vq->device_addr = device_area;
488
489 return 0;
490 }
491
492 static void vduse_vq_kick(struct vduse_virtqueue *vq)
493 {
494 spin_lock(&vq->kick_lock);
495 if (!vq->ready)
496 goto unlock;
497
498 if (vq->kickfd)
499 eventfd_signal(vq->kickfd, 1);
500 else
501 vq->kicked = true;
502 unlock:
503 spin_unlock(&vq->kick_lock);
504 }
505
506 static void vduse_vq_kick_work(struct work_struct *work)
507 {
508 struct vduse_virtqueue *vq = container_of(work,
509 struct vduse_virtqueue, kick);
510
511 vduse_vq_kick(vq);
512 }
513
514 static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
515 {
516 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
517 struct vduse_virtqueue *vq = dev->vqs[idx];
518
519 if (!eventfd_signal_allowed()) {
520 schedule_work(&vq->kick);
521 return;
522 }
523 vduse_vq_kick(vq);
524 }
525
526 static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
527 struct vdpa_callback *cb)
528 {
529 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
530 struct vduse_virtqueue *vq = dev->vqs[idx];
531
532 spin_lock(&vq->irq_lock);
533 vq->cb.callback = cb->callback;
534 vq->cb.private = cb->private;
535 vq->cb.trigger = cb->trigger;
536 spin_unlock(&vq->irq_lock);
537 }
538
539 static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
540 {
541 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
542 struct vduse_virtqueue *vq = dev->vqs[idx];
543
544 vq->num = num;
545 }
546
547 static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
548 u16 idx, bool ready)
549 {
550 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
551 struct vduse_virtqueue *vq = dev->vqs[idx];
552
553 vq->ready = ready;
554 }
555
556 static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
557 {
558 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
559 struct vduse_virtqueue *vq = dev->vqs[idx];
560
561 return vq->ready;
562 }
563
564 static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
565 const struct vdpa_vq_state *state)
566 {
567 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
568 struct vduse_virtqueue *vq = dev->vqs[idx];
569
570 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
571 vq->state.packed.last_avail_counter =
572 state->packed.last_avail_counter;
573 vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
574 vq->state.packed.last_used_counter =
575 state->packed.last_used_counter;
576 vq->state.packed.last_used_idx = state->packed.last_used_idx;
577 } else
578 vq->state.split.avail_index = state->split.avail_index;
579
580 return 0;
581 }
582
583 static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
584 struct vdpa_vq_state *state)
585 {
586 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
587 struct vduse_virtqueue *vq = dev->vqs[idx];
588
589 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
590 return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
591
592 return vduse_dev_get_vq_state_split(dev, vq, &state->split);
593 }
594
595 static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
596 {
597 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
598
599 return dev->vq_align;
600 }
601
602 static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
603 {
604 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
605
606 return dev->device_features;
607 }
608
609 static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
610 {
611 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
612
613 dev->driver_features = features;
614 return 0;
615 }
616
617 static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
618 {
619 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
620
621 return dev->driver_features;
622 }
623
624 static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
625 struct vdpa_callback *cb)
626 {
627 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
628
629 spin_lock(&dev->irq_lock);
630 dev->config_cb.callback = cb->callback;
631 dev->config_cb.private = cb->private;
632 spin_unlock(&dev->irq_lock);
633 }
634
635 static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
636 {
637 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
638 u16 num_max = 0;
639 int i;
640
641 for (i = 0; i < dev->vq_num; i++)
642 if (num_max < dev->vqs[i]->num_max)
643 num_max = dev->vqs[i]->num_max;
644
645 return num_max;
646 }
647
648 static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
649 {
650 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
651
652 return dev->device_id;
653 }
654
655 static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
656 {
657 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
658
659 return dev->vendor_id;
660 }
661
662 static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
663 {
664 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
665
666 return dev->status;
667 }
668
669 static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
670 {
671 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
672
673 if (vduse_dev_set_status(dev, status))
674 return;
675
676 dev->status = status;
677 }
678
679 static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
680 {
681 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
682
683 return dev->config_size;
684 }
685
686 static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
687 void *buf, unsigned int len)
688 {
689 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
690
691 /* Initialize the buffer in case of partial copy. */
692 memset(buf, 0, len);
693
694 if (offset > dev->config_size)
695 return;
696
697 if (len > dev->config_size - offset)
698 len = dev->config_size - offset;
699
700 memcpy(buf, dev->config + offset, len);
701 }
702
703 static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
704 const void *buf, unsigned int len)
705 {
706 /* Now we only support read-only configuration space */
707 }
708
709 static int vduse_vdpa_reset(struct vdpa_device *vdpa)
710 {
711 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
712 int ret = vduse_dev_set_status(dev, 0);
713
714 vduse_dev_reset(dev);
715
716 return ret;
717 }
718
719 static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
720 {
721 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
722
723 return dev->generation;
724 }
725
726 static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
727 const struct cpumask *cpu_mask)
728 {
729 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
730
731 if (cpu_mask)
732 cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
733 else
734 cpumask_setall(&dev->vqs[idx]->irq_affinity);
735
736 return 0;
737 }
738
739 static const struct cpumask *
740 vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
741 {
742 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
743
744 return &dev->vqs[idx]->irq_affinity;
745 }
746
747 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
748 unsigned int asid,
749 struct vhost_iotlb *iotlb)
750 {
751 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
752 int ret;
753
754 ret = vduse_domain_set_map(dev->domain, iotlb);
755 if (ret)
756 return ret;
757
758 ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
759 if (ret) {
760 vduse_domain_clear_map(dev->domain, iotlb);
761 return ret;
762 }
763
764 return 0;
765 }
766
767 static void vduse_vdpa_free(struct vdpa_device *vdpa)
768 {
769 struct vduse_dev *dev = vdpa_to_vduse(vdpa);
770
771 dev->vdev = NULL;
772 }
773
774 static const struct vdpa_config_ops vduse_vdpa_config_ops = {
775 .set_vq_address = vduse_vdpa_set_vq_address,
776 .kick_vq = vduse_vdpa_kick_vq,
777 .set_vq_cb = vduse_vdpa_set_vq_cb,
778 .set_vq_num = vduse_vdpa_set_vq_num,
779 .set_vq_ready = vduse_vdpa_set_vq_ready,
780 .get_vq_ready = vduse_vdpa_get_vq_ready,
781 .set_vq_state = vduse_vdpa_set_vq_state,
782 .get_vq_state = vduse_vdpa_get_vq_state,
783 .get_vq_align = vduse_vdpa_get_vq_align,
784 .get_device_features = vduse_vdpa_get_device_features,
785 .set_driver_features = vduse_vdpa_set_driver_features,
786 .get_driver_features = vduse_vdpa_get_driver_features,
787 .set_config_cb = vduse_vdpa_set_config_cb,
788 .get_vq_num_max = vduse_vdpa_get_vq_num_max,
789 .get_device_id = vduse_vdpa_get_device_id,
790 .get_vendor_id = vduse_vdpa_get_vendor_id,
791 .get_status = vduse_vdpa_get_status,
792 .set_status = vduse_vdpa_set_status,
793 .get_config_size = vduse_vdpa_get_config_size,
794 .get_config = vduse_vdpa_get_config,
795 .set_config = vduse_vdpa_set_config,
796 .get_generation = vduse_vdpa_get_generation,
797 .set_vq_affinity = vduse_vdpa_set_vq_affinity,
798 .get_vq_affinity = vduse_vdpa_get_vq_affinity,
799 .reset = vduse_vdpa_reset,
800 .set_map = vduse_vdpa_set_map,
801 .free = vduse_vdpa_free,
802 };
803
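/*
 * DMA ops installed on the vDPA device: all mappings are redirected into
 * the per-device IOVA domain so that buffers can be bounced through memory
 * visible to the userspace server.
 */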
804 static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
805 unsigned long offset, size_t size,
806 enum dma_data_direction dir,
807 unsigned long attrs)
808 {
809 struct vduse_dev *vdev = dev_to_vduse(dev);
810 struct vduse_iova_domain *domain = vdev->domain;
811
812 return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
813 }
814
815 static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
816 size_t size, enum dma_data_direction dir,
817 unsigned long attrs)
818 {
819 struct vduse_dev *vdev = dev_to_vduse(dev);
820 struct vduse_iova_domain *domain = vdev->domain;
821
822 return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
823 }
824
825 static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
826 dma_addr_t *dma_addr, gfp_t flag,
827 unsigned long attrs)
828 {
829 struct vduse_dev *vdev = dev_to_vduse(dev);
830 struct vduse_iova_domain *domain = vdev->domain;
831 unsigned long iova;
832 void *addr;
833
834 *dma_addr = DMA_MAPPING_ERROR;
835 addr = vduse_domain_alloc_coherent(domain, size,
836 (dma_addr_t *)&iova, flag, attrs);
837 if (!addr)
838 return NULL;
839
840 *dma_addr = (dma_addr_t)iova;
841
842 return addr;
843 }
844
845 static void vduse_dev_free_coherent(struct device *dev, size_t size,
846 void *vaddr, dma_addr_t dma_addr,
847 unsigned long attrs)
848 {
849 struct vduse_dev *vdev = dev_to_vduse(dev);
850 struct vduse_iova_domain *domain = vdev->domain;
851
852 vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
853 }
854
855 static size_t vduse_dev_max_mapping_size(struct device *dev)
856 {
857 struct vduse_dev *vdev = dev_to_vduse(dev);
858 struct vduse_iova_domain *domain = vdev->domain;
859
860 return domain->bounce_size;
861 }
862
863 static const struct dma_map_ops vduse_dev_dma_ops = {
864 .map_page = vduse_dev_map_page,
865 .unmap_page = vduse_dev_unmap_page,
866 .alloc = vduse_dev_alloc_coherent,
867 .free = vduse_dev_free_coherent,
868 .max_mapping_size = vduse_dev_max_mapping_size,
869 };
870
871 static unsigned int perm_to_file_flags(u8 perm)
872 {
873 unsigned int flags = 0;
874
875 switch (perm) {
876 case VDUSE_ACCESS_WO:
877 flags |= O_WRONLY;
878 break;
879 case VDUSE_ACCESS_RO:
880 flags |= O_RDONLY;
881 break;
882 case VDUSE_ACCESS_RW:
883 flags |= O_RDWR;
884 break;
885 default:
886 WARN(1, "invalidate vhost IOTLB permission\n");
887 break;
888 }
889
890 return flags;
891 }
892
893 static int vduse_kickfd_setup(struct vduse_dev *dev,
894 struct vduse_vq_eventfd *eventfd)
895 {
896 struct eventfd_ctx *ctx = NULL;
897 struct vduse_virtqueue *vq;
898 u32 index;
899
900 if (eventfd->index >= dev->vq_num)
901 return -EINVAL;
902
903 index = array_index_nospec(eventfd->index, dev->vq_num);
904 vq = dev->vqs[index];
905 if (eventfd->fd >= 0) {
906 ctx = eventfd_ctx_fdget(eventfd->fd);
907 if (IS_ERR(ctx))
908 return PTR_ERR(ctx);
909 } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
910 return 0;
911
912 spin_lock(&vq->kick_lock);
913 if (vq->kickfd)
914 eventfd_ctx_put(vq->kickfd);
915 vq->kickfd = ctx;
916 if (vq->ready && vq->kicked && vq->kickfd) {
917 eventfd_signal(vq->kickfd, 1);
918 vq->kicked = false;
919 }
920 spin_unlock(&vq->kick_lock);
921
922 return 0;
923 }
924
925 static bool vduse_dev_is_ready(struct vduse_dev *dev)
926 {
927 int i;
928
929 for (i = 0; i < dev->vq_num; i++)
930 if (!dev->vqs[i]->num_max)
931 return false;
932
933 return true;
934 }
935
936 static void vduse_dev_irq_inject(struct work_struct *work)
937 {
938 struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
939
940 spin_lock_bh(&dev->irq_lock);
941 if (dev->config_cb.callback)
942 dev->config_cb.callback(dev->config_cb.private);
943 spin_unlock_bh(&dev->irq_lock);
944 }
945
946 static void vduse_vq_irq_inject(struct work_struct *work)
947 {
948 struct vduse_virtqueue *vq = container_of(work,
949 struct vduse_virtqueue, inject);
950
951 spin_lock_bh(&vq->irq_lock);
952 if (vq->ready && vq->cb.callback)
953 vq->cb.callback(vq->cb.private);
954 spin_unlock_bh(&vq->irq_lock);
955 }
956
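/*
 * Fast path for virtqueue interrupt injection: signal the irqfd provided
 * via the callback trigger, if any. Returns false when no trigger is set
 * so the caller can fall back to workqueue-based injection.
 */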
957 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
958 {
959 bool signal = false;
960
961 if (!vq->cb.trigger)
962 return false;
963
964 spin_lock_irq(&vq->irq_lock);
965 if (vq->ready && vq->cb.trigger) {
966 eventfd_signal(vq->cb.trigger, 1);
967 signal = true;
968 }
969 spin_unlock_irq(&vq->irq_lock);
970
971 return signal;
972 }
973
974 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
975 struct work_struct *irq_work,
976 int irq_effective_cpu)
977 {
978 int ret = -EINVAL;
979
980 down_read(&dev->rwsem);
981 if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
982 goto unlock;
983
984 ret = 0;
985 if (irq_effective_cpu == IRQ_UNBOUND)
986 queue_work(vduse_irq_wq, irq_work);
987 else
988 queue_work_on(irq_effective_cpu,
989 vduse_irq_bound_wq, irq_work);
990 unlock:
991 up_read(&dev->rwsem);
992
993 return ret;
994 }
995
996 static int vduse_dev_dereg_umem(struct vduse_dev *dev,
997 u64 iova, u64 size)
998 {
999 int ret;
1000
1001 mutex_lock(&dev->mem_lock);
1002 ret = -ENOENT;
1003 if (!dev->umem)
1004 goto unlock;
1005
1006 ret = -EINVAL;
1007 if (!dev->domain)
1008 goto unlock;
1009
1010 if (dev->umem->iova != iova || size != dev->domain->bounce_size)
1011 goto unlock;
1012
1013 vduse_domain_remove_user_bounce_pages(dev->domain);
1014 unpin_user_pages_dirty_lock(dev->umem->pages,
1015 dev->umem->npages, true);
1016 atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
1017 mmdrop(dev->umem->mm);
1018 vfree(dev->umem->pages);
1019 kfree(dev->umem);
1020 dev->umem = NULL;
1021 ret = 0;
1022 unlock:
1023 mutex_unlock(&dev->mem_lock);
1024 return ret;
1025 }
1026
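/*
 * Register userspace memory to back the bounce buffer: the pages are
 * pinned (accounted against RLIMIT_MEMLOCK) and handed to the IOVA domain
 * as replacement bounce pages.
 */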
1027 static int vduse_dev_reg_umem(struct vduse_dev *dev,
1028 u64 iova, u64 uaddr, u64 size)
1029 {
1030 struct page **page_list = NULL;
1031 struct vduse_umem *umem = NULL;
1032 long pinned = 0;
1033 unsigned long npages, lock_limit;
1034 int ret;
1035
1036 if (!dev->domain || !dev->domain->bounce_map ||
1037 size != dev->domain->bounce_size ||
1038 iova != 0 || uaddr & ~PAGE_MASK)
1039 return -EINVAL;
1040
1041 mutex_lock(&dev->mem_lock);
1042 ret = -EEXIST;
1043 if (dev->umem)
1044 goto unlock;
1045
1046 ret = -ENOMEM;
1047 npages = size >> PAGE_SHIFT;
1048 page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
1049 GFP_KERNEL_ACCOUNT);
1050 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
1051 if (!page_list || !umem)
1052 goto unlock;
1053
1054 mmap_read_lock(current->mm);
1055
1056 lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
1057 if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
1058 goto out;
1059
1060 pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
1061 page_list);
1062 if (pinned != npages) {
1063 ret = pinned < 0 ? pinned : -ENOMEM;
1064 goto out;
1065 }
1066
1067 ret = vduse_domain_add_user_bounce_pages(dev->domain,
1068 page_list, pinned);
1069 if (ret)
1070 goto out;
1071
1072 atomic64_add(npages, &current->mm->pinned_vm);
1073
1074 umem->pages = page_list;
1075 umem->npages = pinned;
1076 umem->iova = iova;
1077 umem->mm = current->mm;
1078 mmgrab(current->mm);
1079
1080 dev->umem = umem;
1081 out:
1082 if (ret && pinned > 0)
1083 unpin_user_pages(page_list, pinned);
1084
1085 mmap_read_unlock(current->mm);
1086 unlock:
1087 if (ret) {
1088 vfree(page_list);
1089 kfree(umem);
1090 }
1091 mutex_unlock(&dev->mem_lock);
1092 return ret;
1093 }
1094
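/*
 * Round-robin to the next online CPU in the virtqueue's irq_affinity mask,
 * wrapping around (via IRQ_UNBOUND) once the end of the mask is reached.
 */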
1095 static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
1096 {
1097 int curr_cpu = vq->irq_effective_cpu;
1098
1099 while (true) {
1100 curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
1101 if (cpu_online(curr_cpu))
1102 break;
1103
1104 if (curr_cpu >= nr_cpu_ids)
1105 curr_cpu = IRQ_UNBOUND;
1106 }
1107
1108 vq->irq_effective_cpu = curr_cpu;
1109 }
1110
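/*
 * ioctl interface used by the VDUSE server on the per-device node: query
 * and fill virtqueue configuration, set up kick eventfds, inject
 * interrupts, and manage the IOTLB / userspace bounce memory.
 */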
1111 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1112 unsigned long arg)
1113 {
1114 struct vduse_dev *dev = file->private_data;
1115 void __user *argp = (void __user *)arg;
1116 int ret;
1117
1118 if (unlikely(dev->broken))
1119 return -EPERM;
1120
1121 switch (cmd) {
1122 case VDUSE_IOTLB_GET_FD: {
1123 struct vduse_iotlb_entry entry;
1124 struct vhost_iotlb_map *map;
1125 struct vdpa_map_file *map_file;
1126 struct file *f = NULL;
1127
1128 ret = -EFAULT;
1129 if (copy_from_user(&entry, argp, sizeof(entry)))
1130 break;
1131
1132 ret = -EINVAL;
1133 if (entry.start > entry.last)
1134 break;
1135
1136 mutex_lock(&dev->domain_lock);
1137 if (!dev->domain) {
1138 mutex_unlock(&dev->domain_lock);
1139 break;
1140 }
1141 spin_lock(&dev->domain->iotlb_lock);
1142 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1143 entry.start, entry.last);
1144 if (map) {
1145 map_file = (struct vdpa_map_file *)map->opaque;
1146 f = get_file(map_file->file);
1147 entry.offset = map_file->offset;
1148 entry.start = map->start;
1149 entry.last = map->last;
1150 entry.perm = map->perm;
1151 }
1152 spin_unlock(&dev->domain->iotlb_lock);
1153 mutex_unlock(&dev->domain_lock);
1154 ret = -EINVAL;
1155 if (!f)
1156 break;
1157
1158 ret = -EFAULT;
1159 if (copy_to_user(argp, &entry, sizeof(entry))) {
1160 fput(f);
1161 break;
1162 }
1163 ret = receive_fd(f, perm_to_file_flags(entry.perm));
1164 fput(f);
1165 break;
1166 }
1167 case VDUSE_DEV_GET_FEATURES:
1168 /*
1169 * Just mirror what the driver wrote here.
1170 * The driver is expected to check FEATURES_OK later.
1171 */
1172 ret = put_user(dev->driver_features, (u64 __user *)argp);
1173 break;
1174 case VDUSE_DEV_SET_CONFIG: {
1175 struct vduse_config_data config;
1176 unsigned long size = offsetof(struct vduse_config_data,
1177 buffer);
1178
1179 ret = -EFAULT;
1180 if (copy_from_user(&config, argp, size))
1181 break;
1182
1183 ret = -EINVAL;
1184 if (config.offset > dev->config_size ||
1185 config.length == 0 ||
1186 config.length > dev->config_size - config.offset)
1187 break;
1188
1189 ret = -EFAULT;
1190 if (copy_from_user(dev->config + config.offset, argp + size,
1191 config.length))
1192 break;
1193
1194 ret = 0;
1195 break;
1196 }
1197 case VDUSE_DEV_INJECT_CONFIG_IRQ:
1198 ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1199 break;
1200 case VDUSE_VQ_SETUP: {
1201 struct vduse_vq_config config;
1202 u32 index;
1203
1204 ret = -EFAULT;
1205 if (copy_from_user(&config, argp, sizeof(config)))
1206 break;
1207
1208 ret = -EINVAL;
1209 if (config.index >= dev->vq_num)
1210 break;
1211
1212 if (!is_mem_zero((const char *)config.reserved,
1213 sizeof(config.reserved)))
1214 break;
1215
1216 index = array_index_nospec(config.index, dev->vq_num);
1217 dev->vqs[index]->num_max = config.max_size;
1218 ret = 0;
1219 break;
1220 }
1221 case VDUSE_VQ_GET_INFO: {
1222 struct vduse_vq_info vq_info;
1223 struct vduse_virtqueue *vq;
1224 u32 index;
1225
1226 ret = -EFAULT;
1227 if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1228 break;
1229
1230 ret = -EINVAL;
1231 if (vq_info.index >= dev->vq_num)
1232 break;
1233
1234 index = array_index_nospec(vq_info.index, dev->vq_num);
1235 vq = dev->vqs[index];
1236 vq_info.desc_addr = vq->desc_addr;
1237 vq_info.driver_addr = vq->driver_addr;
1238 vq_info.device_addr = vq->device_addr;
1239 vq_info.num = vq->num;
1240
1241 if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1242 vq_info.packed.last_avail_counter =
1243 vq->state.packed.last_avail_counter;
1244 vq_info.packed.last_avail_idx =
1245 vq->state.packed.last_avail_idx;
1246 vq_info.packed.last_used_counter =
1247 vq->state.packed.last_used_counter;
1248 vq_info.packed.last_used_idx =
1249 vq->state.packed.last_used_idx;
1250 } else
1251 vq_info.split.avail_index =
1252 vq->state.split.avail_index;
1253
1254 vq_info.ready = vq->ready;
1255
1256 ret = -EFAULT;
1257 if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1258 break;
1259
1260 ret = 0;
1261 break;
1262 }
1263 case VDUSE_VQ_SETUP_KICKFD: {
1264 struct vduse_vq_eventfd eventfd;
1265
1266 ret = -EFAULT;
1267 if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1268 break;
1269
1270 ret = vduse_kickfd_setup(dev, &eventfd);
1271 break;
1272 }
1273 case VDUSE_VQ_INJECT_IRQ: {
1274 u32 index;
1275
1276 ret = -EFAULT;
1277 if (get_user(index, (u32 __user *)argp))
1278 break;
1279
1280 ret = -EINVAL;
1281 if (index >= dev->vq_num)
1282 break;
1283
1284 ret = 0;
1285 index = array_index_nospec(index, dev->vq_num);
1286 if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
1287 vduse_vq_update_effective_cpu(dev->vqs[index]);
1288 ret = vduse_dev_queue_irq_work(dev,
1289 &dev->vqs[index]->inject,
1290 dev->vqs[index]->irq_effective_cpu);
1291 }
1292 break;
1293 }
1294 case VDUSE_IOTLB_REG_UMEM: {
1295 struct vduse_iova_umem umem;
1296
1297 ret = -EFAULT;
1298 if (copy_from_user(&umem, argp, sizeof(umem)))
1299 break;
1300
1301 ret = -EINVAL;
1302 if (!is_mem_zero((const char *)umem.reserved,
1303 sizeof(umem.reserved)))
1304 break;
1305
1306 mutex_lock(&dev->domain_lock);
1307 ret = vduse_dev_reg_umem(dev, umem.iova,
1308 umem.uaddr, umem.size);
1309 mutex_unlock(&dev->domain_lock);
1310 break;
1311 }
1312 case VDUSE_IOTLB_DEREG_UMEM: {
1313 struct vduse_iova_umem umem;
1314
1315 ret = -EFAULT;
1316 if (copy_from_user(&umem, argp, sizeof(umem)))
1317 break;
1318
1319 ret = -EINVAL;
1320 if (!is_mem_zero((const char *)umem.reserved,
1321 sizeof(umem.reserved)))
1322 break;
1323 mutex_lock(&dev->domain_lock);
1324 ret = vduse_dev_dereg_umem(dev, umem.iova,
1325 umem.size);
1326 mutex_unlock(&dev->domain_lock);
1327 break;
1328 }
1329 case VDUSE_IOTLB_GET_INFO: {
1330 struct vduse_iova_info info;
1331 struct vhost_iotlb_map *map;
1332
1333 ret = -EFAULT;
1334 if (copy_from_user(&info, argp, sizeof(info)))
1335 break;
1336
1337 ret = -EINVAL;
1338 if (info.start > info.last)
1339 break;
1340
1341 if (!is_mem_zero((const char *)info.reserved,
1342 sizeof(info.reserved)))
1343 break;
1344
1345 mutex_lock(&dev->domain_lock);
1346 if (!dev->domain) {
1347 mutex_unlock(&dev->domain_lock);
1348 break;
1349 }
1350 spin_lock(&dev->domain->iotlb_lock);
1351 map = vhost_iotlb_itree_first(dev->domain->iotlb,
1352 info.start, info.last);
1353 if (map) {
1354 info.start = map->start;
1355 info.last = map->last;
1356 info.capability = 0;
1357 if (dev->domain->bounce_map && map->start == 0 &&
1358 map->last == dev->domain->bounce_size - 1)
1359 info.capability |= VDUSE_IOVA_CAP_UMEM;
1360 }
1361 spin_unlock(&dev->domain->iotlb_lock);
1362 mutex_unlock(&dev->domain_lock);
1363 if (!map)
1364 break;
1365
1366 ret = -EFAULT;
1367 if (copy_to_user(argp, &info, sizeof(info)))
1368 break;
1369
1370 ret = 0;
1371 break;
1372 }
1373 default:
1374 ret = -ENOIOCTLCMD;
1375 break;
1376 }
1377
1378 return ret;
1379 }
1380
1381 static int vduse_dev_release(struct inode *inode, struct file *file)
1382 {
1383 struct vduse_dev *dev = file->private_data;
1384
1385 mutex_lock(&dev->domain_lock);
1386 if (dev->domain)
1387 vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1388 mutex_unlock(&dev->domain_lock);
1389 spin_lock(&dev->msg_lock);
1390 /* Make sure the inflight messages can be processed after reconnection */
1391 list_splice_init(&dev->recv_list, &dev->send_list);
1392 spin_unlock(&dev->msg_lock);
1393 dev->connected = false;
1394
1395 return 0;
1396 }
1397
1398 static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1399 {
1400 struct vduse_dev *dev;
1401
1402 mutex_lock(&vduse_lock);
1403 dev = idr_find(&vduse_idr, minor);
1404 mutex_unlock(&vduse_lock);
1405
1406 return dev;
1407 }
1408
1409 static int vduse_dev_open(struct inode *inode, struct file *file)
1410 {
1411 int ret;
1412 struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1413
1414 if (!dev)
1415 return -ENODEV;
1416
1417 ret = -EBUSY;
1418 mutex_lock(&dev->lock);
1419 if (dev->connected)
1420 goto unlock;
1421
1422 ret = 0;
1423 dev->connected = true;
1424 file->private_data = dev;
1425 unlock:
1426 mutex_unlock(&dev->lock);
1427
1428 return ret;
1429 }
1430
1431 static const struct file_operations vduse_dev_fops = {
1432 .owner = THIS_MODULE,
1433 .open = vduse_dev_open,
1434 .release = vduse_dev_release,
1435 .read_iter = vduse_dev_read_iter,
1436 .write_iter = vduse_dev_write_iter,
1437 .poll = vduse_dev_poll,
1438 .unlocked_ioctl = vduse_dev_ioctl,
1439 .compat_ioctl = compat_ptr_ioctl,
1440 .llseek = noop_llseek,
1441 };
1442
1443 static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
1444 {
1445 return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
1446 }
1447
1448 static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
1449 const char *buf, size_t count)
1450 {
1451 cpumask_var_t new_value;
1452 int ret;
1453
1454 if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
1455 return -ENOMEM;
1456
1457 ret = cpumask_parse(buf, new_value);
1458 if (ret)
1459 goto free_mask;
1460
1461 ret = -EINVAL;
1462 if (!cpumask_intersects(new_value, cpu_online_mask))
1463 goto free_mask;
1464
1465 cpumask_copy(&vq->irq_affinity, new_value);
1466 ret = count;
1467 free_mask:
1468 free_cpumask_var(new_value);
1469 return ret;
1470 }
1471
1472 struct vq_sysfs_entry {
1473 struct attribute attr;
1474 ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
1475 ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
1476 size_t count);
1477 };
1478
1479 static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
1480
1481 static struct attribute *vq_attrs[] = {
1482 &irq_cb_affinity_attr.attr,
1483 NULL,
1484 };
1485 ATTRIBUTE_GROUPS(vq);
1486
1487 static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
1488 char *buf)
1489 {
1490 struct vduse_virtqueue *vq = container_of(kobj,
1491 struct vduse_virtqueue, kobj);
1492 struct vq_sysfs_entry *entry = container_of(attr,
1493 struct vq_sysfs_entry, attr);
1494
1495 if (!entry->show)
1496 return -EIO;
1497
1498 return entry->show(vq, buf);
1499 }
1500
1501 static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
1502 const char *buf, size_t count)
1503 {
1504 struct vduse_virtqueue *vq = container_of(kobj,
1505 struct vduse_virtqueue, kobj);
1506 struct vq_sysfs_entry *entry = container_of(attr,
1507 struct vq_sysfs_entry, attr);
1508
1509 if (!entry->store)
1510 return -EIO;
1511
1512 return entry->store(vq, buf, count);
1513 }
1514
1515 static const struct sysfs_ops vq_sysfs_ops = {
1516 .show = vq_attr_show,
1517 .store = vq_attr_store,
1518 };
1519
1520 static void vq_release(struct kobject *kobj)
1521 {
1522 struct vduse_virtqueue *vq = container_of(kobj,
1523 struct vduse_virtqueue, kobj);
1524 kfree(vq);
1525 }
1526
1527 static const struct kobj_type vq_type = {
1528 .release = vq_release,
1529 .sysfs_ops = &vq_sysfs_ops,
1530 .default_groups = vq_groups,
1531 };
1532
1533 static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
1534 {
1535 int i;
1536
1537 if (!dev->vqs)
1538 return;
1539
1540 for (i = 0; i < dev->vq_num; i++)
1541 kobject_put(&dev->vqs[i]->kobj);
1542 kfree(dev->vqs);
1543 }
1544
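/*
 * Allocate the per-virtqueue structures and expose each of them as a
 * "vqN" kobject under the VDUSE device so the interrupt callback affinity
 * can be tuned from sysfs.
 */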
1545 static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
1546 {
1547 int ret, i;
1548
1549 dev->vq_align = vq_align;
1550 dev->vq_num = vq_num;
1551 dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
1552 if (!dev->vqs)
1553 return -ENOMEM;
1554
1555 for (i = 0; i < vq_num; i++) {
1556 dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL);
1557 if (!dev->vqs[i]) {
1558 ret = -ENOMEM;
1559 goto err;
1560 }
1561
1562 dev->vqs[i]->index = i;
1563 dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
1564 INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
1565 INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
1566 spin_lock_init(&dev->vqs[i]->kick_lock);
1567 spin_lock_init(&dev->vqs[i]->irq_lock);
1568 cpumask_setall(&dev->vqs[i]->irq_affinity);
1569
1570 kobject_init(&dev->vqs[i]->kobj, &vq_type);
1571 ret = kobject_add(&dev->vqs[i]->kobj,
1572 &dev->dev->kobj, "vq%d", i);
1573 if (ret) {
1574 kfree(dev->vqs[i]);
1575 goto err;
1576 }
1577 }
1578
1579 return 0;
1580 err:
1581 while (i--)
1582 kobject_put(&dev->vqs[i]->kobj);
1583 kfree(dev->vqs);
1584 dev->vqs = NULL;
1585 return ret;
1586 }
1587
1588 static struct vduse_dev *vduse_dev_create(void)
1589 {
1590 struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1591
1592 if (!dev)
1593 return NULL;
1594
1595 mutex_init(&dev->lock);
1596 mutex_init(&dev->mem_lock);
1597 mutex_init(&dev->domain_lock);
1598 spin_lock_init(&dev->msg_lock);
1599 INIT_LIST_HEAD(&dev->send_list);
1600 INIT_LIST_HEAD(&dev->recv_list);
1601 spin_lock_init(&dev->irq_lock);
1602 init_rwsem(&dev->rwsem);
1603
1604 INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1605 init_waitqueue_head(&dev->waitq);
1606
1607 return dev;
1608 }
1609
1610 static void vduse_dev_destroy(struct vduse_dev *dev)
1611 {
1612 kfree(dev);
1613 }
1614
1615 static struct vduse_dev *vduse_find_dev(const char *name)
1616 {
1617 struct vduse_dev *dev;
1618 int id;
1619
1620 idr_for_each_entry(&vduse_idr, dev, id)
1621 if (!strcmp(dev->name, name))
1622 return dev;
1623
1624 return NULL;
1625 }
1626
1627 static int vduse_destroy_dev(char *name)
1628 {
1629 struct vduse_dev *dev = vduse_find_dev(name);
1630
1631 if (!dev)
1632 return -EINVAL;
1633
1634 mutex_lock(&dev->lock);
1635 if (dev->vdev || dev->connected) {
1636 mutex_unlock(&dev->lock);
1637 return -EBUSY;
1638 }
1639 dev->connected = true;
1640 mutex_unlock(&dev->lock);
1641
1642 vduse_dev_reset(dev);
1643 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1644 idr_remove(&vduse_idr, dev->minor);
1645 kvfree(dev->config);
1646 vduse_dev_deinit_vqs(dev);
1647 if (dev->domain)
1648 vduse_domain_destroy(dev->domain);
1649 kfree(dev->name);
1650 vduse_dev_destroy(dev);
1651 module_put(THIS_MODULE);
1652
1653 return 0;
1654 }
1655
1656 static bool device_is_allowed(u32 device_id)
1657 {
1658 int i;
1659
1660 for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1661 if (allowed_device_id[i] == device_id)
1662 return true;
1663
1664 return false;
1665 }
1666
1667 static bool features_is_valid(struct vduse_dev_config *config)
1668 {
1669 if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1670 return false;
1671
1672 /* Now we only support read-only configuration space */
1673 if ((config->device_id == VIRTIO_ID_BLOCK) &&
1674 (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1675 return false;
1676 else if ((config->device_id == VIRTIO_ID_NET) &&
1677 (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1678 return false;
1679
1680 return true;
1681 }
1682
1683 static bool vduse_validate_config(struct vduse_dev_config *config)
1684 {
1685 if (!is_mem_zero((const char *)config->reserved,
1686 sizeof(config->reserved)))
1687 return false;
1688
1689 if (config->vq_align > PAGE_SIZE)
1690 return false;
1691
1692 if (config->config_size > PAGE_SIZE)
1693 return false;
1694
1695 if (config->vq_num > 0xffff)
1696 return false;
1697
1698 if (!config->name[0])
1699 return false;
1700
1701 if (!device_is_allowed(config->device_id))
1702 return false;
1703
1704 if (!features_is_valid(config))
1705 return false;
1706
1707 return true;
1708 }
1709
1710 static ssize_t msg_timeout_show(struct device *device,
1711 struct device_attribute *attr, char *buf)
1712 {
1713 struct vduse_dev *dev = dev_get_drvdata(device);
1714
1715 return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1716 }
1717
1718 static ssize_t msg_timeout_store(struct device *device,
1719 struct device_attribute *attr,
1720 const char *buf, size_t count)
1721 {
1722 struct vduse_dev *dev = dev_get_drvdata(device);
1723 int ret;
1724
1725 ret = kstrtouint(buf, 10, &dev->msg_timeout);
1726 if (ret < 0)
1727 return ret;
1728
1729 return count;
1730 }
1731
1732 static DEVICE_ATTR_RW(msg_timeout);
1733
1734 static ssize_t bounce_size_show(struct device *device,
1735 struct device_attribute *attr, char *buf)
1736 {
1737 struct vduse_dev *dev = dev_get_drvdata(device);
1738
1739 return sysfs_emit(buf, "%u\n", dev->bounce_size);
1740 }
1741
1742 static ssize_t bounce_size_store(struct device *device,
1743 struct device_attribute *attr,
1744 const char *buf, size_t count)
1745 {
1746 struct vduse_dev *dev = dev_get_drvdata(device);
1747 unsigned int bounce_size;
1748 int ret;
1749
1750 ret = -EPERM;
1751 mutex_lock(&dev->domain_lock);
1752 if (dev->domain)
1753 goto unlock;
1754
1755 ret = kstrtouint(buf, 10, &bounce_size);
1756 if (ret < 0)
1757 goto unlock;
1758
1759 ret = -EINVAL;
1760 if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1761 bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1762 goto unlock;
1763
1764 dev->bounce_size = bounce_size & PAGE_MASK;
1765 ret = count;
1766 unlock:
1767 mutex_unlock(&dev->domain_lock);
1768 return ret;
1769 }
1770
1771 static DEVICE_ATTR_RW(bounce_size);
1772
1773 static struct attribute *vduse_dev_attrs[] = {
1774 &dev_attr_msg_timeout.attr,
1775 &dev_attr_bounce_size.attr,
1776 NULL
1777 };
1778
1779 ATTRIBUTE_GROUPS(vduse_dev);
1780
1781 static int vduse_create_dev(struct vduse_dev_config *config,
1782 void *config_buf, u64 api_version)
1783 {
1784 int ret;
1785 struct vduse_dev *dev;
1786
1787 ret = -EEXIST;
1788 if (vduse_find_dev(config->name))
1789 goto err;
1790
1791 ret = -ENOMEM;
1792 dev = vduse_dev_create();
1793 if (!dev)
1794 goto err;
1795
1796 dev->api_version = api_version;
1797 dev->device_features = config->features;
1798 dev->device_id = config->device_id;
1799 dev->vendor_id = config->vendor_id;
1800 dev->name = kstrdup(config->name, GFP_KERNEL);
1801 if (!dev->name)
1802 goto err_str;
1803
1804 dev->bounce_size = VDUSE_BOUNCE_SIZE;
1805 dev->config = config_buf;
1806 dev->config_size = config->config_size;
1807
1808 ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
1809 if (ret < 0)
1810 goto err_idr;
1811
1812 dev->minor = ret;
1813 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
1814 dev->dev = device_create_with_groups(vduse_class, NULL,
1815 MKDEV(MAJOR(vduse_major), dev->minor),
1816 dev, vduse_dev_groups, "%s", config->name);
1817 if (IS_ERR(dev->dev)) {
1818 ret = PTR_ERR(dev->dev);
1819 goto err_dev;
1820 }
1821
1822 ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
1823 if (ret)
1824 goto err_vqs;
1825
1826 __module_get(THIS_MODULE);
1827
1828 return 0;
1829 err_vqs:
1830 device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1831 err_dev:
1832 idr_remove(&vduse_idr, dev->minor);
1833 err_idr:
1834 kfree(dev->name);
1835 err_str:
1836 vduse_dev_destroy(dev);
1837 err:
1838 return ret;
1839 }
1840
1841 static long vduse_ioctl(struct file *file, unsigned int cmd,
1842 unsigned long arg)
1843 {
1844 int ret;
1845 void __user *argp = (void __user *)arg;
1846 struct vduse_control *control = file->private_data;
1847
1848 mutex_lock(&vduse_lock);
1849 switch (cmd) {
1850 case VDUSE_GET_API_VERSION:
1851 ret = put_user(control->api_version, (u64 __user *)argp);
1852 break;
1853 case VDUSE_SET_API_VERSION: {
1854 u64 api_version;
1855
1856 ret = -EFAULT;
1857 if (get_user(api_version, (u64 __user *)argp))
1858 break;
1859
1860 ret = -EINVAL;
1861 if (api_version > VDUSE_API_VERSION)
1862 break;
1863
1864 ret = 0;
1865 control->api_version = api_version;
1866 break;
1867 }
1868 case VDUSE_CREATE_DEV: {
1869 struct vduse_dev_config config;
1870 unsigned long size = offsetof(struct vduse_dev_config, config);
1871 void *buf;
1872
1873 ret = -EFAULT;
1874 if (copy_from_user(&config, argp, size))
1875 break;
1876
1877 ret = -EINVAL;
1878 if (vduse_validate_config(&config) == false)
1879 break;
1880
1881 buf = vmemdup_user(argp + size, config.config_size);
1882 if (IS_ERR(buf)) {
1883 ret = PTR_ERR(buf);
1884 break;
1885 }
1886 config.name[VDUSE_NAME_MAX - 1] = '\0';
1887 ret = vduse_create_dev(&config, buf, control->api_version);
1888 if (ret)
1889 kvfree(buf);
1890 break;
1891 }
1892 case VDUSE_DESTROY_DEV: {
1893 char name[VDUSE_NAME_MAX];
1894
1895 ret = -EFAULT;
1896 if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1897 break;
1898
1899 name[VDUSE_NAME_MAX - 1] = '\0';
1900 ret = vduse_destroy_dev(name);
1901 break;
1902 }
1903 default:
1904 ret = -EINVAL;
1905 break;
1906 }
1907 mutex_unlock(&vduse_lock);
1908
1909 return ret;
1910 }
1911
1912 static int vduse_release(struct inode *inode, struct file *file)
1913 {
1914 struct vduse_control *control = file->private_data;
1915
1916 kfree(control);
1917 return 0;
1918 }
1919
1920 static int vduse_open(struct inode *inode, struct file *file)
1921 {
1922 struct vduse_control *control;
1923
1924 control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1925 if (!control)
1926 return -ENOMEM;
1927
1928 control->api_version = VDUSE_API_VERSION;
1929 file->private_data = control;
1930
1931 return 0;
1932 }
1933
1934 static const struct file_operations vduse_ctrl_fops = {
1935 .owner = THIS_MODULE,
1936 .open = vduse_open,
1937 .release = vduse_release,
1938 .unlocked_ioctl = vduse_ioctl,
1939 .compat_ioctl = compat_ptr_ioctl,
1940 .llseek = noop_llseek,
1941 };
1942
1943 static char *vduse_devnode(const struct device *dev, umode_t *mode)
1944 {
1945 return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
1946 }
1947
1948 struct vduse_mgmt_dev {
1949 struct vdpa_mgmt_dev mgmt_dev;
1950 struct device dev;
1951 };
1952
1953 static struct vduse_mgmt_dev *vduse_mgmt;
1954
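/*
 * Wrap the VDUSE device in a vdpa_device so it can be attached to the
 * vDPA bus: allocate the wrapper, set a 64-bit DMA mask and install the
 * VDUSE-specific DMA ops.
 */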
1955 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1956 {
1957 struct vduse_vdpa *vdev;
1958 int ret;
1959
1960 if (dev->vdev)
1961 return -EEXIST;
1962
1963 vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1964 &vduse_vdpa_config_ops, 1, 1, name, true);
1965 if (IS_ERR(vdev))
1966 return PTR_ERR(vdev);
1967
1968 dev->vdev = vdev;
1969 vdev->dev = dev;
1970 vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1971 ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1972 if (ret) {
1973 put_device(&vdev->vdpa.dev);
1974 return ret;
1975 }
1976 set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1977 vdev->vdpa.dma_dev = &vdev->vdpa.dev;
1978 vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1979
1980 return 0;
1981 }
1982
1983 static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1984 const struct vdpa_dev_set_config *config)
1985 {
1986 struct vduse_dev *dev;
1987 int ret;
1988
1989 mutex_lock(&vduse_lock);
1990 dev = vduse_find_dev(name);
1991 if (!dev || !vduse_dev_is_ready(dev)) {
1992 mutex_unlock(&vduse_lock);
1993 return -EINVAL;
1994 }
1995 ret = vduse_dev_init_vdpa(dev, name);
1996 mutex_unlock(&vduse_lock);
1997 if (ret)
1998 return ret;
1999
2000 mutex_lock(&dev->domain_lock);
2001 if (!dev->domain)
2002 dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2003 dev->bounce_size);
2004 mutex_unlock(&dev->domain_lock);
2005 if (!dev->domain) {
2006 put_device(&dev->vdev->vdpa.dev);
2007 return -ENOMEM;
2008 }
2009
2010 ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2011 if (ret) {
2012 put_device(&dev->vdev->vdpa.dev);
2013 mutex_lock(&dev->domain_lock);
2014 vduse_domain_destroy(dev->domain);
2015 dev->domain = NULL;
2016 mutex_unlock(&dev->domain_lock);
2017 return ret;
2018 }
2019
2020 return 0;
2021 }
2022
2023 static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
2024 {
2025 _vdpa_unregister_device(dev);
2026 }
2027
2028 static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2029 .dev_add = vdpa_dev_add,
2030 .dev_del = vdpa_dev_del,
2031 };
2032
2033 static struct virtio_device_id id_table[] = {
2034 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2035 { 0 },
2036 };
2037
2038 static void vduse_mgmtdev_release(struct device *dev)
2039 {
2040 struct vduse_mgmt_dev *mgmt_dev;
2041
2042 mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
2043 kfree(mgmt_dev);
2044 }
2045
2046 static int vduse_mgmtdev_init(void)
2047 {
2048 int ret;
2049
2050 vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
2051 if (!vduse_mgmt)
2052 return -ENOMEM;
2053
2054 ret = dev_set_name(&vduse_mgmt->dev, "vduse");
2055 if (ret) {
2056 kfree(vduse_mgmt);
2057 return ret;
2058 }
2059
2060 vduse_mgmt->dev.release = vduse_mgmtdev_release;
2061
2062 ret = device_register(&vduse_mgmt->dev);
2063 if (ret)
2064 goto dev_reg_err;
2065
2066 vduse_mgmt->mgmt_dev.id_table = id_table;
2067 vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
2068 vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
2069 ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
2070 if (ret)
2071 device_unregister(&vduse_mgmt->dev);
2072
2073 return ret;
2074
2075 dev_reg_err:
2076 put_device(&vduse_mgmt->dev);
2077 return ret;
2078 }
2079
2080 static void vduse_mgmtdev_exit(void)
2081 {
2082 vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
2083 device_unregister(&vduse_mgmt->dev);
2084 }
2085
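/*
 * Module init: create the vduse class and character devices
 * (/dev/vduse/control plus the per-device nodes), the interrupt injection
 * workqueues, the IOVA domain infrastructure and the vDPA management
 * device.
 */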
2086 static int vduse_init(void)
2087 {
2088 int ret;
2089 struct device *dev;
2090
2091 vduse_class = class_create("vduse");
2092 if (IS_ERR(vduse_class))
2093 return PTR_ERR(vduse_class);
2094
2095 vduse_class->devnode = vduse_devnode;
2096
2097 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2098 if (ret)
2099 goto err_chardev_region;
2100
2101 /* /dev/vduse/control */
2102 cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2103 vduse_ctrl_cdev.owner = THIS_MODULE;
2104 ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2105 if (ret)
2106 goto err_ctrl_cdev;
2107
2108 dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
2109 if (IS_ERR(dev)) {
2110 ret = PTR_ERR(dev);
2111 goto err_device;
2112 }
2113
2114 /* /dev/vduse/$DEVICE */
2115 cdev_init(&vduse_cdev, &vduse_dev_fops);
2116 vduse_cdev.owner = THIS_MODULE;
2117 ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2118 VDUSE_DEV_MAX - 1);
2119 if (ret)
2120 goto err_cdev;
2121
2122 ret = -ENOMEM;
2123 vduse_irq_wq = alloc_workqueue("vduse-irq",
2124 WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
2125 if (!vduse_irq_wq)
2126 goto err_wq;
2127
2128 vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
2129 if (!vduse_irq_bound_wq)
2130 goto err_bound_wq;
2131
2132 ret = vduse_domain_init();
2133 if (ret)
2134 goto err_domain;
2135
2136 ret = vduse_mgmtdev_init();
2137 if (ret)
2138 goto err_mgmtdev;
2139
2140 return 0;
2141 err_mgmtdev:
2142 vduse_domain_exit();
2143 err_domain:
2144 destroy_workqueue(vduse_irq_bound_wq);
2145 err_bound_wq:
2146 destroy_workqueue(vduse_irq_wq);
2147 err_wq:
2148 cdev_del(&vduse_cdev);
2149 err_cdev:
2150 device_destroy(vduse_class, vduse_major);
2151 err_device:
2152 cdev_del(&vduse_ctrl_cdev);
2153 err_ctrl_cdev:
2154 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2155 err_chardev_region:
2156 class_destroy(vduse_class);
2157 return ret;
2158 }
2159 module_init(vduse_init);
2160
2161 static void vduse_exit(void)
2162 {
2163 vduse_mgmtdev_exit();
2164 vduse_domain_exit();
2165 destroy_workqueue(vduse_irq_bound_wq);
2166 destroy_workqueue(vduse_irq_wq);
2167 cdev_del(&vduse_cdev);
2168 device_destroy(vduse_class, vduse_major);
2169 cdev_del(&vduse_ctrl_cdev);
2170 unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2171 class_destroy(vduse_class);
2172 }
2173 module_exit(vduse_exit);
2174
2175 MODULE_LICENSE(DRV_LICENSE);
2176 MODULE_AUTHOR(DRV_AUTHOR);
2177 MODULE_DESCRIPTION(DRV_DESC);
2178