1c8a6153bSXie Yongji // SPDX-License-Identifier: GPL-2.0-only
2c8a6153bSXie Yongji /*
3c8a6153bSXie Yongji * VDUSE: vDPA Device in Userspace
4c8a6153bSXie Yongji *
5c8a6153bSXie Yongji * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6c8a6153bSXie Yongji *
7c8a6153bSXie Yongji * Author: Xie Yongji <xieyongji@bytedance.com>
8c8a6153bSXie Yongji *
9c8a6153bSXie Yongji */
10c8a6153bSXie Yongji
11*df475f71SMaxime Coquelin #include "linux/virtio_net.h"
12c8a6153bSXie Yongji #include <linux/init.h>
13c8a6153bSXie Yongji #include <linux/module.h>
14c8a6153bSXie Yongji #include <linux/cdev.h>
15c8a6153bSXie Yongji #include <linux/device.h>
16c8a6153bSXie Yongji #include <linux/eventfd.h>
17c8a6153bSXie Yongji #include <linux/slab.h>
18c8a6153bSXie Yongji #include <linux/wait.h>
19c8a6153bSXie Yongji #include <linux/dma-map-ops.h>
20c8a6153bSXie Yongji #include <linux/poll.h>
21c8a6153bSXie Yongji #include <linux/file.h>
22c8a6153bSXie Yongji #include <linux/uio.h>
23c8a6153bSXie Yongji #include <linux/vdpa.h>
24c8a6153bSXie Yongji #include <linux/nospec.h>
2579a463beSXie Yongji #include <linux/vmalloc.h>
2679a463beSXie Yongji #include <linux/sched/mm.h>
27c8a6153bSXie Yongji #include <uapi/linux/vduse.h>
28c8a6153bSXie Yongji #include <uapi/linux/vdpa.h>
29c8a6153bSXie Yongji #include <uapi/linux/virtio_config.h>
30c8a6153bSXie Yongji #include <uapi/linux/virtio_ids.h>
31c8a6153bSXie Yongji #include <uapi/linux/virtio_blk.h>
32*df475f71SMaxime Coquelin #include <uapi/linux/virtio_ring.h>
33c8a6153bSXie Yongji #include <linux/mod_devicetable.h>
34c8a6153bSXie Yongji
35c8a6153bSXie Yongji #include "iova_domain.h"
36c8a6153bSXie Yongji
/* Driver identification strings. */
#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC "vDPA Device in Userspace"
#define DRV_LICENSE "GPL v2"

/* One device per minor number. */
#define VDUSE_DEV_MAX (1U << MINORBITS)
/* Bounce buffer sizes in bytes: 1 GiB, 1 MiB and 64 MiB respectively. */
#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024)
#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024)
/* NOTE(review): presumably the default bounce size - confirm at the user. */
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
/* 128 MB reserved for virtqueue creation */
#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024)
/*
 * Message timeout value; vduse_dev::msg_timeout is multiplied by HZ when
 * waiting for a reply, so the unit is seconds.
 */
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

/*
 * Parenthesised so the negative constant stays a single operand wherever
 * the macro is expanded.
 * NOTE(review): presumably the "no CPU chosen" value for
 * vduse_virtqueue::irq_effective_cpu - confirm at the users.
 */
#define IRQ_UNBOUND (-1)
5028f6288eSXie Yongji
51c8a6153bSXie Yongji struct vduse_virtqueue {
52c8a6153bSXie Yongji u16 index;
53c8a6153bSXie Yongji u16 num_max;
54c8a6153bSXie Yongji u32 num;
55c8a6153bSXie Yongji u64 desc_addr;
56c8a6153bSXie Yongji u64 driver_addr;
57c8a6153bSXie Yongji u64 device_addr;
58c8a6153bSXie Yongji struct vdpa_vq_state state;
59c8a6153bSXie Yongji bool ready;
60c8a6153bSXie Yongji bool kicked;
61c8a6153bSXie Yongji spinlock_t kick_lock;
62c8a6153bSXie Yongji spinlock_t irq_lock;
63c8a6153bSXie Yongji struct eventfd_ctx *kickfd;
64c8a6153bSXie Yongji struct vdpa_callback cb;
65c8a6153bSXie Yongji struct work_struct inject;
66c8a6153bSXie Yongji struct work_struct kick;
6728f6288eSXie Yongji int irq_effective_cpu;
6828f6288eSXie Yongji struct cpumask irq_affinity;
6966640f4aSXie Yongji struct kobject kobj;
70c8a6153bSXie Yongji };
71c8a6153bSXie Yongji
72c8a6153bSXie Yongji struct vduse_dev;
73c8a6153bSXie Yongji
74c8a6153bSXie Yongji struct vduse_vdpa {
75c8a6153bSXie Yongji struct vdpa_device vdpa;
76c8a6153bSXie Yongji struct vduse_dev *dev;
77c8a6153bSXie Yongji };
78c8a6153bSXie Yongji
/*
 * Description of a page array tied to an IOVA.
 * NOTE(review): presumably userspace memory registered with the device;
 * the users of this structure should confirm the exact semantics.
 */
struct vduse_umem {
	unsigned long iova;
	/* Number of entries in @pages. */
	unsigned long npages;
	struct page **pages;
	struct mm_struct *mm;
};
8579a463beSXie Yongji
86c8a6153bSXie Yongji struct vduse_dev {
87c8a6153bSXie Yongji struct vduse_vdpa *vdev;
88c8a6153bSXie Yongji struct device *dev;
8978885597SXie Yongji struct vduse_virtqueue **vqs;
90c8a6153bSXie Yongji struct vduse_iova_domain *domain;
91c8a6153bSXie Yongji char *name;
92c8a6153bSXie Yongji struct mutex lock;
93c8a6153bSXie Yongji spinlock_t msg_lock;
94c8a6153bSXie Yongji u64 msg_unique;
95c8a6153bSXie Yongji u32 msg_timeout;
96c8a6153bSXie Yongji wait_queue_head_t waitq;
97c8a6153bSXie Yongji struct list_head send_list;
98c8a6153bSXie Yongji struct list_head recv_list;
99c8a6153bSXie Yongji struct vdpa_callback config_cb;
100c8a6153bSXie Yongji struct work_struct inject;
101c8a6153bSXie Yongji spinlock_t irq_lock;
1020943aacfSXie Yongji struct rw_semaphore rwsem;
103c8a6153bSXie Yongji int minor;
104c8a6153bSXie Yongji bool broken;
105c8a6153bSXie Yongji bool connected;
106c8a6153bSXie Yongji u64 api_version;
107c8a6153bSXie Yongji u64 device_features;
108c8a6153bSXie Yongji u64 driver_features;
109c8a6153bSXie Yongji u32 device_id;
110c8a6153bSXie Yongji u32 vendor_id;
111c8a6153bSXie Yongji u32 generation;
112c8a6153bSXie Yongji u32 config_size;
113c8a6153bSXie Yongji void *config;
114c8a6153bSXie Yongji u8 status;
115c8a6153bSXie Yongji u32 vq_num;
116c8a6153bSXie Yongji u32 vq_align;
11779a463beSXie Yongji struct vduse_umem *umem;
11879a463beSXie Yongji struct mutex mem_lock;
119d4438d23SXie Yongji unsigned int bounce_size;
120d4438d23SXie Yongji struct mutex domain_lock;
121c8a6153bSXie Yongji };
122c8a6153bSXie Yongji
123c8a6153bSXie Yongji struct vduse_dev_msg {
124c8a6153bSXie Yongji struct vduse_dev_request req;
125c8a6153bSXie Yongji struct vduse_dev_response resp;
126c8a6153bSXie Yongji struct list_head list;
127c8a6153bSXie Yongji wait_queue_head_t waitq;
128c8a6153bSXie Yongji bool completed;
129c8a6153bSXie Yongji };
130c8a6153bSXie Yongji
131c8a6153bSXie Yongji struct vduse_control {
132c8a6153bSXie Yongji u64 api_version;
133c8a6153bSXie Yongji };
134c8a6153bSXie Yongji
135c8a6153bSXie Yongji static DEFINE_MUTEX(vduse_lock);
136c8a6153bSXie Yongji static DEFINE_IDR(vduse_idr);
137c8a6153bSXie Yongji
138c8a6153bSXie Yongji static dev_t vduse_major;
139c8a6153bSXie Yongji static struct class *vduse_class;
140c8a6153bSXie Yongji static struct cdev vduse_ctrl_cdev;
141c8a6153bSXie Yongji static struct cdev vduse_cdev;
142c8a6153bSXie Yongji static struct workqueue_struct *vduse_irq_wq;
14328f6288eSXie Yongji static struct workqueue_struct *vduse_irq_bound_wq;
144c8a6153bSXie Yongji
145c8a6153bSXie Yongji static u32 allowed_device_id[] = {
146c8a6153bSXie Yongji VIRTIO_ID_BLOCK,
147c8a6153bSXie Yongji };
148c8a6153bSXie Yongji
vdpa_to_vduse(struct vdpa_device * vdpa)149c8a6153bSXie Yongji static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
150c8a6153bSXie Yongji {
151c8a6153bSXie Yongji struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);
152c8a6153bSXie Yongji
153c8a6153bSXie Yongji return vdev->dev;
154c8a6153bSXie Yongji }
155c8a6153bSXie Yongji
/* Resolve a generic struct device to its VDUSE device via the vDPA device. */
static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	return vdpa_to_vduse(dev_to_vdpa(dev));
}
162c8a6153bSXie Yongji
/*
 * Look up the message carrying @request_id on @head.
 *
 * The first match is unlinked from the list and returned; NULL is
 * returned when no entry matches.
 */
static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *found = NULL;
	struct vduse_dev_msg *pos;

	list_for_each_entry(pos, head, list) {
		if (pos->req.request_id != request_id)
			continue;

		found = pos;
		break;
	}

	if (found)
		list_del(&found->list);

	return found;
}
177c8a6153bSXie Yongji
vduse_dequeue_msg(struct list_head * head)178c8a6153bSXie Yongji static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
179c8a6153bSXie Yongji {
180c8a6153bSXie Yongji struct vduse_dev_msg *msg = NULL;
181c8a6153bSXie Yongji
182c8a6153bSXie Yongji if (!list_empty(head)) {
183c8a6153bSXie Yongji msg = list_first_entry(head, struct vduse_dev_msg, list);
184c8a6153bSXie Yongji list_del(&msg->list);
185c8a6153bSXie Yongji }
186c8a6153bSXie Yongji
187c8a6153bSXie Yongji return msg;
188c8a6153bSXie Yongji }
189c8a6153bSXie Yongji
vduse_enqueue_msg(struct list_head * head,struct vduse_dev_msg * msg)190c8a6153bSXie Yongji static void vduse_enqueue_msg(struct list_head *head,
191c8a6153bSXie Yongji struct vduse_dev_msg *msg)
192c8a6153bSXie Yongji {
193c8a6153bSXie Yongji list_add_tail(&msg->list, head);
194c8a6153bSXie Yongji }
195c8a6153bSXie Yongji
vduse_dev_broken(struct vduse_dev * dev)196c8a6153bSXie Yongji static void vduse_dev_broken(struct vduse_dev *dev)
197c8a6153bSXie Yongji {
198c8a6153bSXie Yongji struct vduse_dev_msg *msg, *tmp;
199c8a6153bSXie Yongji
200c8a6153bSXie Yongji if (unlikely(dev->broken))
201c8a6153bSXie Yongji return;
202c8a6153bSXie Yongji
203c8a6153bSXie Yongji list_splice_init(&dev->recv_list, &dev->send_list);
204c8a6153bSXie Yongji list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
205c8a6153bSXie Yongji list_del(&msg->list);
206c8a6153bSXie Yongji msg->completed = 1;
207c8a6153bSXie Yongji msg->resp.result = VDUSE_REQ_RESULT_FAILED;
208c8a6153bSXie Yongji wake_up(&msg->waitq);
209c8a6153bSXie Yongji }
210c8a6153bSXie Yongji dev->broken = true;
211c8a6153bSXie Yongji wake_up(&dev->waitq);
212c8a6153bSXie Yongji }
213c8a6153bSXie Yongji
vduse_dev_msg_sync(struct vduse_dev * dev,struct vduse_dev_msg * msg)214c8a6153bSXie Yongji static int vduse_dev_msg_sync(struct vduse_dev *dev,
215c8a6153bSXie Yongji struct vduse_dev_msg *msg)
216c8a6153bSXie Yongji {
217c8a6153bSXie Yongji int ret;
218c8a6153bSXie Yongji
219c8a6153bSXie Yongji if (unlikely(dev->broken))
220c8a6153bSXie Yongji return -EIO;
221c8a6153bSXie Yongji
222c8a6153bSXie Yongji init_waitqueue_head(&msg->waitq);
223c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
224c8a6153bSXie Yongji if (unlikely(dev->broken)) {
225c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
226c8a6153bSXie Yongji return -EIO;
227c8a6153bSXie Yongji }
228c8a6153bSXie Yongji msg->req.request_id = dev->msg_unique++;
229c8a6153bSXie Yongji vduse_enqueue_msg(&dev->send_list, msg);
230c8a6153bSXie Yongji wake_up(&dev->waitq);
231c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
232c8a6153bSXie Yongji if (dev->msg_timeout)
233c8a6153bSXie Yongji ret = wait_event_killable_timeout(msg->waitq, msg->completed,
234c8a6153bSXie Yongji (long)dev->msg_timeout * HZ);
235c8a6153bSXie Yongji else
236c8a6153bSXie Yongji ret = wait_event_killable(msg->waitq, msg->completed);
237c8a6153bSXie Yongji
238c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
239c8a6153bSXie Yongji if (!msg->completed) {
240c8a6153bSXie Yongji list_del(&msg->list);
241c8a6153bSXie Yongji msg->resp.result = VDUSE_REQ_RESULT_FAILED;
242c8a6153bSXie Yongji /* Mark the device as malfunction when there is a timeout */
243c8a6153bSXie Yongji if (!ret)
244c8a6153bSXie Yongji vduse_dev_broken(dev);
245c8a6153bSXie Yongji }
246c8a6153bSXie Yongji ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
247c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
248c8a6153bSXie Yongji
249c8a6153bSXie Yongji return ret;
250c8a6153bSXie Yongji }
251c8a6153bSXie Yongji
vduse_dev_get_vq_state_packed(struct vduse_dev * dev,struct vduse_virtqueue * vq,struct vdpa_vq_state_packed * packed)252c8a6153bSXie Yongji static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
253c8a6153bSXie Yongji struct vduse_virtqueue *vq,
254c8a6153bSXie Yongji struct vdpa_vq_state_packed *packed)
255c8a6153bSXie Yongji {
256c8a6153bSXie Yongji struct vduse_dev_msg msg = { 0 };
257c8a6153bSXie Yongji int ret;
258c8a6153bSXie Yongji
259c8a6153bSXie Yongji msg.req.type = VDUSE_GET_VQ_STATE;
260c8a6153bSXie Yongji msg.req.vq_state.index = vq->index;
261c8a6153bSXie Yongji
262c8a6153bSXie Yongji ret = vduse_dev_msg_sync(dev, &msg);
263c8a6153bSXie Yongji if (ret)
264c8a6153bSXie Yongji return ret;
265c8a6153bSXie Yongji
266c8a6153bSXie Yongji packed->last_avail_counter =
267c8a6153bSXie Yongji msg.resp.vq_state.packed.last_avail_counter & 0x0001;
268c8a6153bSXie Yongji packed->last_avail_idx =
269c8a6153bSXie Yongji msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
270c8a6153bSXie Yongji packed->last_used_counter =
271c8a6153bSXie Yongji msg.resp.vq_state.packed.last_used_counter & 0x0001;
272c8a6153bSXie Yongji packed->last_used_idx =
273c8a6153bSXie Yongji msg.resp.vq_state.packed.last_used_idx & 0x7FFF;
274c8a6153bSXie Yongji
275c8a6153bSXie Yongji return 0;
276c8a6153bSXie Yongji }
277c8a6153bSXie Yongji
vduse_dev_get_vq_state_split(struct vduse_dev * dev,struct vduse_virtqueue * vq,struct vdpa_vq_state_split * split)278c8a6153bSXie Yongji static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
279c8a6153bSXie Yongji struct vduse_virtqueue *vq,
280c8a6153bSXie Yongji struct vdpa_vq_state_split *split)
281c8a6153bSXie Yongji {
282c8a6153bSXie Yongji struct vduse_dev_msg msg = { 0 };
283c8a6153bSXie Yongji int ret;
284c8a6153bSXie Yongji
285c8a6153bSXie Yongji msg.req.type = VDUSE_GET_VQ_STATE;
286c8a6153bSXie Yongji msg.req.vq_state.index = vq->index;
287c8a6153bSXie Yongji
288c8a6153bSXie Yongji ret = vduse_dev_msg_sync(dev, &msg);
289c8a6153bSXie Yongji if (ret)
290c8a6153bSXie Yongji return ret;
291c8a6153bSXie Yongji
292c8a6153bSXie Yongji split->avail_index = msg.resp.vq_state.split.avail_index;
293c8a6153bSXie Yongji
294c8a6153bSXie Yongji return 0;
295c8a6153bSXie Yongji }
296c8a6153bSXie Yongji
/*
 * Forward a status change to userspace with a VDUSE_SET_STATUS request.
 *
 * Returns the result of vduse_dev_msg_sync().
 */
static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg request = { 0 };

	request.req.type = VDUSE_SET_STATUS;
	request.req.s.status = status;

	return vduse_dev_msg_sync(dev, &request);
}
306c8a6153bSXie Yongji
/*
 * Tell userspace that the IOVA range [@start, @last] has changed by
 * sending a VDUSE_UPDATE_IOTLB request.
 *
 * Returns -EINVAL when @last lies below @start, otherwise the result of
 * vduse_dev_msg_sync().
 */
static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg request = { 0 };

	if (start > last)
		return -EINVAL;

	request.req.type = VDUSE_UPDATE_IOTLB;
	request.req.iova.start = start;
	request.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &request);
}
321c8a6153bSXie Yongji
vduse_dev_read_iter(struct kiocb * iocb,struct iov_iter * to)322c8a6153bSXie Yongji static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
323c8a6153bSXie Yongji {
324c8a6153bSXie Yongji struct file *file = iocb->ki_filp;
325c8a6153bSXie Yongji struct vduse_dev *dev = file->private_data;
326c8a6153bSXie Yongji struct vduse_dev_msg *msg;
327c8a6153bSXie Yongji int size = sizeof(struct vduse_dev_request);
328c8a6153bSXie Yongji ssize_t ret;
329c8a6153bSXie Yongji
330c8a6153bSXie Yongji if (iov_iter_count(to) < size)
331c8a6153bSXie Yongji return -EINVAL;
332c8a6153bSXie Yongji
333c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
334c8a6153bSXie Yongji while (1) {
335c8a6153bSXie Yongji msg = vduse_dequeue_msg(&dev->send_list);
336c8a6153bSXie Yongji if (msg)
337c8a6153bSXie Yongji break;
338c8a6153bSXie Yongji
339c8a6153bSXie Yongji ret = -EAGAIN;
340c8a6153bSXie Yongji if (file->f_flags & O_NONBLOCK)
341c8a6153bSXie Yongji goto unlock;
342c8a6153bSXie Yongji
343c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
344c8a6153bSXie Yongji ret = wait_event_interruptible_exclusive(dev->waitq,
345c8a6153bSXie Yongji !list_empty(&dev->send_list));
346c8a6153bSXie Yongji if (ret)
347c8a6153bSXie Yongji return ret;
348c8a6153bSXie Yongji
349c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
350c8a6153bSXie Yongji }
351c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
352c8a6153bSXie Yongji ret = copy_to_iter(&msg->req, size, to);
353c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
354c8a6153bSXie Yongji if (ret != size) {
355c8a6153bSXie Yongji ret = -EFAULT;
356c8a6153bSXie Yongji vduse_enqueue_msg(&dev->send_list, msg);
357c8a6153bSXie Yongji goto unlock;
358c8a6153bSXie Yongji }
359c8a6153bSXie Yongji vduse_enqueue_msg(&dev->recv_list, msg);
360c8a6153bSXie Yongji unlock:
361c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
362c8a6153bSXie Yongji
363c8a6153bSXie Yongji return ret;
364c8a6153bSXie Yongji }
365c8a6153bSXie Yongji
/*
 * Report whether the first @size bytes at @ptr are all zero.
 *
 * A @size of zero or less is treated as an empty range and yields true.
 */
static bool is_mem_zero(const char *ptr, int size)
{
	for (; size > 0; size--, ptr++) {
		if (*ptr != 0)
			return false;
	}

	return true;
}
376c8a6153bSXie Yongji
vduse_dev_write_iter(struct kiocb * iocb,struct iov_iter * from)377c8a6153bSXie Yongji static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
378c8a6153bSXie Yongji {
379c8a6153bSXie Yongji struct file *file = iocb->ki_filp;
380c8a6153bSXie Yongji struct vduse_dev *dev = file->private_data;
381c8a6153bSXie Yongji struct vduse_dev_response resp;
382c8a6153bSXie Yongji struct vduse_dev_msg *msg;
383c8a6153bSXie Yongji size_t ret;
384c8a6153bSXie Yongji
385c8a6153bSXie Yongji ret = copy_from_iter(&resp, sizeof(resp), from);
386c8a6153bSXie Yongji if (ret != sizeof(resp))
387c8a6153bSXie Yongji return -EINVAL;
388c8a6153bSXie Yongji
389c8a6153bSXie Yongji if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
390c8a6153bSXie Yongji return -EINVAL;
391c8a6153bSXie Yongji
392c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
393c8a6153bSXie Yongji msg = vduse_find_msg(&dev->recv_list, resp.request_id);
394c8a6153bSXie Yongji if (!msg) {
395c8a6153bSXie Yongji ret = -ENOENT;
396c8a6153bSXie Yongji goto unlock;
397c8a6153bSXie Yongji }
398c8a6153bSXie Yongji
399c8a6153bSXie Yongji memcpy(&msg->resp, &resp, sizeof(resp));
400c8a6153bSXie Yongji msg->completed = 1;
401c8a6153bSXie Yongji wake_up(&msg->waitq);
402c8a6153bSXie Yongji unlock:
403c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
404c8a6153bSXie Yongji
405c8a6153bSXie Yongji return ret;
406c8a6153bSXie Yongji }
407c8a6153bSXie Yongji
vduse_dev_poll(struct file * file,poll_table * wait)408c8a6153bSXie Yongji static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
409c8a6153bSXie Yongji {
410c8a6153bSXie Yongji struct vduse_dev *dev = file->private_data;
411c8a6153bSXie Yongji __poll_t mask = 0;
412c8a6153bSXie Yongji
413c8a6153bSXie Yongji poll_wait(file, &dev->waitq, wait);
414c8a6153bSXie Yongji
415c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
416c8a6153bSXie Yongji
417c8a6153bSXie Yongji if (unlikely(dev->broken))
418c8a6153bSXie Yongji mask |= EPOLLERR;
419c8a6153bSXie Yongji if (!list_empty(&dev->send_list))
420c8a6153bSXie Yongji mask |= EPOLLIN | EPOLLRDNORM;
421c8a6153bSXie Yongji if (!list_empty(&dev->recv_list))
422c8a6153bSXie Yongji mask |= EPOLLOUT | EPOLLWRNORM;
423c8a6153bSXie Yongji
424c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
425c8a6153bSXie Yongji
426c8a6153bSXie Yongji return mask;
427c8a6153bSXie Yongji }
428c8a6153bSXie Yongji
vduse_dev_reset(struct vduse_dev * dev)429c8a6153bSXie Yongji static void vduse_dev_reset(struct vduse_dev *dev)
430c8a6153bSXie Yongji {
431c8a6153bSXie Yongji int i;
432c8a6153bSXie Yongji struct vduse_iova_domain *domain = dev->domain;
433c8a6153bSXie Yongji
434c8a6153bSXie Yongji /* The coherent mappings are handled in vduse_dev_free_coherent() */
435d4438d23SXie Yongji if (domain && domain->bounce_map)
436c8a6153bSXie Yongji vduse_domain_reset_bounce_map(domain);
437c8a6153bSXie Yongji
4380943aacfSXie Yongji down_write(&dev->rwsem);
4390943aacfSXie Yongji
440c8a6153bSXie Yongji dev->status = 0;
441c8a6153bSXie Yongji dev->driver_features = 0;
442c8a6153bSXie Yongji dev->generation++;
443c8a6153bSXie Yongji spin_lock(&dev->irq_lock);
444c8a6153bSXie Yongji dev->config_cb.callback = NULL;
445c8a6153bSXie Yongji dev->config_cb.private = NULL;
446c8a6153bSXie Yongji spin_unlock(&dev->irq_lock);
447c8a6153bSXie Yongji flush_work(&dev->inject);
448c8a6153bSXie Yongji
449c8a6153bSXie Yongji for (i = 0; i < dev->vq_num; i++) {
45078885597SXie Yongji struct vduse_virtqueue *vq = dev->vqs[i];
451c8a6153bSXie Yongji
452c8a6153bSXie Yongji vq->ready = false;
453c8a6153bSXie Yongji vq->desc_addr = 0;
454c8a6153bSXie Yongji vq->driver_addr = 0;
455c8a6153bSXie Yongji vq->device_addr = 0;
456c8a6153bSXie Yongji vq->num = 0;
457c8a6153bSXie Yongji memset(&vq->state, 0, sizeof(vq->state));
458c8a6153bSXie Yongji
459c8a6153bSXie Yongji spin_lock(&vq->kick_lock);
460c8a6153bSXie Yongji vq->kicked = false;
461c8a6153bSXie Yongji if (vq->kickfd)
462c8a6153bSXie Yongji eventfd_ctx_put(vq->kickfd);
463c8a6153bSXie Yongji vq->kickfd = NULL;
464c8a6153bSXie Yongji spin_unlock(&vq->kick_lock);
465c8a6153bSXie Yongji
466c8a6153bSXie Yongji spin_lock(&vq->irq_lock);
467c8a6153bSXie Yongji vq->cb.callback = NULL;
468c8a6153bSXie Yongji vq->cb.private = NULL;
469e38632ddSXie Yongji vq->cb.trigger = NULL;
470c8a6153bSXie Yongji spin_unlock(&vq->irq_lock);
471c8a6153bSXie Yongji flush_work(&vq->inject);
472c8a6153bSXie Yongji flush_work(&vq->kick);
473c8a6153bSXie Yongji }
4740943aacfSXie Yongji
4750943aacfSXie Yongji up_write(&dev->rwsem);
476c8a6153bSXie Yongji }
477c8a6153bSXie Yongji
/*
 * Record the descriptor, driver and device area addresses of virtqueue
 * @idx. Always returns 0.
 */
static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				     u64 desc_area, u64 driver_area,
				     u64 device_area)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}
491c8a6153bSXie Yongji
vduse_vq_kick(struct vduse_virtqueue * vq)492c8a6153bSXie Yongji static void vduse_vq_kick(struct vduse_virtqueue *vq)
493c8a6153bSXie Yongji {
494c8a6153bSXie Yongji spin_lock(&vq->kick_lock);
495c8a6153bSXie Yongji if (!vq->ready)
496c8a6153bSXie Yongji goto unlock;
497c8a6153bSXie Yongji
498c8a6153bSXie Yongji if (vq->kickfd)
499c8a6153bSXie Yongji eventfd_signal(vq->kickfd, 1);
500c8a6153bSXie Yongji else
501c8a6153bSXie Yongji vq->kicked = true;
502c8a6153bSXie Yongji unlock:
503c8a6153bSXie Yongji spin_unlock(&vq->kick_lock);
504c8a6153bSXie Yongji }
505c8a6153bSXie Yongji
vduse_vq_kick_work(struct work_struct * work)506c8a6153bSXie Yongji static void vduse_vq_kick_work(struct work_struct *work)
507c8a6153bSXie Yongji {
508c8a6153bSXie Yongji struct vduse_virtqueue *vq = container_of(work,
509c8a6153bSXie Yongji struct vduse_virtqueue, kick);
510c8a6153bSXie Yongji
511c8a6153bSXie Yongji vduse_vq_kick(vq);
512c8a6153bSXie Yongji }
513c8a6153bSXie Yongji
/*
 * Kick virtqueue @idx, directly when eventfd signalling is allowed in the
 * current context and through the queue's kick work item otherwise.
 */
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];

	if (eventfd_signal_allowed())
		vduse_vq_kick(vq);
	else
		schedule_work(&vq->kick);
}
525c8a6153bSXie Yongji
/*
 * Install the callback, private data and trigger from @cb on virtqueue
 * @idx. The update is done under the queue's irq_lock.
 */
static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
				 struct vdpa_callback *cb)
{
	struct vduse_virtqueue *vq = vdpa_to_vduse(vdpa)->vqs[idx];
	struct vdpa_callback *dst = &vq->cb;

	spin_lock(&vq->irq_lock);
	dst->callback = cb->callback;
	dst->private = cb->private;
	dst->trigger = cb->trigger;
	spin_unlock(&vq->irq_lock);
}
538c8a6153bSXie Yongji
/* Store @num as the ring size of virtqueue @idx. */
static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	vdpa_to_vduse(vdpa)->vqs[idx]->num = num;
}
546c8a6153bSXie Yongji
/* Set the ready flag of virtqueue @idx to @ready. */
static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
				    u16 idx, bool ready)
{
	vdpa_to_vduse(vdpa)->vqs[idx]->ready = ready;
}
555c8a6153bSXie Yongji
vduse_vdpa_get_vq_ready(struct vdpa_device * vdpa,u16 idx)556c8a6153bSXie Yongji static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
557c8a6153bSXie Yongji {
558c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
55978885597SXie Yongji struct vduse_virtqueue *vq = dev->vqs[idx];
560c8a6153bSXie Yongji
561c8a6153bSXie Yongji return vq->ready;
562c8a6153bSXie Yongji }
563c8a6153bSXie Yongji
/*
 * Store @state as the ring state of virtqueue @idx.
 *
 * The packed layout is used when the driver negotiated
 * VIRTIO_F_RING_PACKED and the split layout otherwise. Always returns 0.
 */
static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				   const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vdpa_vq_state *dst = &dev->vqs[idx]->state;

	if (!(dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))) {
		dst->split.avail_index = state->split.avail_index;
		return 0;
	}

	dst->packed.last_avail_counter = state->packed.last_avail_counter;
	dst->packed.last_avail_idx = state->packed.last_avail_idx;
	dst->packed.last_used_counter = state->packed.last_used_counter;
	dst->packed.last_used_idx = state->packed.last_used_idx;

	return 0;
}
582c8a6153bSXie Yongji
/*
 * Fetch the ring state of virtqueue @idx from userspace into @state.
 *
 * The packed query is used when the driver negotiated
 * VIRTIO_F_RING_PACKED and the split query otherwise. Returns the result
 * of the query helper.
 */
static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				   struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = dev->vqs[idx];
	bool packed_ring = dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED);

	if (!packed_ring)
		return vduse_dev_get_vq_state_split(dev, vq, &state->split);

	return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
}
594c8a6153bSXie Yongji
vduse_vdpa_get_vq_align(struct vdpa_device * vdpa)595c8a6153bSXie Yongji static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
596c8a6153bSXie Yongji {
597c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
598c8a6153bSXie Yongji
599c8a6153bSXie Yongji return dev->vq_align;
600c8a6153bSXie Yongji }
601c8a6153bSXie Yongji
vduse_vdpa_get_device_features(struct vdpa_device * vdpa)602a64917bcSEli Cohen static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
603c8a6153bSXie Yongji {
604c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
605c8a6153bSXie Yongji
606c8a6153bSXie Yongji return dev->device_features;
607c8a6153bSXie Yongji }
608c8a6153bSXie Yongji
/* Record @features as the driver's feature bits. Always returns 0. */
static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
	vdpa_to_vduse(vdpa)->driver_features = features;

	return 0;
}
616c8a6153bSXie Yongji
vduse_vdpa_get_driver_features(struct vdpa_device * vdpa)617a64917bcSEli Cohen static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
618a64917bcSEli Cohen {
619a64917bcSEli Cohen struct vduse_dev *dev = vdpa_to_vduse(vdpa);
620a64917bcSEli Cohen
621a64917bcSEli Cohen return dev->driver_features;
622a64917bcSEli Cohen }
623a64917bcSEli Cohen
vduse_vdpa_set_config_cb(struct vdpa_device * vdpa,struct vdpa_callback * cb)624c8a6153bSXie Yongji static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
625c8a6153bSXie Yongji struct vdpa_callback *cb)
626c8a6153bSXie Yongji {
627c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
628c8a6153bSXie Yongji
629c8a6153bSXie Yongji spin_lock(&dev->irq_lock);
630c8a6153bSXie Yongji dev->config_cb.callback = cb->callback;
631c8a6153bSXie Yongji dev->config_cb.private = cb->private;
632c8a6153bSXie Yongji spin_unlock(&dev->irq_lock);
633c8a6153bSXie Yongji }
634c8a6153bSXie Yongji
vduse_vdpa_get_vq_num_max(struct vdpa_device * vdpa)635c8a6153bSXie Yongji static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
636c8a6153bSXie Yongji {
637c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
638c8a6153bSXie Yongji u16 num_max = 0;
639c8a6153bSXie Yongji int i;
640c8a6153bSXie Yongji
641c8a6153bSXie Yongji for (i = 0; i < dev->vq_num; i++)
64278885597SXie Yongji if (num_max < dev->vqs[i]->num_max)
64378885597SXie Yongji num_max = dev->vqs[i]->num_max;
644c8a6153bSXie Yongji
645c8a6153bSXie Yongji return num_max;
646c8a6153bSXie Yongji }
647c8a6153bSXie Yongji
vduse_vdpa_get_device_id(struct vdpa_device * vdpa)648c8a6153bSXie Yongji static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
649c8a6153bSXie Yongji {
650c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
651c8a6153bSXie Yongji
652c8a6153bSXie Yongji return dev->device_id;
653c8a6153bSXie Yongji }
654c8a6153bSXie Yongji
vduse_vdpa_get_vendor_id(struct vdpa_device * vdpa)655c8a6153bSXie Yongji static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
656c8a6153bSXie Yongji {
657c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
658c8a6153bSXie Yongji
659c8a6153bSXie Yongji return dev->vendor_id;
660c8a6153bSXie Yongji }
661c8a6153bSXie Yongji
vduse_vdpa_get_status(struct vdpa_device * vdpa)662c8a6153bSXie Yongji static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
663c8a6153bSXie Yongji {
664c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
665c8a6153bSXie Yongji
666c8a6153bSXie Yongji return dev->status;
667c8a6153bSXie Yongji }
668c8a6153bSXie Yongji
/*
 * Forward @status to userspace and cache it in the device.
 *
 * The cached value is only updated when userspace acknowledged the
 * change; a failed request leaves the previous status in place.
 */
static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (!vduse_dev_set_status(dev, status))
		dev->status = status;
}
678c8a6153bSXie Yongji
vduse_vdpa_get_config_size(struct vdpa_device * vdpa)679c8a6153bSXie Yongji static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
680c8a6153bSXie Yongji {
681c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
682c8a6153bSXie Yongji
683c8a6153bSXie Yongji return dev->config_size;
684c8a6153bSXie Yongji }
685c8a6153bSXie Yongji
vduse_vdpa_get_config(struct vdpa_device * vdpa,unsigned int offset,void * buf,unsigned int len)686c8a6153bSXie Yongji static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
687c8a6153bSXie Yongji void *buf, unsigned int len)
688c8a6153bSXie Yongji {
689c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
690c8a6153bSXie Yongji
69146f8a292SMaxime Coquelin /* Initialize the buffer in case of partial copy. */
69246f8a292SMaxime Coquelin memset(buf, 0, len);
69346f8a292SMaxime Coquelin
69446f8a292SMaxime Coquelin if (offset > dev->config_size)
695c8a6153bSXie Yongji return;
696c8a6153bSXie Yongji
69746f8a292SMaxime Coquelin if (len > dev->config_size - offset)
69846f8a292SMaxime Coquelin len = dev->config_size - offset;
69946f8a292SMaxime Coquelin
700c8a6153bSXie Yongji memcpy(buf, dev->config + offset, len);
701c8a6153bSXie Yongji }
702c8a6153bSXie Yongji
/*
 * Configuration space writes from the driver side are ignored: only a
 * read-only configuration space is supported for now.
 */
static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
				  const void *buf, unsigned int len)
{
}
708c8a6153bSXie Yongji
/*
 * Reset the device: first pass status 0 to vduse_dev_set_status(), then
 * call vduse_dev_reset() regardless of the outcome.
 *
 * Returns whatever vduse_dev_set_status() returned.
 */
static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_dev_set_status(dev, 0);
	vduse_dev_reset(dev);

	return ret;
}
718c8a6153bSXie Yongji
vduse_vdpa_get_generation(struct vdpa_device * vdpa)719c8a6153bSXie Yongji static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
720c8a6153bSXie Yongji {
721c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
722c8a6153bSXie Yongji
723c8a6153bSXie Yongji return dev->generation;
724c8a6153bSXie Yongji }
725c8a6153bSXie Yongji
/*
 * Set the IRQ affinity mask of virtqueue @idx.
 *
 * A NULL @cpu_mask selects every CPU; otherwise the given mask is copied.
 * Always returns 0.
 */
static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
				      const struct cpumask *cpu_mask)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct cpumask *affinity = &dev->vqs[idx]->irq_affinity;

	if (!cpu_mask)
		cpumask_setall(affinity);
	else
		cpumask_copy(affinity, cpu_mask);

	return 0;
}
73828f6288eSXie Yongji
739bfae1648SXie Yongji static const struct cpumask *
vduse_vdpa_get_vq_affinity(struct vdpa_device * vdpa,u16 idx)740bfae1648SXie Yongji vduse_vdpa_get_vq_affinity(struct vdpa_device *vdpa, u16 idx)
741bfae1648SXie Yongji {
742bfae1648SXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
743bfae1648SXie Yongji
744bfae1648SXie Yongji return &dev->vqs[idx]->irq_affinity;
745bfae1648SXie Yongji }
746bfae1648SXie Yongji
vduse_vdpa_set_map(struct vdpa_device * vdpa,unsigned int asid,struct vhost_iotlb * iotlb)747c8a6153bSXie Yongji static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
748db9adcbfSGautam Dawar unsigned int asid,
749c8a6153bSXie Yongji struct vhost_iotlb *iotlb)
750c8a6153bSXie Yongji {
751c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
752c8a6153bSXie Yongji int ret;
753c8a6153bSXie Yongji
754c8a6153bSXie Yongji ret = vduse_domain_set_map(dev->domain, iotlb);
755c8a6153bSXie Yongji if (ret)
756c8a6153bSXie Yongji return ret;
757c8a6153bSXie Yongji
758c8a6153bSXie Yongji ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
759c8a6153bSXie Yongji if (ret) {
760c8a6153bSXie Yongji vduse_domain_clear_map(dev->domain, iotlb);
761c8a6153bSXie Yongji return ret;
762c8a6153bSXie Yongji }
763c8a6153bSXie Yongji
764c8a6153bSXie Yongji return 0;
765c8a6153bSXie Yongji }
766c8a6153bSXie Yongji
vduse_vdpa_free(struct vdpa_device * vdpa)767c8a6153bSXie Yongji static void vduse_vdpa_free(struct vdpa_device *vdpa)
768c8a6153bSXie Yongji {
769c8a6153bSXie Yongji struct vduse_dev *dev = vdpa_to_vduse(vdpa);
770c8a6153bSXie Yongji
771c8a6153bSXie Yongji dev->vdev = NULL;
772c8a6153bSXie Yongji }
773c8a6153bSXie Yongji
774c8a6153bSXie Yongji static const struct vdpa_config_ops vduse_vdpa_config_ops = {
775c8a6153bSXie Yongji .set_vq_address = vduse_vdpa_set_vq_address,
776c8a6153bSXie Yongji .kick_vq = vduse_vdpa_kick_vq,
777c8a6153bSXie Yongji .set_vq_cb = vduse_vdpa_set_vq_cb,
778c8a6153bSXie Yongji .set_vq_num = vduse_vdpa_set_vq_num,
779c8a6153bSXie Yongji .set_vq_ready = vduse_vdpa_set_vq_ready,
780c8a6153bSXie Yongji .get_vq_ready = vduse_vdpa_get_vq_ready,
781c8a6153bSXie Yongji .set_vq_state = vduse_vdpa_set_vq_state,
782c8a6153bSXie Yongji .get_vq_state = vduse_vdpa_get_vq_state,
783c8a6153bSXie Yongji .get_vq_align = vduse_vdpa_get_vq_align,
784a64917bcSEli Cohen .get_device_features = vduse_vdpa_get_device_features,
785a64917bcSEli Cohen .set_driver_features = vduse_vdpa_set_driver_features,
786a64917bcSEli Cohen .get_driver_features = vduse_vdpa_get_driver_features,
787c8a6153bSXie Yongji .set_config_cb = vduse_vdpa_set_config_cb,
788c8a6153bSXie Yongji .get_vq_num_max = vduse_vdpa_get_vq_num_max,
789c8a6153bSXie Yongji .get_device_id = vduse_vdpa_get_device_id,
790c8a6153bSXie Yongji .get_vendor_id = vduse_vdpa_get_vendor_id,
791c8a6153bSXie Yongji .get_status = vduse_vdpa_get_status,
792c8a6153bSXie Yongji .set_status = vduse_vdpa_set_status,
793c8a6153bSXie Yongji .get_config_size = vduse_vdpa_get_config_size,
794c8a6153bSXie Yongji .get_config = vduse_vdpa_get_config,
795c8a6153bSXie Yongji .set_config = vduse_vdpa_set_config,
796c8a6153bSXie Yongji .get_generation = vduse_vdpa_get_generation,
79728f6288eSXie Yongji .set_vq_affinity = vduse_vdpa_set_vq_affinity,
798bfae1648SXie Yongji .get_vq_affinity = vduse_vdpa_get_vq_affinity,
799c8a6153bSXie Yongji .reset = vduse_vdpa_reset,
800c8a6153bSXie Yongji .set_map = vduse_vdpa_set_map,
801c8a6153bSXie Yongji .free = vduse_vdpa_free,
802c8a6153bSXie Yongji };
803c8a6153bSXie Yongji
vduse_dev_map_page(struct device * dev,struct page * page,unsigned long offset,size_t size,enum dma_data_direction dir,unsigned long attrs)804c8a6153bSXie Yongji static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
805c8a6153bSXie Yongji unsigned long offset, size_t size,
806c8a6153bSXie Yongji enum dma_data_direction dir,
807c8a6153bSXie Yongji unsigned long attrs)
808c8a6153bSXie Yongji {
809c8a6153bSXie Yongji struct vduse_dev *vdev = dev_to_vduse(dev);
810c8a6153bSXie Yongji struct vduse_iova_domain *domain = vdev->domain;
811c8a6153bSXie Yongji
812c8a6153bSXie Yongji return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
813c8a6153bSXie Yongji }
814c8a6153bSXie Yongji
/*
 * dma_map_ops.unmap_page: forward the request to the IOVA domain of the
 * VDUSE device behind @dev.
 */
static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);

	vduse_domain_unmap_page(vdev->domain, dma_addr, size, dir, attrs);
}
824c8a6153bSXie Yongji
/*
 * dma_map_ops.alloc: allocate a coherent buffer from the IOVA domain of the
 * VDUSE device behind @dev.
 *
 * On success returns the kernel virtual address and stores the IOVA in
 * *@dma_addr. On failure returns NULL and leaves *@dma_addr set to
 * DMA_MAPPING_ERROR.
 */
static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
				      dma_addr_t *dma_addr, gfp_t flag,
				      unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	/*
	 * Use a real dma_addr_t for the out-parameter. Casting the address
	 * of an unsigned long to dma_addr_t * lets the callee write
	 * sizeof(dma_addr_t) bytes into a possibly smaller object when
	 * dma_addr_t is wider than unsigned long.
	 */
	dma_addr_t iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size, &iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = iova;

	return addr;
}
844c8a6153bSXie Yongji
/*
 * dma_map_ops.free: hand a coherent buffer back to the IOVA domain of the
 * VDUSE device behind @dev.
 */
static void vduse_dev_free_coherent(struct device *dev, size_t size,
				    void *vaddr, dma_addr_t dma_addr,
				    unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);

	vduse_domain_free_coherent(vdev->domain, size, vaddr, dma_addr, attrs);
}
854c8a6153bSXie Yongji
vduse_dev_max_mapping_size(struct device * dev)855c8a6153bSXie Yongji static size_t vduse_dev_max_mapping_size(struct device *dev)
856c8a6153bSXie Yongji {
857c8a6153bSXie Yongji struct vduse_dev *vdev = dev_to_vduse(dev);
858c8a6153bSXie Yongji struct vduse_iova_domain *domain = vdev->domain;
859c8a6153bSXie Yongji
860c8a6153bSXie Yongji return domain->bounce_size;
861c8a6153bSXie Yongji }
862c8a6153bSXie Yongji
863c8a6153bSXie Yongji static const struct dma_map_ops vduse_dev_dma_ops = {
864c8a6153bSXie Yongji .map_page = vduse_dev_map_page,
865c8a6153bSXie Yongji .unmap_page = vduse_dev_unmap_page,
866c8a6153bSXie Yongji .alloc = vduse_dev_alloc_coherent,
867c8a6153bSXie Yongji .free = vduse_dev_free_coherent,
868c8a6153bSXie Yongji .max_mapping_size = vduse_dev_max_mapping_size,
869c8a6153bSXie Yongji };
870c8a6153bSXie Yongji
/*
 * Translate a VDUSE_ACCESS_* permission value into the matching open(2)
 * access-mode flag.
 *
 * An unrecognised @perm triggers a WARN and yields 0.
 */
static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}
892c8a6153bSXie Yongji
vduse_kickfd_setup(struct vduse_dev * dev,struct vduse_vq_eventfd * eventfd)893c8a6153bSXie Yongji static int vduse_kickfd_setup(struct vduse_dev *dev,
894c8a6153bSXie Yongji struct vduse_vq_eventfd *eventfd)
895c8a6153bSXie Yongji {
896c8a6153bSXie Yongji struct eventfd_ctx *ctx = NULL;
897c8a6153bSXie Yongji struct vduse_virtqueue *vq;
898c8a6153bSXie Yongji u32 index;
899c8a6153bSXie Yongji
900c8a6153bSXie Yongji if (eventfd->index >= dev->vq_num)
901c8a6153bSXie Yongji return -EINVAL;
902c8a6153bSXie Yongji
903c8a6153bSXie Yongji index = array_index_nospec(eventfd->index, dev->vq_num);
90478885597SXie Yongji vq = dev->vqs[index];
905c8a6153bSXie Yongji if (eventfd->fd >= 0) {
906c8a6153bSXie Yongji ctx = eventfd_ctx_fdget(eventfd->fd);
907c8a6153bSXie Yongji if (IS_ERR(ctx))
908c8a6153bSXie Yongji return PTR_ERR(ctx);
909c8a6153bSXie Yongji } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
910c8a6153bSXie Yongji return 0;
911c8a6153bSXie Yongji
912c8a6153bSXie Yongji spin_lock(&vq->kick_lock);
913c8a6153bSXie Yongji if (vq->kickfd)
914c8a6153bSXie Yongji eventfd_ctx_put(vq->kickfd);
915c8a6153bSXie Yongji vq->kickfd = ctx;
916c8a6153bSXie Yongji if (vq->ready && vq->kicked && vq->kickfd) {
917c8a6153bSXie Yongji eventfd_signal(vq->kickfd, 1);
918c8a6153bSXie Yongji vq->kicked = false;
919c8a6153bSXie Yongji }
920c8a6153bSXie Yongji spin_unlock(&vq->kick_lock);
921c8a6153bSXie Yongji
922c8a6153bSXie Yongji return 0;
923c8a6153bSXie Yongji }
924c8a6153bSXie Yongji
vduse_dev_is_ready(struct vduse_dev * dev)925c8a6153bSXie Yongji static bool vduse_dev_is_ready(struct vduse_dev *dev)
926c8a6153bSXie Yongji {
927c8a6153bSXie Yongji int i;
928c8a6153bSXie Yongji
929c8a6153bSXie Yongji for (i = 0; i < dev->vq_num; i++)
93078885597SXie Yongji if (!dev->vqs[i]->num_max)
931c8a6153bSXie Yongji return false;
932c8a6153bSXie Yongji
933c8a6153bSXie Yongji return true;
934c8a6153bSXie Yongji }
935c8a6153bSXie Yongji
vduse_dev_irq_inject(struct work_struct * work)936c8a6153bSXie Yongji static void vduse_dev_irq_inject(struct work_struct *work)
937c8a6153bSXie Yongji {
938c8a6153bSXie Yongji struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
939c8a6153bSXie Yongji
9407ca26efbSMaxime Coquelin spin_lock_bh(&dev->irq_lock);
941c8a6153bSXie Yongji if (dev->config_cb.callback)
942c8a6153bSXie Yongji dev->config_cb.callback(dev->config_cb.private);
9437ca26efbSMaxime Coquelin spin_unlock_bh(&dev->irq_lock);
944c8a6153bSXie Yongji }
945c8a6153bSXie Yongji
vduse_vq_irq_inject(struct work_struct * work)946c8a6153bSXie Yongji static void vduse_vq_irq_inject(struct work_struct *work)
947c8a6153bSXie Yongji {
948c8a6153bSXie Yongji struct vduse_virtqueue *vq = container_of(work,
949c8a6153bSXie Yongji struct vduse_virtqueue, inject);
950c8a6153bSXie Yongji
9517ca26efbSMaxime Coquelin spin_lock_bh(&vq->irq_lock);
952c8a6153bSXie Yongji if (vq->ready && vq->cb.callback)
953c8a6153bSXie Yongji vq->cb.callback(vq->cb.private);
9547ca26efbSMaxime Coquelin spin_unlock_bh(&vq->irq_lock);
955c8a6153bSXie Yongji }
956c8a6153bSXie Yongji
vduse_vq_signal_irqfd(struct vduse_virtqueue * vq)957e38632ddSXie Yongji static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
958e38632ddSXie Yongji {
959e38632ddSXie Yongji bool signal = false;
960e38632ddSXie Yongji
961e38632ddSXie Yongji if (!vq->cb.trigger)
962e38632ddSXie Yongji return false;
963e38632ddSXie Yongji
964e38632ddSXie Yongji spin_lock_irq(&vq->irq_lock);
965e38632ddSXie Yongji if (vq->ready && vq->cb.trigger) {
966e38632ddSXie Yongji eventfd_signal(vq->cb.trigger, 1);
967e38632ddSXie Yongji signal = true;
968e38632ddSXie Yongji }
969e38632ddSXie Yongji spin_unlock_irq(&vq->irq_lock);
970e38632ddSXie Yongji
971e38632ddSXie Yongji return signal;
972e38632ddSXie Yongji }
973e38632ddSXie Yongji
vduse_dev_queue_irq_work(struct vduse_dev * dev,struct work_struct * irq_work,int irq_effective_cpu)9740943aacfSXie Yongji static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
97528f6288eSXie Yongji struct work_struct *irq_work,
97628f6288eSXie Yongji int irq_effective_cpu)
9770943aacfSXie Yongji {
9780943aacfSXie Yongji int ret = -EINVAL;
9790943aacfSXie Yongji
9800943aacfSXie Yongji down_read(&dev->rwsem);
9810943aacfSXie Yongji if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
9820943aacfSXie Yongji goto unlock;
9830943aacfSXie Yongji
9840943aacfSXie Yongji ret = 0;
98528f6288eSXie Yongji if (irq_effective_cpu == IRQ_UNBOUND)
9860943aacfSXie Yongji queue_work(vduse_irq_wq, irq_work);
98728f6288eSXie Yongji else
98828f6288eSXie Yongji queue_work_on(irq_effective_cpu,
98928f6288eSXie Yongji vduse_irq_bound_wq, irq_work);
9900943aacfSXie Yongji unlock:
9910943aacfSXie Yongji up_read(&dev->rwsem);
9920943aacfSXie Yongji
9930943aacfSXie Yongji return ret;
9940943aacfSXie Yongji }
9950943aacfSXie Yongji
/*
 * Unregister the userspace memory previously registered as bounce pages.
 *
 * The request must match the registration: @iova equal to the registered
 * iova and @size equal to the domain's bounce_size. On success the user
 * bounce pages are removed from the domain, the pages are unpinned (marked
 * dirty), the owning mm's pinned_vm accounting is reduced and all
 * bookkeeping is freed.
 *
 * Runs under dev->mem_lock.
 *
 * Returns 0 on success, -ENOENT when nothing is registered, -EINVAL when
 * there is no domain or the arguments do not match.
 */
static int vduse_dev_dereg_umem(struct vduse_dev *dev,
				u64 iova, u64 size)
{
	int ret;

	mutex_lock(&dev->mem_lock);

	if (!dev->umem) {
		ret = -ENOENT;
		goto unlock;
	}

	if (!dev->domain ||
	    dev->umem->iova != iova || size != dev->domain->bounce_size) {
		ret = -EINVAL;
		goto unlock;
	}

	vduse_domain_remove_user_bounce_pages(dev->domain);
	unpin_user_pages_dirty_lock(dev->umem->pages,
				    dev->umem->npages, true);
	atomic64_sub(dev->umem->npages, &dev->umem->mm->pinned_vm);
	mmdrop(dev->umem->mm);
	vfree(dev->umem->pages);
	kfree(dev->umem);
	dev->umem = NULL;
	ret = 0;
unlock:
	mutex_unlock(&dev->mem_lock);
	return ret;
}
102679a463beSXie Yongji
/*
 * Register a range of userspace memory to be used as the bounce pages of
 * the device's IOVA domain.
 *
 * Accepted only when the domain exists and has a bounce map, @size equals
 * the domain's bounce_size, @iova is 0 and @uaddr is page aligned. The
 * pages are pinned long-term for writing, added to the domain as user
 * bounce pages and accounted in the calling mm's pinned_vm. A reference on
 * that mm is taken (mmgrab) and remembered in the umem record.
 *
 * Runs under dev->mem_lock; the pinning is done with the caller's mmap
 * lock held for reading.
 *
 * Returns 0 on success, -EINVAL for bad arguments, -EEXIST when memory is
 * already registered, -ENOMEM on allocation failure, when the
 * RLIMIT_MEMLOCK limit would be exceeded or when fewer pages than
 * requested could be pinned, or the error from pin_user_pages() /
 * vduse_domain_add_user_bounce_pages().
 */
static int vduse_dev_reg_umem(struct vduse_dev *dev,
			      u64 iova, u64 uaddr, u64 size)
{
	struct page **page_list = NULL;
	struct vduse_umem *umem = NULL;
	long pinned = 0;
	unsigned long npages, lock_limit;
	int ret;

	if (!dev->domain || !dev->domain->bounce_map ||
	    size != dev->domain->bounce_size ||
	    iova != 0 || uaddr & ~PAGE_MASK)
		return -EINVAL;

	mutex_lock(&dev->mem_lock);
	ret = -EEXIST;
	if (dev->umem)
		goto unlock;

	ret = -ENOMEM;
	npages = size >> PAGE_SHIFT;
	page_list = __vmalloc(array_size(npages, sizeof(struct page *)),
			      GFP_KERNEL_ACCOUNT);
	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!page_list || !umem)
		goto unlock;

	mmap_read_lock(current->mm);

	/* Refuse to pin more than RLIMIT_MEMLOCK allows (ret is -ENOMEM). */
	lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
	if (npages + atomic64_read(&current->mm->pinned_vm) > lock_limit)
		goto out;

	pinned = pin_user_pages(uaddr, npages, FOLL_LONGTERM | FOLL_WRITE,
				page_list);
	if (pinned != npages) {
		/* A short pin is an error; the "out" path unpins it. */
		ret = pinned < 0 ? pinned : -ENOMEM;
		goto out;
	}

	ret = vduse_domain_add_user_bounce_pages(dev->domain,
						 page_list, pinned);
	if (ret)
		goto out;

	atomic64_add(npages, &current->mm->pinned_vm);

	umem->pages = page_list;
	umem->npages = pinned;
	umem->iova = iova;
	umem->mm = current->mm;
	mmgrab(current->mm);

	dev->umem = umem;
out:
	if (ret && pinned > 0)
		unpin_user_pages(page_list, pinned);

	mmap_read_unlock(current->mm);
unlock:
	/* On failure nothing was published, so free both allocations. */
	if (ret) {
		vfree(page_list);
		kfree(umem);
	}
	mutex_unlock(&dev->mem_lock);
	return ret;
}
109479a463beSXie Yongji
vduse_vq_update_effective_cpu(struct vduse_virtqueue * vq)109528f6288eSXie Yongji static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
109628f6288eSXie Yongji {
109728f6288eSXie Yongji int curr_cpu = vq->irq_effective_cpu;
109828f6288eSXie Yongji
109928f6288eSXie Yongji while (true) {
110028f6288eSXie Yongji curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
110128f6288eSXie Yongji if (cpu_online(curr_cpu))
110228f6288eSXie Yongji break;
110328f6288eSXie Yongji
110428f6288eSXie Yongji if (curr_cpu >= nr_cpu_ids)
110528f6288eSXie Yongji curr_cpu = IRQ_UNBOUND;
110628f6288eSXie Yongji }
110728f6288eSXie Yongji
110828f6288eSXie Yongji vq->irq_effective_cpu = curr_cpu;
110928f6288eSXie Yongji }
111028f6288eSXie Yongji
vduse_dev_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1111c8a6153bSXie Yongji static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
1112c8a6153bSXie Yongji unsigned long arg)
1113c8a6153bSXie Yongji {
1114c8a6153bSXie Yongji struct vduse_dev *dev = file->private_data;
1115c8a6153bSXie Yongji void __user *argp = (void __user *)arg;
1116c8a6153bSXie Yongji int ret;
1117c8a6153bSXie Yongji
1118c8a6153bSXie Yongji if (unlikely(dev->broken))
1119c8a6153bSXie Yongji return -EPERM;
1120c8a6153bSXie Yongji
1121c8a6153bSXie Yongji switch (cmd) {
1122c8a6153bSXie Yongji case VDUSE_IOTLB_GET_FD: {
1123c8a6153bSXie Yongji struct vduse_iotlb_entry entry;
1124c8a6153bSXie Yongji struct vhost_iotlb_map *map;
1125c8a6153bSXie Yongji struct vdpa_map_file *map_file;
1126c8a6153bSXie Yongji struct file *f = NULL;
1127c8a6153bSXie Yongji
1128c8a6153bSXie Yongji ret = -EFAULT;
1129c8a6153bSXie Yongji if (copy_from_user(&entry, argp, sizeof(entry)))
1130c8a6153bSXie Yongji break;
1131c8a6153bSXie Yongji
1132c8a6153bSXie Yongji ret = -EINVAL;
1133c8a6153bSXie Yongji if (entry.start > entry.last)
1134c8a6153bSXie Yongji break;
1135c8a6153bSXie Yongji
1136d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
1137d4438d23SXie Yongji if (!dev->domain) {
1138d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
1139d4438d23SXie Yongji break;
1140d4438d23SXie Yongji }
1141d4438d23SXie Yongji spin_lock(&dev->domain->iotlb_lock);
1142d4438d23SXie Yongji map = vhost_iotlb_itree_first(dev->domain->iotlb,
1143c8a6153bSXie Yongji entry.start, entry.last);
1144c8a6153bSXie Yongji if (map) {
1145c8a6153bSXie Yongji map_file = (struct vdpa_map_file *)map->opaque;
1146c8a6153bSXie Yongji f = get_file(map_file->file);
1147c8a6153bSXie Yongji entry.offset = map_file->offset;
1148c8a6153bSXie Yongji entry.start = map->start;
1149c8a6153bSXie Yongji entry.last = map->last;
1150c8a6153bSXie Yongji entry.perm = map->perm;
1151c8a6153bSXie Yongji }
1152d4438d23SXie Yongji spin_unlock(&dev->domain->iotlb_lock);
1153d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
1154c8a6153bSXie Yongji ret = -EINVAL;
1155c8a6153bSXie Yongji if (!f)
1156c8a6153bSXie Yongji break;
1157c8a6153bSXie Yongji
1158c8a6153bSXie Yongji ret = -EFAULT;
1159c8a6153bSXie Yongji if (copy_to_user(argp, &entry, sizeof(entry))) {
1160c8a6153bSXie Yongji fput(f);
1161c8a6153bSXie Yongji break;
1162c8a6153bSXie Yongji }
1163c8a6153bSXie Yongji ret = receive_fd(f, perm_to_file_flags(entry.perm));
1164c8a6153bSXie Yongji fput(f);
1165c8a6153bSXie Yongji break;
1166c8a6153bSXie Yongji }
1167c8a6153bSXie Yongji case VDUSE_DEV_GET_FEATURES:
1168c8a6153bSXie Yongji /*
1169c8a6153bSXie Yongji * Just mirror what driver wrote here.
1170c8a6153bSXie Yongji * The driver is expected to check FEATURE_OK later.
1171c8a6153bSXie Yongji */
1172c8a6153bSXie Yongji ret = put_user(dev->driver_features, (u64 __user *)argp);
1173c8a6153bSXie Yongji break;
1174c8a6153bSXie Yongji case VDUSE_DEV_SET_CONFIG: {
1175c8a6153bSXie Yongji struct vduse_config_data config;
1176c8a6153bSXie Yongji unsigned long size = offsetof(struct vduse_config_data,
1177c8a6153bSXie Yongji buffer);
1178c8a6153bSXie Yongji
1179c8a6153bSXie Yongji ret = -EFAULT;
1180c8a6153bSXie Yongji if (copy_from_user(&config, argp, size))
1181c8a6153bSXie Yongji break;
1182c8a6153bSXie Yongji
1183c8a6153bSXie Yongji ret = -EINVAL;
1184ff9f9c6eSDan Carpenter if (config.offset > dev->config_size ||
1185ff9f9c6eSDan Carpenter config.length == 0 ||
1186c8a6153bSXie Yongji config.length > dev->config_size - config.offset)
1187c8a6153bSXie Yongji break;
1188c8a6153bSXie Yongji
1189c8a6153bSXie Yongji ret = -EFAULT;
1190c8a6153bSXie Yongji if (copy_from_user(dev->config + config.offset, argp + size,
1191c8a6153bSXie Yongji config.length))
1192c8a6153bSXie Yongji break;
1193c8a6153bSXie Yongji
1194c8a6153bSXie Yongji ret = 0;
1195c8a6153bSXie Yongji break;
1196c8a6153bSXie Yongji }
1197c8a6153bSXie Yongji case VDUSE_DEV_INJECT_CONFIG_IRQ:
119828f6288eSXie Yongji ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
1199c8a6153bSXie Yongji break;
1200c8a6153bSXie Yongji case VDUSE_VQ_SETUP: {
1201c8a6153bSXie Yongji struct vduse_vq_config config;
1202c8a6153bSXie Yongji u32 index;
1203c8a6153bSXie Yongji
1204c8a6153bSXie Yongji ret = -EFAULT;
1205c8a6153bSXie Yongji if (copy_from_user(&config, argp, sizeof(config)))
1206c8a6153bSXie Yongji break;
1207c8a6153bSXie Yongji
1208c8a6153bSXie Yongji ret = -EINVAL;
1209c8a6153bSXie Yongji if (config.index >= dev->vq_num)
1210c8a6153bSXie Yongji break;
1211c8a6153bSXie Yongji
1212c8a6153bSXie Yongji if (!is_mem_zero((const char *)config.reserved,
1213c8a6153bSXie Yongji sizeof(config.reserved)))
1214c8a6153bSXie Yongji break;
1215c8a6153bSXie Yongji
1216c8a6153bSXie Yongji index = array_index_nospec(config.index, dev->vq_num);
121778885597SXie Yongji dev->vqs[index]->num_max = config.max_size;
1218c8a6153bSXie Yongji ret = 0;
1219c8a6153bSXie Yongji break;
1220c8a6153bSXie Yongji }
1221c8a6153bSXie Yongji case VDUSE_VQ_GET_INFO: {
1222c8a6153bSXie Yongji struct vduse_vq_info vq_info;
1223c8a6153bSXie Yongji struct vduse_virtqueue *vq;
1224c8a6153bSXie Yongji u32 index;
1225c8a6153bSXie Yongji
1226c8a6153bSXie Yongji ret = -EFAULT;
1227c8a6153bSXie Yongji if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
1228c8a6153bSXie Yongji break;
1229c8a6153bSXie Yongji
1230c8a6153bSXie Yongji ret = -EINVAL;
1231c8a6153bSXie Yongji if (vq_info.index >= dev->vq_num)
1232c8a6153bSXie Yongji break;
1233c8a6153bSXie Yongji
1234c8a6153bSXie Yongji index = array_index_nospec(vq_info.index, dev->vq_num);
123578885597SXie Yongji vq = dev->vqs[index];
1236c8a6153bSXie Yongji vq_info.desc_addr = vq->desc_addr;
1237c8a6153bSXie Yongji vq_info.driver_addr = vq->driver_addr;
1238c8a6153bSXie Yongji vq_info.device_addr = vq->device_addr;
1239c8a6153bSXie Yongji vq_info.num = vq->num;
1240c8a6153bSXie Yongji
1241c8a6153bSXie Yongji if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
1242c8a6153bSXie Yongji vq_info.packed.last_avail_counter =
1243c8a6153bSXie Yongji vq->state.packed.last_avail_counter;
1244c8a6153bSXie Yongji vq_info.packed.last_avail_idx =
1245c8a6153bSXie Yongji vq->state.packed.last_avail_idx;
1246c8a6153bSXie Yongji vq_info.packed.last_used_counter =
1247c8a6153bSXie Yongji vq->state.packed.last_used_counter;
1248c8a6153bSXie Yongji vq_info.packed.last_used_idx =
1249c8a6153bSXie Yongji vq->state.packed.last_used_idx;
1250c8a6153bSXie Yongji } else
1251c8a6153bSXie Yongji vq_info.split.avail_index =
1252c8a6153bSXie Yongji vq->state.split.avail_index;
1253c8a6153bSXie Yongji
1254c8a6153bSXie Yongji vq_info.ready = vq->ready;
1255c8a6153bSXie Yongji
1256c8a6153bSXie Yongji ret = -EFAULT;
1257c8a6153bSXie Yongji if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
1258c8a6153bSXie Yongji break;
1259c8a6153bSXie Yongji
1260c8a6153bSXie Yongji ret = 0;
1261c8a6153bSXie Yongji break;
1262c8a6153bSXie Yongji }
1263c8a6153bSXie Yongji case VDUSE_VQ_SETUP_KICKFD: {
1264c8a6153bSXie Yongji struct vduse_vq_eventfd eventfd;
1265c8a6153bSXie Yongji
1266c8a6153bSXie Yongji ret = -EFAULT;
1267c8a6153bSXie Yongji if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
1268c8a6153bSXie Yongji break;
1269c8a6153bSXie Yongji
1270c8a6153bSXie Yongji ret = vduse_kickfd_setup(dev, &eventfd);
1271c8a6153bSXie Yongji break;
1272c8a6153bSXie Yongji }
1273c8a6153bSXie Yongji case VDUSE_VQ_INJECT_IRQ: {
1274c8a6153bSXie Yongji u32 index;
1275c8a6153bSXie Yongji
1276c8a6153bSXie Yongji ret = -EFAULT;
1277c8a6153bSXie Yongji if (get_user(index, (u32 __user *)argp))
1278c8a6153bSXie Yongji break;
1279c8a6153bSXie Yongji
1280c8a6153bSXie Yongji ret = -EINVAL;
1281c8a6153bSXie Yongji if (index >= dev->vq_num)
1282c8a6153bSXie Yongji break;
1283c8a6153bSXie Yongji
1284e38632ddSXie Yongji ret = 0;
1285c8a6153bSXie Yongji index = array_index_nospec(index, dev->vq_num);
1286e38632ddSXie Yongji if (!vduse_vq_signal_irqfd(dev->vqs[index])) {
128728f6288eSXie Yongji vduse_vq_update_effective_cpu(dev->vqs[index]);
1288e38632ddSXie Yongji ret = vduse_dev_queue_irq_work(dev,
1289e38632ddSXie Yongji &dev->vqs[index]->inject,
129028f6288eSXie Yongji dev->vqs[index]->irq_effective_cpu);
1291e38632ddSXie Yongji }
1292c8a6153bSXie Yongji break;
1293c8a6153bSXie Yongji }
129479a463beSXie Yongji case VDUSE_IOTLB_REG_UMEM: {
129579a463beSXie Yongji struct vduse_iova_umem umem;
129679a463beSXie Yongji
129779a463beSXie Yongji ret = -EFAULT;
129879a463beSXie Yongji if (copy_from_user(&umem, argp, sizeof(umem)))
129979a463beSXie Yongji break;
130079a463beSXie Yongji
130179a463beSXie Yongji ret = -EINVAL;
130279a463beSXie Yongji if (!is_mem_zero((const char *)umem.reserved,
130379a463beSXie Yongji sizeof(umem.reserved)))
130479a463beSXie Yongji break;
130579a463beSXie Yongji
1306d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
130779a463beSXie Yongji ret = vduse_dev_reg_umem(dev, umem.iova,
130879a463beSXie Yongji umem.uaddr, umem.size);
1309d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
131079a463beSXie Yongji break;
131179a463beSXie Yongji }
131279a463beSXie Yongji case VDUSE_IOTLB_DEREG_UMEM: {
131379a463beSXie Yongji struct vduse_iova_umem umem;
131479a463beSXie Yongji
131579a463beSXie Yongji ret = -EFAULT;
131679a463beSXie Yongji if (copy_from_user(&umem, argp, sizeof(umem)))
131779a463beSXie Yongji break;
131879a463beSXie Yongji
131979a463beSXie Yongji ret = -EINVAL;
132079a463beSXie Yongji if (!is_mem_zero((const char *)umem.reserved,
132179a463beSXie Yongji sizeof(umem.reserved)))
132279a463beSXie Yongji break;
1323d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
132479a463beSXie Yongji ret = vduse_dev_dereg_umem(dev, umem.iova,
132579a463beSXie Yongji umem.size);
1326d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
132779a463beSXie Yongji break;
132879a463beSXie Yongji }
1329ad146355SXie Yongji case VDUSE_IOTLB_GET_INFO: {
1330ad146355SXie Yongji struct vduse_iova_info info;
1331ad146355SXie Yongji struct vhost_iotlb_map *map;
1332ad146355SXie Yongji
1333ad146355SXie Yongji ret = -EFAULT;
1334ad146355SXie Yongji if (copy_from_user(&info, argp, sizeof(info)))
1335ad146355SXie Yongji break;
1336ad146355SXie Yongji
1337ad146355SXie Yongji ret = -EINVAL;
1338ad146355SXie Yongji if (info.start > info.last)
1339ad146355SXie Yongji break;
1340ad146355SXie Yongji
1341ad146355SXie Yongji if (!is_mem_zero((const char *)info.reserved,
1342ad146355SXie Yongji sizeof(info.reserved)))
1343ad146355SXie Yongji break;
1344ad146355SXie Yongji
1345d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
1346d4438d23SXie Yongji if (!dev->domain) {
1347d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
1348d4438d23SXie Yongji break;
1349d4438d23SXie Yongji }
1350d4438d23SXie Yongji spin_lock(&dev->domain->iotlb_lock);
1351d4438d23SXie Yongji map = vhost_iotlb_itree_first(dev->domain->iotlb,
1352ad146355SXie Yongji info.start, info.last);
1353ad146355SXie Yongji if (map) {
1354ad146355SXie Yongji info.start = map->start;
1355ad146355SXie Yongji info.last = map->last;
1356ad146355SXie Yongji info.capability = 0;
1357d4438d23SXie Yongji if (dev->domain->bounce_map && map->start == 0 &&
1358d4438d23SXie Yongji map->last == dev->domain->bounce_size - 1)
1359ad146355SXie Yongji info.capability |= VDUSE_IOVA_CAP_UMEM;
1360ad146355SXie Yongji }
1361d4438d23SXie Yongji spin_unlock(&dev->domain->iotlb_lock);
1362d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
1363ad146355SXie Yongji if (!map)
1364ad146355SXie Yongji break;
1365ad146355SXie Yongji
1366ad146355SXie Yongji ret = -EFAULT;
1367ad146355SXie Yongji if (copy_to_user(argp, &info, sizeof(info)))
1368ad146355SXie Yongji break;
1369ad146355SXie Yongji
1370ad146355SXie Yongji ret = 0;
1371ad146355SXie Yongji break;
1372ad146355SXie Yongji }
1373c8a6153bSXie Yongji default:
1374c8a6153bSXie Yongji ret = -ENOIOCTLCMD;
1375c8a6153bSXie Yongji break;
1376c8a6153bSXie Yongji }
1377c8a6153bSXie Yongji
1378c8a6153bSXie Yongji return ret;
1379c8a6153bSXie Yongji }
1380c8a6153bSXie Yongji
vduse_dev_release(struct inode * inode,struct file * file)1381c8a6153bSXie Yongji static int vduse_dev_release(struct inode *inode, struct file *file)
1382c8a6153bSXie Yongji {
1383c8a6153bSXie Yongji struct vduse_dev *dev = file->private_data;
1384c8a6153bSXie Yongji
1385d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
1386d4438d23SXie Yongji if (dev->domain)
138779a463beSXie Yongji vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
1388d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
1389c8a6153bSXie Yongji spin_lock(&dev->msg_lock);
1390c8a6153bSXie Yongji /* Make sure the inflight messages can processed after reconncection */
1391c8a6153bSXie Yongji list_splice_init(&dev->recv_list, &dev->send_list);
1392c8a6153bSXie Yongji spin_unlock(&dev->msg_lock);
1393c8a6153bSXie Yongji dev->connected = false;
1394c8a6153bSXie Yongji
1395c8a6153bSXie Yongji return 0;
1396c8a6153bSXie Yongji }
1397c8a6153bSXie Yongji
vduse_dev_get_from_minor(int minor)1398c8a6153bSXie Yongji static struct vduse_dev *vduse_dev_get_from_minor(int minor)
1399c8a6153bSXie Yongji {
1400c8a6153bSXie Yongji struct vduse_dev *dev;
1401c8a6153bSXie Yongji
1402c8a6153bSXie Yongji mutex_lock(&vduse_lock);
1403c8a6153bSXie Yongji dev = idr_find(&vduse_idr, minor);
1404c8a6153bSXie Yongji mutex_unlock(&vduse_lock);
1405c8a6153bSXie Yongji
1406c8a6153bSXie Yongji return dev;
1407c8a6153bSXie Yongji }
1408c8a6153bSXie Yongji
vduse_dev_open(struct inode * inode,struct file * file)1409c8a6153bSXie Yongji static int vduse_dev_open(struct inode *inode, struct file *file)
1410c8a6153bSXie Yongji {
1411c8a6153bSXie Yongji int ret;
1412c8a6153bSXie Yongji struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
1413c8a6153bSXie Yongji
1414c8a6153bSXie Yongji if (!dev)
1415c8a6153bSXie Yongji return -ENODEV;
1416c8a6153bSXie Yongji
1417c8a6153bSXie Yongji ret = -EBUSY;
1418c8a6153bSXie Yongji mutex_lock(&dev->lock);
1419c8a6153bSXie Yongji if (dev->connected)
1420c8a6153bSXie Yongji goto unlock;
1421c8a6153bSXie Yongji
1422c8a6153bSXie Yongji ret = 0;
1423c8a6153bSXie Yongji dev->connected = true;
1424c8a6153bSXie Yongji file->private_data = dev;
1425c8a6153bSXie Yongji unlock:
1426c8a6153bSXie Yongji mutex_unlock(&dev->lock);
1427c8a6153bSXie Yongji
1428c8a6153bSXie Yongji return ret;
1429c8a6153bSXie Yongji }
1430c8a6153bSXie Yongji
1431c8a6153bSXie Yongji static const struct file_operations vduse_dev_fops = {
1432c8a6153bSXie Yongji .owner = THIS_MODULE,
1433c8a6153bSXie Yongji .open = vduse_dev_open,
1434c8a6153bSXie Yongji .release = vduse_dev_release,
1435c8a6153bSXie Yongji .read_iter = vduse_dev_read_iter,
1436c8a6153bSXie Yongji .write_iter = vduse_dev_write_iter,
1437c8a6153bSXie Yongji .poll = vduse_dev_poll,
1438c8a6153bSXie Yongji .unlocked_ioctl = vduse_dev_ioctl,
1439c8a6153bSXie Yongji .compat_ioctl = compat_ptr_ioctl,
1440c8a6153bSXie Yongji .llseek = noop_llseek,
1441c8a6153bSXie Yongji };
1442c8a6153bSXie Yongji
irq_cb_affinity_show(struct vduse_virtqueue * vq,char * buf)144366640f4aSXie Yongji static ssize_t irq_cb_affinity_show(struct vduse_virtqueue *vq, char *buf)
144466640f4aSXie Yongji {
144566640f4aSXie Yongji return sprintf(buf, "%*pb\n", cpumask_pr_args(&vq->irq_affinity));
144666640f4aSXie Yongji }
144766640f4aSXie Yongji
irq_cb_affinity_store(struct vduse_virtqueue * vq,const char * buf,size_t count)144866640f4aSXie Yongji static ssize_t irq_cb_affinity_store(struct vduse_virtqueue *vq,
144966640f4aSXie Yongji const char *buf, size_t count)
145066640f4aSXie Yongji {
145166640f4aSXie Yongji cpumask_var_t new_value;
145266640f4aSXie Yongji int ret;
145366640f4aSXie Yongji
145466640f4aSXie Yongji if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
145566640f4aSXie Yongji return -ENOMEM;
145666640f4aSXie Yongji
145766640f4aSXie Yongji ret = cpumask_parse(buf, new_value);
145866640f4aSXie Yongji if (ret)
145966640f4aSXie Yongji goto free_mask;
146066640f4aSXie Yongji
146166640f4aSXie Yongji ret = -EINVAL;
146266640f4aSXie Yongji if (!cpumask_intersects(new_value, cpu_online_mask))
146366640f4aSXie Yongji goto free_mask;
146466640f4aSXie Yongji
146566640f4aSXie Yongji cpumask_copy(&vq->irq_affinity, new_value);
146666640f4aSXie Yongji ret = count;
146766640f4aSXie Yongji free_mask:
146866640f4aSXie Yongji free_cpumask_var(new_value);
146966640f4aSXie Yongji return ret;
147066640f4aSXie Yongji }
147166640f4aSXie Yongji
147266640f4aSXie Yongji struct vq_sysfs_entry {
147366640f4aSXie Yongji struct attribute attr;
147466640f4aSXie Yongji ssize_t (*show)(struct vduse_virtqueue *vq, char *buf);
147566640f4aSXie Yongji ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf,
147666640f4aSXie Yongji size_t count);
147766640f4aSXie Yongji };
147866640f4aSXie Yongji
147966640f4aSXie Yongji static struct vq_sysfs_entry irq_cb_affinity_attr = __ATTR_RW(irq_cb_affinity);
148066640f4aSXie Yongji
148166640f4aSXie Yongji static struct attribute *vq_attrs[] = {
148266640f4aSXie Yongji &irq_cb_affinity_attr.attr,
148366640f4aSXie Yongji NULL,
148466640f4aSXie Yongji };
148566640f4aSXie Yongji ATTRIBUTE_GROUPS(vq);
148666640f4aSXie Yongji
vq_attr_show(struct kobject * kobj,struct attribute * attr,char * buf)148766640f4aSXie Yongji static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr,
148866640f4aSXie Yongji char *buf)
148966640f4aSXie Yongji {
149066640f4aSXie Yongji struct vduse_virtqueue *vq = container_of(kobj,
149166640f4aSXie Yongji struct vduse_virtqueue, kobj);
149266640f4aSXie Yongji struct vq_sysfs_entry *entry = container_of(attr,
149366640f4aSXie Yongji struct vq_sysfs_entry, attr);
149466640f4aSXie Yongji
149566640f4aSXie Yongji if (!entry->show)
149666640f4aSXie Yongji return -EIO;
149766640f4aSXie Yongji
149866640f4aSXie Yongji return entry->show(vq, buf);
149966640f4aSXie Yongji }
150066640f4aSXie Yongji
vq_attr_store(struct kobject * kobj,struct attribute * attr,const char * buf,size_t count)150166640f4aSXie Yongji static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr,
150266640f4aSXie Yongji const char *buf, size_t count)
150366640f4aSXie Yongji {
150466640f4aSXie Yongji struct vduse_virtqueue *vq = container_of(kobj,
150566640f4aSXie Yongji struct vduse_virtqueue, kobj);
150666640f4aSXie Yongji struct vq_sysfs_entry *entry = container_of(attr,
150766640f4aSXie Yongji struct vq_sysfs_entry, attr);
150866640f4aSXie Yongji
150966640f4aSXie Yongji if (!entry->store)
151066640f4aSXie Yongji return -EIO;
151166640f4aSXie Yongji
151266640f4aSXie Yongji return entry->store(vq, buf, count);
151366640f4aSXie Yongji }
151466640f4aSXie Yongji
151566640f4aSXie Yongji static const struct sysfs_ops vq_sysfs_ops = {
151666640f4aSXie Yongji .show = vq_attr_show,
151766640f4aSXie Yongji .store = vq_attr_store,
151866640f4aSXie Yongji };
151966640f4aSXie Yongji
vq_release(struct kobject * kobj)152066640f4aSXie Yongji static void vq_release(struct kobject *kobj)
152166640f4aSXie Yongji {
152266640f4aSXie Yongji struct vduse_virtqueue *vq = container_of(kobj,
152366640f4aSXie Yongji struct vduse_virtqueue, kobj);
152466640f4aSXie Yongji kfree(vq);
152566640f4aSXie Yongji }
152666640f4aSXie Yongji
152766640f4aSXie Yongji static const struct kobj_type vq_type = {
152866640f4aSXie Yongji .release = vq_release,
152966640f4aSXie Yongji .sysfs_ops = &vq_sysfs_ops,
153066640f4aSXie Yongji .default_groups = vq_groups,
153166640f4aSXie Yongji };
153266640f4aSXie Yongji
vduse_dev_deinit_vqs(struct vduse_dev * dev)153378885597SXie Yongji static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
153478885597SXie Yongji {
153578885597SXie Yongji int i;
153678885597SXie Yongji
153778885597SXie Yongji if (!dev->vqs)
153878885597SXie Yongji return;
153978885597SXie Yongji
154078885597SXie Yongji for (i = 0; i < dev->vq_num; i++)
154166640f4aSXie Yongji kobject_put(&dev->vqs[i]->kobj);
154278885597SXie Yongji kfree(dev->vqs);
154378885597SXie Yongji }
154478885597SXie Yongji
/*
 * Allocate and initialise @vq_num virtqueues for @dev and add a "vq<N>"
 * kobject for each of them below dev->dev.
 *
 * @vq_align and @vq_num are recorded in @dev. On success dev->vqs holds
 * one fully initialised virtqueue per index and 0 is returned.
 *
 * On failure every virtqueue created so far is released, dev->vqs is
 * freed and reset to NULL, and a negative errno is returned (-ENOMEM for
 * allocation failures, otherwise the error from kobject_add()).
 */
static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
{
	int ret, i;

	dev->vq_align = vq_align;
	dev->vq_num = vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		return -ENOMEM;

	for (i = 0; i < vq_num; i++) {
		struct vduse_virtqueue *vq;

		vq = kzalloc(sizeof(*vq), GFP_KERNEL);
		if (!vq) {
			ret = -ENOMEM;
			goto err;
		}
		dev->vqs[i] = vq;

		vq->index = i;
		vq->irq_effective_cpu = IRQ_UNBOUND;
		INIT_WORK(&vq->inject, vduse_vq_irq_inject);
		INIT_WORK(&vq->kick, vduse_vq_kick_work);
		spin_lock_init(&vq->kick_lock);
		spin_lock_init(&vq->irq_lock);
		cpumask_setall(&vq->irq_affinity);

		kobject_init(&vq->kobj, &vq_type);
		ret = kobject_add(&vq->kobj, &dev->dev->kobj, "vq%d", i);
		if (ret) {
			/*
			 * Once kobject_init() has run the object must be
			 * released with kobject_put(), not kfree(): the put
			 * also frees the name allocated by kobject_add() and
			 * then frees the virtqueue via vq_release().
			 */
			kobject_put(&vq->kobj);
			goto err;
		}
	}

	return 0;
err:
	/* Unwind the virtqueues that were completely set up (0 .. i-1). */
	while (i--)
		kobject_put(&dev->vqs[i]->kobj);
	kfree(dev->vqs);
	dev->vqs = NULL;
	return ret;
}
158778885597SXie Yongji
vduse_dev_create(void)1588c8a6153bSXie Yongji static struct vduse_dev *vduse_dev_create(void)
1589c8a6153bSXie Yongji {
1590c8a6153bSXie Yongji struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1591c8a6153bSXie Yongji
1592c8a6153bSXie Yongji if (!dev)
1593c8a6153bSXie Yongji return NULL;
1594c8a6153bSXie Yongji
1595c8a6153bSXie Yongji mutex_init(&dev->lock);
159679a463beSXie Yongji mutex_init(&dev->mem_lock);
1597d4438d23SXie Yongji mutex_init(&dev->domain_lock);
1598c8a6153bSXie Yongji spin_lock_init(&dev->msg_lock);
1599c8a6153bSXie Yongji INIT_LIST_HEAD(&dev->send_list);
1600c8a6153bSXie Yongji INIT_LIST_HEAD(&dev->recv_list);
1601c8a6153bSXie Yongji spin_lock_init(&dev->irq_lock);
16020943aacfSXie Yongji init_rwsem(&dev->rwsem);
1603c8a6153bSXie Yongji
1604c8a6153bSXie Yongji INIT_WORK(&dev->inject, vduse_dev_irq_inject);
1605c8a6153bSXie Yongji init_waitqueue_head(&dev->waitq);
1606c8a6153bSXie Yongji
1607c8a6153bSXie Yongji return dev;
1608c8a6153bSXie Yongji }
1609c8a6153bSXie Yongji
/*
 * Free the vduse_dev structure itself. Members such as the name, config
 * buffer, virtqueues and domain are released by the callers in this file
 * before this is called.
 */
static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}
1614c8a6153bSXie Yongji
vduse_find_dev(const char * name)1615c8a6153bSXie Yongji static struct vduse_dev *vduse_find_dev(const char *name)
1616c8a6153bSXie Yongji {
1617c8a6153bSXie Yongji struct vduse_dev *dev;
1618c8a6153bSXie Yongji int id;
1619c8a6153bSXie Yongji
1620c8a6153bSXie Yongji idr_for_each_entry(&vduse_idr, dev, id)
1621c8a6153bSXie Yongji if (!strcmp(dev->name, name))
1622c8a6153bSXie Yongji return dev;
1623c8a6153bSXie Yongji
1624c8a6153bSXie Yongji return NULL;
1625c8a6153bSXie Yongji }
1626c8a6153bSXie Yongji
vduse_destroy_dev(char * name)1627c8a6153bSXie Yongji static int vduse_destroy_dev(char *name)
1628c8a6153bSXie Yongji {
1629c8a6153bSXie Yongji struct vduse_dev *dev = vduse_find_dev(name);
1630c8a6153bSXie Yongji
1631c8a6153bSXie Yongji if (!dev)
1632c8a6153bSXie Yongji return -EINVAL;
1633c8a6153bSXie Yongji
1634c8a6153bSXie Yongji mutex_lock(&dev->lock);
1635c8a6153bSXie Yongji if (dev->vdev || dev->connected) {
1636c8a6153bSXie Yongji mutex_unlock(&dev->lock);
1637c8a6153bSXie Yongji return -EBUSY;
1638c8a6153bSXie Yongji }
1639c8a6153bSXie Yongji dev->connected = true;
1640c8a6153bSXie Yongji mutex_unlock(&dev->lock);
1641c8a6153bSXie Yongji
1642c8a6153bSXie Yongji vduse_dev_reset(dev);
1643c8a6153bSXie Yongji device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
1644c8a6153bSXie Yongji idr_remove(&vduse_idr, dev->minor);
1645c8a6153bSXie Yongji kvfree(dev->config);
164678885597SXie Yongji vduse_dev_deinit_vqs(dev);
1647d4438d23SXie Yongji if (dev->domain)
1648c8a6153bSXie Yongji vduse_domain_destroy(dev->domain);
1649c8a6153bSXie Yongji kfree(dev->name);
1650c8a6153bSXie Yongji vduse_dev_destroy(dev);
1651c8a6153bSXie Yongji module_put(THIS_MODULE);
1652c8a6153bSXie Yongji
1653c8a6153bSXie Yongji return 0;
1654c8a6153bSXie Yongji }
1655c8a6153bSXie Yongji
device_is_allowed(u32 device_id)1656c8a6153bSXie Yongji static bool device_is_allowed(u32 device_id)
1657c8a6153bSXie Yongji {
1658c8a6153bSXie Yongji int i;
1659c8a6153bSXie Yongji
1660c8a6153bSXie Yongji for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
1661c8a6153bSXie Yongji if (allowed_device_id[i] == device_id)
1662c8a6153bSXie Yongji return true;
1663c8a6153bSXie Yongji
1664c8a6153bSXie Yongji return false;
1665c8a6153bSXie Yongji }
1666c8a6153bSXie Yongji
features_is_valid(struct vduse_dev_config * config)16678d4d1453SMaxime Coquelin static bool features_is_valid(struct vduse_dev_config *config)
1668c8a6153bSXie Yongji {
16698d4d1453SMaxime Coquelin if (!(config->features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1670c8a6153bSXie Yongji return false;
1671c8a6153bSXie Yongji
1672c8a6153bSXie Yongji /* Now we only support read-only configuration space */
16738d4d1453SMaxime Coquelin if ((config->device_id == VIRTIO_ID_BLOCK) &&
16748d4d1453SMaxime Coquelin (config->features & BIT_ULL(VIRTIO_BLK_F_CONFIG_WCE)))
1675c8a6153bSXie Yongji return false;
1676*df475f71SMaxime Coquelin else if ((config->device_id == VIRTIO_ID_NET) &&
1677*df475f71SMaxime Coquelin (config->features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1678*df475f71SMaxime Coquelin return false;
1679c8a6153bSXie Yongji
1680c8a6153bSXie Yongji return true;
1681c8a6153bSXie Yongji }
1682c8a6153bSXie Yongji
vduse_validate_config(struct vduse_dev_config * config)1683c8a6153bSXie Yongji static bool vduse_validate_config(struct vduse_dev_config *config)
1684c8a6153bSXie Yongji {
1685c8a6153bSXie Yongji if (!is_mem_zero((const char *)config->reserved,
1686c8a6153bSXie Yongji sizeof(config->reserved)))
1687c8a6153bSXie Yongji return false;
1688c8a6153bSXie Yongji
1689c8a6153bSXie Yongji if (config->vq_align > PAGE_SIZE)
1690c8a6153bSXie Yongji return false;
1691c8a6153bSXie Yongji
1692c8a6153bSXie Yongji if (config->config_size > PAGE_SIZE)
1693c8a6153bSXie Yongji return false;
1694c8a6153bSXie Yongji
1695937c783aSHarshit Mogalapalli if (config->vq_num > 0xffff)
1696937c783aSHarshit Mogalapalli return false;
1697937c783aSHarshit Mogalapalli
1698a90e8608SSheng Zhao if (!config->name[0])
1699a90e8608SSheng Zhao return false;
1700a90e8608SSheng Zhao
1701c8a6153bSXie Yongji if (!device_is_allowed(config->device_id))
1702c8a6153bSXie Yongji return false;
1703c8a6153bSXie Yongji
17048d4d1453SMaxime Coquelin if (!features_is_valid(config))
1705c8a6153bSXie Yongji return false;
1706c8a6153bSXie Yongji
1707c8a6153bSXie Yongji return true;
1708c8a6153bSXie Yongji }
1709c8a6153bSXie Yongji
msg_timeout_show(struct device * device,struct device_attribute * attr,char * buf)1710c8a6153bSXie Yongji static ssize_t msg_timeout_show(struct device *device,
1711c8a6153bSXie Yongji struct device_attribute *attr, char *buf)
1712c8a6153bSXie Yongji {
1713c8a6153bSXie Yongji struct vduse_dev *dev = dev_get_drvdata(device);
1714c8a6153bSXie Yongji
1715c8a6153bSXie Yongji return sysfs_emit(buf, "%u\n", dev->msg_timeout);
1716c8a6153bSXie Yongji }
1717c8a6153bSXie Yongji
msg_timeout_store(struct device * device,struct device_attribute * attr,const char * buf,size_t count)1718c8a6153bSXie Yongji static ssize_t msg_timeout_store(struct device *device,
1719c8a6153bSXie Yongji struct device_attribute *attr,
1720c8a6153bSXie Yongji const char *buf, size_t count)
1721c8a6153bSXie Yongji {
1722c8a6153bSXie Yongji struct vduse_dev *dev = dev_get_drvdata(device);
1723c8a6153bSXie Yongji int ret;
1724c8a6153bSXie Yongji
1725c8a6153bSXie Yongji ret = kstrtouint(buf, 10, &dev->msg_timeout);
1726c8a6153bSXie Yongji if (ret < 0)
1727c8a6153bSXie Yongji return ret;
1728c8a6153bSXie Yongji
1729c8a6153bSXie Yongji return count;
1730c8a6153bSXie Yongji }
1731c8a6153bSXie Yongji
1732c8a6153bSXie Yongji static DEVICE_ATTR_RW(msg_timeout);
1733c8a6153bSXie Yongji
bounce_size_show(struct device * device,struct device_attribute * attr,char * buf)1734b774f93dSXie Yongji static ssize_t bounce_size_show(struct device *device,
1735b774f93dSXie Yongji struct device_attribute *attr, char *buf)
1736b774f93dSXie Yongji {
1737b774f93dSXie Yongji struct vduse_dev *dev = dev_get_drvdata(device);
1738b774f93dSXie Yongji
1739b774f93dSXie Yongji return sysfs_emit(buf, "%u\n", dev->bounce_size);
1740b774f93dSXie Yongji }
1741b774f93dSXie Yongji
bounce_size_store(struct device * device,struct device_attribute * attr,const char * buf,size_t count)1742b774f93dSXie Yongji static ssize_t bounce_size_store(struct device *device,
1743b774f93dSXie Yongji struct device_attribute *attr,
1744b774f93dSXie Yongji const char *buf, size_t count)
1745b774f93dSXie Yongji {
1746b774f93dSXie Yongji struct vduse_dev *dev = dev_get_drvdata(device);
1747b774f93dSXie Yongji unsigned int bounce_size;
1748b774f93dSXie Yongji int ret;
1749b774f93dSXie Yongji
1750b774f93dSXie Yongji ret = -EPERM;
1751b774f93dSXie Yongji mutex_lock(&dev->domain_lock);
1752b774f93dSXie Yongji if (dev->domain)
1753b774f93dSXie Yongji goto unlock;
1754b774f93dSXie Yongji
1755b774f93dSXie Yongji ret = kstrtouint(buf, 10, &bounce_size);
1756b774f93dSXie Yongji if (ret < 0)
1757b774f93dSXie Yongji goto unlock;
1758b774f93dSXie Yongji
1759b774f93dSXie Yongji ret = -EINVAL;
1760b774f93dSXie Yongji if (bounce_size > VDUSE_MAX_BOUNCE_SIZE ||
1761b774f93dSXie Yongji bounce_size < VDUSE_MIN_BOUNCE_SIZE)
1762b774f93dSXie Yongji goto unlock;
1763b774f93dSXie Yongji
1764b774f93dSXie Yongji dev->bounce_size = bounce_size & PAGE_MASK;
1765b774f93dSXie Yongji ret = count;
1766b774f93dSXie Yongji unlock:
1767b774f93dSXie Yongji mutex_unlock(&dev->domain_lock);
1768b774f93dSXie Yongji return ret;
1769b774f93dSXie Yongji }
1770b774f93dSXie Yongji
1771b774f93dSXie Yongji static DEVICE_ATTR_RW(bounce_size);
1772b774f93dSXie Yongji
1773c8a6153bSXie Yongji static struct attribute *vduse_dev_attrs[] = {
1774c8a6153bSXie Yongji &dev_attr_msg_timeout.attr,
1775b774f93dSXie Yongji &dev_attr_bounce_size.attr,
1776c8a6153bSXie Yongji NULL
1777c8a6153bSXie Yongji };
1778c8a6153bSXie Yongji
1779c8a6153bSXie Yongji ATTRIBUTE_GROUPS(vduse_dev);
1780c8a6153bSXie Yongji
/*
 * Create a VDUSE device described by @config.
 *
 * @config_buf becomes the device's configuration space and @api_version
 * is recorded in the device. On success a module reference is taken and 0
 * is returned.
 *
 * Returns -EEXIST when a device with the same name already exists,
 * -ENOMEM on allocation failure, or the error reported by idr_alloc(),
 * device_create_with_groups() or vduse_dev_init_vqs(). @config_buf is not
 * freed here on failure; that is left to the caller.
 */
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	struct vduse_dev *dev;
	int ret;

	if (vduse_find_dev(config->name))
		return -EEXIST;

	dev = vduse_dev_create();
	if (!dev)
		return -ENOMEM;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;

	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name) {
		ret = -ENOMEM;
		goto err_free_dev;
	}

	dev->bounce_size = VDUSE_BOUNCE_SIZE;
	dev->config = config_buf;
	dev->config_size = config->config_size;

	/* The allocated id doubles as the character device minor. */
	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_free_name;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create_with_groups(vduse_class, NULL,
				MKDEV(MAJOR(vduse_major), dev->minor),
				dev, vduse_dev_groups, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_remove_idr;
	}

	ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num);
	if (ret)
		goto err_destroy_device;

	__module_get(THIS_MODULE);

	return 0;

err_destroy_device:
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_remove_idr:
	idr_remove(&vduse_idr, dev->minor);
err_free_name:
	kfree(dev->name);
err_free_dev:
	vduse_dev_destroy(dev);
	return ret;
}
1840c8a6153bSXie Yongji
vduse_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1841c8a6153bSXie Yongji static long vduse_ioctl(struct file *file, unsigned int cmd,
1842c8a6153bSXie Yongji unsigned long arg)
1843c8a6153bSXie Yongji {
1844c8a6153bSXie Yongji int ret;
1845c8a6153bSXie Yongji void __user *argp = (void __user *)arg;
1846c8a6153bSXie Yongji struct vduse_control *control = file->private_data;
1847c8a6153bSXie Yongji
1848c8a6153bSXie Yongji mutex_lock(&vduse_lock);
1849c8a6153bSXie Yongji switch (cmd) {
1850c8a6153bSXie Yongji case VDUSE_GET_API_VERSION:
1851c8a6153bSXie Yongji ret = put_user(control->api_version, (u64 __user *)argp);
1852c8a6153bSXie Yongji break;
1853c8a6153bSXie Yongji case VDUSE_SET_API_VERSION: {
1854c8a6153bSXie Yongji u64 api_version;
1855c8a6153bSXie Yongji
1856c8a6153bSXie Yongji ret = -EFAULT;
1857c8a6153bSXie Yongji if (get_user(api_version, (u64 __user *)argp))
1858c8a6153bSXie Yongji break;
1859c8a6153bSXie Yongji
1860c8a6153bSXie Yongji ret = -EINVAL;
1861c8a6153bSXie Yongji if (api_version > VDUSE_API_VERSION)
1862c8a6153bSXie Yongji break;
1863c8a6153bSXie Yongji
1864c8a6153bSXie Yongji ret = 0;
1865c8a6153bSXie Yongji control->api_version = api_version;
1866c8a6153bSXie Yongji break;
1867c8a6153bSXie Yongji }
1868c8a6153bSXie Yongji case VDUSE_CREATE_DEV: {
1869c8a6153bSXie Yongji struct vduse_dev_config config;
1870c8a6153bSXie Yongji unsigned long size = offsetof(struct vduse_dev_config, config);
1871c8a6153bSXie Yongji void *buf;
1872c8a6153bSXie Yongji
1873c8a6153bSXie Yongji ret = -EFAULT;
1874c8a6153bSXie Yongji if (copy_from_user(&config, argp, size))
1875c8a6153bSXie Yongji break;
1876c8a6153bSXie Yongji
1877c8a6153bSXie Yongji ret = -EINVAL;
1878c8a6153bSXie Yongji if (vduse_validate_config(&config) == false)
1879c8a6153bSXie Yongji break;
1880c8a6153bSXie Yongji
1881c8a6153bSXie Yongji buf = vmemdup_user(argp + size, config.config_size);
1882c8a6153bSXie Yongji if (IS_ERR(buf)) {
1883c8a6153bSXie Yongji ret = PTR_ERR(buf);
1884c8a6153bSXie Yongji break;
1885c8a6153bSXie Yongji }
1886c8a6153bSXie Yongji config.name[VDUSE_NAME_MAX - 1] = '\0';
1887c8a6153bSXie Yongji ret = vduse_create_dev(&config, buf, control->api_version);
1888b4d80c8dSGuanjun if (ret)
1889b4d80c8dSGuanjun kvfree(buf);
1890c8a6153bSXie Yongji break;
1891c8a6153bSXie Yongji }
1892c8a6153bSXie Yongji case VDUSE_DESTROY_DEV: {
1893c8a6153bSXie Yongji char name[VDUSE_NAME_MAX];
1894c8a6153bSXie Yongji
1895c8a6153bSXie Yongji ret = -EFAULT;
1896c8a6153bSXie Yongji if (copy_from_user(name, argp, VDUSE_NAME_MAX))
1897c8a6153bSXie Yongji break;
1898c8a6153bSXie Yongji
1899c8a6153bSXie Yongji name[VDUSE_NAME_MAX - 1] = '\0';
1900c8a6153bSXie Yongji ret = vduse_destroy_dev(name);
1901c8a6153bSXie Yongji break;
1902c8a6153bSXie Yongji }
1903c8a6153bSXie Yongji default:
1904c8a6153bSXie Yongji ret = -EINVAL;
1905c8a6153bSXie Yongji break;
1906c8a6153bSXie Yongji }
1907c8a6153bSXie Yongji mutex_unlock(&vduse_lock);
1908c8a6153bSXie Yongji
1909c8a6153bSXie Yongji return ret;
1910c8a6153bSXie Yongji }
1911c8a6153bSXie Yongji
vduse_release(struct inode * inode,struct file * file)1912c8a6153bSXie Yongji static int vduse_release(struct inode *inode, struct file *file)
1913c8a6153bSXie Yongji {
1914c8a6153bSXie Yongji struct vduse_control *control = file->private_data;
1915c8a6153bSXie Yongji
1916c8a6153bSXie Yongji kfree(control);
1917c8a6153bSXie Yongji return 0;
1918c8a6153bSXie Yongji }
1919c8a6153bSXie Yongji
vduse_open(struct inode * inode,struct file * file)1920c8a6153bSXie Yongji static int vduse_open(struct inode *inode, struct file *file)
1921c8a6153bSXie Yongji {
1922c8a6153bSXie Yongji struct vduse_control *control;
1923c8a6153bSXie Yongji
1924c8a6153bSXie Yongji control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
1925c8a6153bSXie Yongji if (!control)
1926c8a6153bSXie Yongji return -ENOMEM;
1927c8a6153bSXie Yongji
1928c8a6153bSXie Yongji control->api_version = VDUSE_API_VERSION;
1929c8a6153bSXie Yongji file->private_data = control;
1930c8a6153bSXie Yongji
1931c8a6153bSXie Yongji return 0;
1932c8a6153bSXie Yongji }
1933c8a6153bSXie Yongji
1934c8a6153bSXie Yongji static const struct file_operations vduse_ctrl_fops = {
1935c8a6153bSXie Yongji .owner = THIS_MODULE,
1936c8a6153bSXie Yongji .open = vduse_open,
1937c8a6153bSXie Yongji .release = vduse_release,
1938c8a6153bSXie Yongji .unlocked_ioctl = vduse_ioctl,
1939c8a6153bSXie Yongji .compat_ioctl = compat_ptr_ioctl,
1940c8a6153bSXie Yongji .llseek = noop_llseek,
1941c8a6153bSXie Yongji };
1942c8a6153bSXie Yongji
/*
 * Build the device node name "vduse/<device name>" for @dev.
 *
 * Returns a kasprintf()-allocated string (NULL on allocation failure).
 * @mode is not modified.
 */
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
	const char *node_name = dev_name(dev);

	return kasprintf(GFP_KERNEL, "vduse/%s", node_name);
}
1947c8a6153bSXie Yongji
19480e0348acSParav Pandit struct vduse_mgmt_dev {
19490e0348acSParav Pandit struct vdpa_mgmt_dev mgmt_dev;
19500e0348acSParav Pandit struct device dev;
1951c8a6153bSXie Yongji };
1952c8a6153bSXie Yongji
19530e0348acSParav Pandit static struct vduse_mgmt_dev *vduse_mgmt;
1954c8a6153bSXie Yongji
vduse_dev_init_vdpa(struct vduse_dev * dev,const char * name)1955c8a6153bSXie Yongji static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
1956c8a6153bSXie Yongji {
1957c8a6153bSXie Yongji struct vduse_vdpa *vdev;
1958c8a6153bSXie Yongji int ret;
1959c8a6153bSXie Yongji
1960c8a6153bSXie Yongji if (dev->vdev)
1961c8a6153bSXie Yongji return -EEXIST;
1962c8a6153bSXie Yongji
1963c8a6153bSXie Yongji vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
1964db9adcbfSGautam Dawar &vduse_vdpa_config_ops, 1, 1, name, true);
1965c8a6153bSXie Yongji if (IS_ERR(vdev))
1966c8a6153bSXie Yongji return PTR_ERR(vdev);
1967c8a6153bSXie Yongji
1968c8a6153bSXie Yongji dev->vdev = vdev;
1969c8a6153bSXie Yongji vdev->dev = dev;
1970c8a6153bSXie Yongji vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
1971c8a6153bSXie Yongji ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
1972c8a6153bSXie Yongji if (ret) {
1973c8a6153bSXie Yongji put_device(&vdev->vdpa.dev);
1974c8a6153bSXie Yongji return ret;
1975c8a6153bSXie Yongji }
1976c8a6153bSXie Yongji set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
1977c8a6153bSXie Yongji vdev->vdpa.dma_dev = &vdev->vdpa.dev;
19780e0348acSParav Pandit vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
1979c8a6153bSXie Yongji
1980c8a6153bSXie Yongji return 0;
1981c8a6153bSXie Yongji }
1982c8a6153bSXie Yongji
vdpa_dev_add(struct vdpa_mgmt_dev * mdev,const char * name,const struct vdpa_dev_set_config * config)1983d8ca2fa5SParav Pandit static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
1984d8ca2fa5SParav Pandit const struct vdpa_dev_set_config *config)
1985c8a6153bSXie Yongji {
1986c8a6153bSXie Yongji struct vduse_dev *dev;
1987c8a6153bSXie Yongji int ret;
1988c8a6153bSXie Yongji
1989c8a6153bSXie Yongji mutex_lock(&vduse_lock);
1990c8a6153bSXie Yongji dev = vduse_find_dev(name);
1991c8a6153bSXie Yongji if (!dev || !vduse_dev_is_ready(dev)) {
1992c8a6153bSXie Yongji mutex_unlock(&vduse_lock);
1993c8a6153bSXie Yongji return -EINVAL;
1994c8a6153bSXie Yongji }
1995c8a6153bSXie Yongji ret = vduse_dev_init_vdpa(dev, name);
1996c8a6153bSXie Yongji mutex_unlock(&vduse_lock);
1997c8a6153bSXie Yongji if (ret)
1998c8a6153bSXie Yongji return ret;
1999c8a6153bSXie Yongji
2000d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
2001d4438d23SXie Yongji if (!dev->domain)
2002d4438d23SXie Yongji dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
2003d4438d23SXie Yongji dev->bounce_size);
2004d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
2005d4438d23SXie Yongji if (!dev->domain) {
2006d4438d23SXie Yongji put_device(&dev->vdev->vdpa.dev);
2007d4438d23SXie Yongji return -ENOMEM;
2008d4438d23SXie Yongji }
2009d4438d23SXie Yongji
2010c8a6153bSXie Yongji ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
2011c8a6153bSXie Yongji if (ret) {
2012c8a6153bSXie Yongji put_device(&dev->vdev->vdpa.dev);
2013d4438d23SXie Yongji mutex_lock(&dev->domain_lock);
2014d4438d23SXie Yongji vduse_domain_destroy(dev->domain);
2015d4438d23SXie Yongji dev->domain = NULL;
2016d4438d23SXie Yongji mutex_unlock(&dev->domain_lock);
2017c8a6153bSXie Yongji return ret;
2018c8a6153bSXie Yongji }
2019c8a6153bSXie Yongji
2020c8a6153bSXie Yongji return 0;
2021c8a6153bSXie Yongji }
2022c8a6153bSXie Yongji
/*
 * Management-device "dev_del" callback: hands @dev to
 * _vdpa_unregister_device(). @mdev is not referenced.
 */
static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}
2027c8a6153bSXie Yongji
2028c8a6153bSXie Yongji static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
2029c8a6153bSXie Yongji .dev_add = vdpa_dev_add,
2030c8a6153bSXie Yongji .dev_del = vdpa_dev_del,
2031c8a6153bSXie Yongji };
2032c8a6153bSXie Yongji
2033c8a6153bSXie Yongji static struct virtio_device_id id_table[] = {
2034c8a6153bSXie Yongji { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
2035c8a6153bSXie Yongji { 0 },
2036c8a6153bSXie Yongji };
2037c8a6153bSXie Yongji
vduse_mgmtdev_release(struct device * dev)20380e0348acSParav Pandit static void vduse_mgmtdev_release(struct device *dev)
20390e0348acSParav Pandit {
20400e0348acSParav Pandit struct vduse_mgmt_dev *mgmt_dev;
20410e0348acSParav Pandit
20420e0348acSParav Pandit mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
20430e0348acSParav Pandit kfree(mgmt_dev);
20440e0348acSParav Pandit }
2045c8a6153bSXie Yongji
vduse_mgmtdev_init(void)2046c8a6153bSXie Yongji static int vduse_mgmtdev_init(void)
2047c8a6153bSXie Yongji {
2048c8a6153bSXie Yongji int ret;
2049c8a6153bSXie Yongji
20500e0348acSParav Pandit vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
20510e0348acSParav Pandit if (!vduse_mgmt)
20520e0348acSParav Pandit return -ENOMEM;
20530e0348acSParav Pandit
20540e0348acSParav Pandit ret = dev_set_name(&vduse_mgmt->dev, "vduse");
20550e0348acSParav Pandit if (ret) {
20560e0348acSParav Pandit kfree(vduse_mgmt);
20570e0348acSParav Pandit return ret;
20580e0348acSParav Pandit }
20590e0348acSParav Pandit
20600e0348acSParav Pandit vduse_mgmt->dev.release = vduse_mgmtdev_release;
20610e0348acSParav Pandit
20620e0348acSParav Pandit ret = device_register(&vduse_mgmt->dev);
2063c8a6153bSXie Yongji if (ret)
20640e0348acSParav Pandit goto dev_reg_err;
20650e0348acSParav Pandit
20660e0348acSParav Pandit vduse_mgmt->mgmt_dev.id_table = id_table;
20670e0348acSParav Pandit vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
20680e0348acSParav Pandit vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
20690e0348acSParav Pandit ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
20700e0348acSParav Pandit if (ret)
20710e0348acSParav Pandit device_unregister(&vduse_mgmt->dev);
20720e0348acSParav Pandit
2073c8a6153bSXie Yongji return ret;
2074c8a6153bSXie Yongji
20750e0348acSParav Pandit dev_reg_err:
20760e0348acSParav Pandit put_device(&vduse_mgmt->dev);
2077c8a6153bSXie Yongji return ret;
2078c8a6153bSXie Yongji }
2079c8a6153bSXie Yongji
vduse_mgmtdev_exit(void)2080c8a6153bSXie Yongji static void vduse_mgmtdev_exit(void)
2081c8a6153bSXie Yongji {
20820e0348acSParav Pandit vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
20830e0348acSParav Pandit device_unregister(&vduse_mgmt->dev);
2084c8a6153bSXie Yongji }
2085c8a6153bSXie Yongji
vduse_init(void)2086c8a6153bSXie Yongji static int vduse_init(void)
2087c8a6153bSXie Yongji {
2088c8a6153bSXie Yongji int ret;
2089c8a6153bSXie Yongji struct device *dev;
2090c8a6153bSXie Yongji
20911aaba11dSGreg Kroah-Hartman vduse_class = class_create("vduse");
2092c8a6153bSXie Yongji if (IS_ERR(vduse_class))
2093c8a6153bSXie Yongji return PTR_ERR(vduse_class);
2094c8a6153bSXie Yongji
2095c8a6153bSXie Yongji vduse_class->devnode = vduse_devnode;
2096c8a6153bSXie Yongji
2097c8a6153bSXie Yongji ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
2098c8a6153bSXie Yongji if (ret)
2099c8a6153bSXie Yongji goto err_chardev_region;
2100c8a6153bSXie Yongji
2101c8a6153bSXie Yongji /* /dev/vduse/control */
2102c8a6153bSXie Yongji cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
2103c8a6153bSXie Yongji vduse_ctrl_cdev.owner = THIS_MODULE;
2104c8a6153bSXie Yongji ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
2105c8a6153bSXie Yongji if (ret)
2106c8a6153bSXie Yongji goto err_ctrl_cdev;
2107c8a6153bSXie Yongji
2108c8a6153bSXie Yongji dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
2109c8a6153bSXie Yongji if (IS_ERR(dev)) {
2110c8a6153bSXie Yongji ret = PTR_ERR(dev);
2111c8a6153bSXie Yongji goto err_device;
2112c8a6153bSXie Yongji }
2113c8a6153bSXie Yongji
2114c8a6153bSXie Yongji /* /dev/vduse/$DEVICE */
2115c8a6153bSXie Yongji cdev_init(&vduse_cdev, &vduse_dev_fops);
2116c8a6153bSXie Yongji vduse_cdev.owner = THIS_MODULE;
2117c8a6153bSXie Yongji ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
2118c8a6153bSXie Yongji VDUSE_DEV_MAX - 1);
2119c8a6153bSXie Yongji if (ret)
2120c8a6153bSXie Yongji goto err_cdev;
2121c8a6153bSXie Yongji
212228f6288eSXie Yongji ret = -ENOMEM;
2123c8a6153bSXie Yongji vduse_irq_wq = alloc_workqueue("vduse-irq",
2124c8a6153bSXie Yongji WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
212528f6288eSXie Yongji if (!vduse_irq_wq)
2126c8a6153bSXie Yongji goto err_wq;
212728f6288eSXie Yongji
212828f6288eSXie Yongji vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
212928f6288eSXie Yongji if (!vduse_irq_bound_wq)
213028f6288eSXie Yongji goto err_bound_wq;
2131c8a6153bSXie Yongji
2132c8a6153bSXie Yongji ret = vduse_domain_init();
2133c8a6153bSXie Yongji if (ret)
2134c8a6153bSXie Yongji goto err_domain;
2135c8a6153bSXie Yongji
2136c8a6153bSXie Yongji ret = vduse_mgmtdev_init();
2137c8a6153bSXie Yongji if (ret)
2138c8a6153bSXie Yongji goto err_mgmtdev;
2139c8a6153bSXie Yongji
2140c8a6153bSXie Yongji return 0;
2141c8a6153bSXie Yongji err_mgmtdev:
2142c8a6153bSXie Yongji vduse_domain_exit();
2143c8a6153bSXie Yongji err_domain:
214428f6288eSXie Yongji destroy_workqueue(vduse_irq_bound_wq);
214528f6288eSXie Yongji err_bound_wq:
2146c8a6153bSXie Yongji destroy_workqueue(vduse_irq_wq);
2147c8a6153bSXie Yongji err_wq:
2148c8a6153bSXie Yongji cdev_del(&vduse_cdev);
2149c8a6153bSXie Yongji err_cdev:
2150c8a6153bSXie Yongji device_destroy(vduse_class, vduse_major);
2151c8a6153bSXie Yongji err_device:
2152c8a6153bSXie Yongji cdev_del(&vduse_ctrl_cdev);
2153c8a6153bSXie Yongji err_ctrl_cdev:
2154c8a6153bSXie Yongji unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2155c8a6153bSXie Yongji err_chardev_region:
2156c8a6153bSXie Yongji class_destroy(vduse_class);
2157c8a6153bSXie Yongji return ret;
2158c8a6153bSXie Yongji }
2159c8a6153bSXie Yongji module_init(vduse_init);
2160c8a6153bSXie Yongji
vduse_exit(void)2161c8a6153bSXie Yongji static void vduse_exit(void)
2162c8a6153bSXie Yongji {
2163c8a6153bSXie Yongji vduse_mgmtdev_exit();
2164c8a6153bSXie Yongji vduse_domain_exit();
216528f6288eSXie Yongji destroy_workqueue(vduse_irq_bound_wq);
2166c8a6153bSXie Yongji destroy_workqueue(vduse_irq_wq);
2167c8a6153bSXie Yongji cdev_del(&vduse_cdev);
2168c8a6153bSXie Yongji device_destroy(vduse_class, vduse_major);
2169c8a6153bSXie Yongji cdev_del(&vduse_ctrl_cdev);
2170c8a6153bSXie Yongji unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
2171c8a6153bSXie Yongji class_destroy(vduse_class);
2172c8a6153bSXie Yongji }
2173c8a6153bSXie Yongji module_exit(vduse_exit);
2174c8a6153bSXie Yongji
/* Module metadata, taken from the DRV_* macros defined near the top of the file. */
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
2178