// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

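/*
 * These accessors keep the device's own (little-endian) byte order intact
 * between the user buffer and the device: ioreadX/iowriteX byte-swap on
 * big-endian CPUs, so the big-endian build uses the "be" variants to pass
 * bytes through unswapped.
 */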
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

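/*
 * Generate read/write helpers which, when test_mem is set, only touch the
 * device while memory decoding is enabled in the command register, taking
 * memory_lock to hold off concurrent disabling.  Accessing an MMIO BAR with
 * decoding disabled can trigger unsupported-request errors on some
 * platforms, so such accesses fail with -EIO instead.
 */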
#define VFIO_IOWRITE(size) \
static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif

#define VFIO_IOREAD(size) \
static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
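/*
 * For example, with an excluded range of [0x2000, 0x3000), a 4-byte read at
 * offset 0x1ffe is split across two loop iterations: two bytes are read
 * from the device, then the remaining two fall inside the excluded range
 * and are filled with 0xff.
 */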
static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			void __iomem *io, char __user *buf,
			loff_t off, size_t count, size_t x_start,
			size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		if (fillable >= 4 && !(off % 4)) {
			u32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				ret = vfio_pci_iowrite32(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread32(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;
		} else if (fillable >= 2 && !(off % 2)) {
			u16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				ret = vfio_pci_iowrite16(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread16(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				ret = vfio_pci_iowrite8(vdev, test_mem,
							val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread8(vdev, test_mem,
						       &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}

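/*
 * Lazily request and map a BAR on first access.  The mapping is cached in
 * vdev->barmap[] and reused for all subsequent accesses.
 */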
static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}

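/*
 * Service userspace reads and writes of a BAR region.  The ROM BAR is
 * mapped transiently per access; other BARs use the cached barmap.  The
 * MSI-X vector table, if it lives in this BAR, is excluded from direct
 * access.
 */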
ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
			count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}

#ifdef CONFIG_VFIO_PCI_VGA
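/*
 * Service userspace access to the legacy VGA ranges: framebuffer memory at
 * 0xa0000-0xbffff and the mono/color register ports at 0x3b0-0x3bb and
 * 0x3c0-0x3df.  The VGA arbiter serializes access to these shared ranges
 * across devices.
 */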
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif

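/* Perform the write previously registered for this ioeventfd. */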
static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
				  ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

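/*
 * The virqfd handler runs from the eventfd wakeup callback, potentially in
 * atomic context, so memory_lock can only be tried.  Returning 1 defers
 * the write to the thread handler below, which may sleep and therefore
 * leaves the memory-enable check to the iowrite helpers themselves.
 */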
static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}

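/*
 * Register or tear down an ioeventfd: a kernel-side shortcut that writes
 * <data> of <count> bytes to BAR offset <offset> whenever <fd> is signaled,
 * sparing a trap out to userspace on hot MMIO writes.  Passing fd == -1
 * tears down a matching registration.
 *
 * A minimal userspace sketch, assuming device_fd, event_fd, and the
 * doorbell_* placeholders are set up by the caller, with region_info
 * obtained from VFIO_DEVICE_GET_REGION_INFO for the BAR (uapi types from
 * <linux/vfio.h>):
 *
 *	struct vfio_device_ioeventfd ioeventfd = {
 *		.argsz	= sizeof(ioeventfd),
 *		.flags	= VFIO_DEVICE_IOEVENTFD_32,	// 4-byte write
 *		.offset	= region_info.offset + doorbell_offset,
 *		.data	= doorbell_value,
 *		.fd	= event_fd,
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioeventfd);
 */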
int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		/* Not creating a new ioeventfd and nothing matched above */
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}