// SPDX-License-Identifier: GPL-2.0-only
/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>
#include <linux/io-64-nonatomic-lo-hi.h>

#include "vfio_pci_priv.h"

#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

#define VFIO_IOWRITE(size) \
static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	vfio_iowrite##size(val, io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOWRITE(8)
VFIO_IOWRITE(16)
VFIO_IOWRITE(32)
#ifdef iowrite64
VFIO_IOWRITE(64)
#endif
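
/*
 * For reference, VFIO_IOWRITE(32) above expands (on a little-endian
 * build) to roughly:
 *
 *	static int vfio_pci_iowrite32(struct vfio_pci_core_device *vdev,
 *				      bool test_mem, u32 val, void __iomem *io)
 *	{
 *		if (test_mem) {
 *			down_read(&vdev->memory_lock);
 *			if (!__vfio_pci_memory_enabled(vdev)) {
 *				up_read(&vdev->memory_lock);
 *				return -EIO;
 *			}
 *		}
 *		iowrite32(val, io);
 *		if (test_mem)
 *			up_read(&vdev->memory_lock);
 *		return 0;
 *	}
 *
 * Callers set test_mem for MMIO BARs so the access fails with -EIO,
 * rather than touching the device, while PCI memory decode is disabled.
 */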

#define VFIO_IOREAD(size) \
static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev,	\
			bool test_mem, u##size *val, void __iomem *io)	\
{									\
	if (test_mem) {							\
		down_read(&vdev->memory_lock);				\
		if (!__vfio_pci_memory_enabled(vdev)) {			\
			up_read(&vdev->memory_lock);			\
			return -EIO;					\
		}							\
	}								\
									\
	*val = vfio_ioread##size(io);					\
									\
	if (test_mem)							\
		up_read(&vdev->memory_lock);				\
									\
	return 0;							\
}

VFIO_IOREAD(8)
VFIO_IOREAD(16)
VFIO_IOREAD(32)

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible.  The excluded range drops writes and fills
 * reads with -1.  This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
			void __iomem *io, char __user *buf,
			loff_t off, size_t count, size_t x_start,
			size_t x_end, bool iswrite)
{
	ssize_t done = 0;
	int ret;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		if (fillable >= 4 && !(off % 4)) {
			u32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				ret = vfio_pci_iowrite32(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread32(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;
		} else if (fillable >= 2 && !(off % 2)) {
			u16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				ret = vfio_pci_iowrite16(vdev, test_mem,
							 val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread16(vdev, test_mem,
							&val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				ret = vfio_pci_iowrite8(vdev, test_mem,
							val, io + off);
				if (ret)
					return ret;
			} else {
				ret = vfio_pci_ioread8(vdev, test_mem,
						       &val, io + off);
				if (ret)
					return ret;

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
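
/*
 * Worked example of the splitting above: an 8-byte read at BAR offset
 * 0x2 with no excluded range (x_start == x_end == 0) is carried out as
 * a 2-byte read at 0x2, a 4-byte read at 0x4, and a 2-byte read at 0x8,
 * i.e. each user access is broken into the largest naturally aligned
 * chunks the device can be expected to handle.
 */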

static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}

ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	struct resource *res = &vdev->pdev->resource[bar];
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM.  This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io) {
			done = -ENOMEM;
			goto out;
		}
		x_end = end;
	} else {
		int ret = vfio_pci_setup_barmap(vdev, bar);
		if (ret) {
			done = ret;
			goto out;
		}

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
			count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);
out:
	return done;
}
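
/*
 * Example (hypothetical userspace sketch, not part of this driver):
 * BAR accesses arrive here via read()/write() on the VFIO device fd at
 * the offset reported for the BAR's region, e.g. to read a 32-bit
 * register at offset 0x10 of BAR0:
 *
 *	struct vfio_region_info info = {
 *		.argsz = sizeof(info),
 *		.index = VFIO_PCI_BAR0_REGION_INDEX,
 *	};
 *	__u32 val;
 *
 *	ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info);
 *	pread(device_fd, &val, sizeof(val), info.offset + 0x10);
 *
 * device_fd is assumed to be an open VFIO device file descriptor.
 */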

#ifdef CONFIG_VFIO_PCI_VGA
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so we don't currently worry about access in relation
	 * to the memory enable bit in the command register.
	 */
	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
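
/*
 * Example (hypothetical userspace sketch, not part of this driver): the
 * VGA region is addressed by legacy offset, so reading the VGA
 * miscellaneous output register at I/O port 0x3cc might look like:
 *
 *	struct vfio_region_info info = {
 *		.argsz = sizeof(info),
 *		.index = VFIO_PCI_VGA_REGION_INDEX,
 *	};
 *	__u8 misc;
 *
 *	ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &info);
 *	pread(device_fd, &misc, 1, info.offset + 0x3cc);
 */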

static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
					bool test_mem)
{
	switch (ioeventfd->count) {
	case 1:
		vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
				  ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
				   ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}
}

static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;
	struct vfio_pci_core_device *vdev = ioeventfd->vdev;

	if (ioeventfd->test_mem) {
		if (!down_read_trylock(&vdev->memory_lock))
			return 1; /* Lock contended, use thread */
		if (!__vfio_pci_memory_enabled(vdev)) {
			up_read(&vdev->memory_lock);
			return 0;
		}
	}

	vfio_pci_ioeventfd_do_write(ioeventfd, false);

	if (ioeventfd->test_mem)
		up_read(&vdev->memory_lock);

	return 0;
}

static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
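
/*
 * Note the split above: the irqfd handler runs from the eventfd wakeup
 * path where it cannot sleep, so it only tries memory_lock with
 * down_read_trylock() and defers to the thread (by returning 1) when
 * the lock is contended.  The thread may sleep, so it passes test_mem
 * through and lets vfio_pci_iowrite*() take the lock the normal way.
 */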

int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
		       uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL_ACCOUNT);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->vdev = vdev;
	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;
	ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 vfio_pci_ioeventfd_thread, NULL,
				 &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
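
/*
 * Example (hypothetical userspace sketch, not part of this driver): an
 * ioeventfd is registered with the VFIO_DEVICE_IOEVENTFD ioctl, e.g. to
 * have a 4-byte write of 0x1 posted to a doorbell at offset 0x40 of
 * BAR0 whenever the eventfd is signaled:
 *
 *	struct vfio_device_ioeventfd ioeventfd = {
 *		.argsz = sizeof(ioeventfd),
 *		.flags = VFIO_DEVICE_IOEVENTFD_32,
 *		.offset = bar0_region_offset + 0x40,
 *		.data = 0x1,
 *		.fd = evtfd,
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioeventfd);
 *
 * bar0_region_offset is the offset reported by
 * VFIO_DEVICE_GET_REGION_INFO for BAR0 and evtfd is an open eventfd.
 * Passing .fd = -1 for a matching registration tears it down, per the
 * fd == -1 case above.
 */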