xref: /openbmc/linux/drivers/vfio/pci/vfio_pci_rdwr.c (revision 8365a898)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * VFIO PCI I/O Port & MMIO access
4  *
5  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
6  *     Author: Alex Williamson <alex.williamson@redhat.com>
7  *
8  * Derived from original vfio:
9  * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
10  * Author: Tom Lyon, pugs@cisco.com
11  */
12 
13 #include <linux/fs.h>
14 #include <linux/pci.h>
15 #include <linux/uaccess.h>
16 #include <linux/io.h>
17 #include <linux/vfio.h>
18 #include <linux/vgaarb.h>
19 
20 #include "vfio_pci_private.h"
21 
/*
 * Width-specific accessors used by the read/write helpers in this file.
 *
 * On little-endian builds these are the plain ioreadN()/iowriteN()
 * helpers; otherwise the big-endian (*be) variants are selected.
 * Presumably this keeps the byte order seen by userspace identical on both
 * kinds of host -- TODO confirm against the ioread/iowrite documentation.
 * The 8-bit accessors are the same in either case.
 */
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
#if defined(__LITTLE_ENDIAN)
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#else
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#endif
39 
40 /*
41  * Read or write from an __iomem region (MMIO or I/O port) with an excluded
42  * range which is inaccessible.  The excluded range drops writes and fills
43  * reads with -1.  This is intended for handling MSI-X vector tables and
44  * leftover space for ROM BARs.
45  */
46 static ssize_t do_io_rw(void __iomem *io, char __user *buf,
47 			loff_t off, size_t count, size_t x_start,
48 			size_t x_end, bool iswrite)
49 {
50 	ssize_t done = 0;
51 
52 	while (count) {
53 		size_t fillable, filled;
54 
55 		if (off < x_start)
56 			fillable = min(count, (size_t)(x_start - off));
57 		else if (off >= x_end)
58 			fillable = count;
59 		else
60 			fillable = 0;
61 
62 		if (fillable >= 4 && !(off % 4)) {
63 			u32 val;
64 
65 			if (iswrite) {
66 				if (copy_from_user(&val, buf, 4))
67 					return -EFAULT;
68 
69 				vfio_iowrite32(val, io + off);
70 			} else {
71 				val = vfio_ioread32(io + off);
72 
73 				if (copy_to_user(buf, &val, 4))
74 					return -EFAULT;
75 			}
76 
77 			filled = 4;
78 		} else if (fillable >= 2 && !(off % 2)) {
79 			u16 val;
80 
81 			if (iswrite) {
82 				if (copy_from_user(&val, buf, 2))
83 					return -EFAULT;
84 
85 				vfio_iowrite16(val, io + off);
86 			} else {
87 				val = vfio_ioread16(io + off);
88 
89 				if (copy_to_user(buf, &val, 2))
90 					return -EFAULT;
91 			}
92 
93 			filled = 2;
94 		} else if (fillable) {
95 			u8 val;
96 
97 			if (iswrite) {
98 				if (copy_from_user(&val, buf, 1))
99 					return -EFAULT;
100 
101 				vfio_iowrite8(val, io + off);
102 			} else {
103 				val = vfio_ioread8(io + off);
104 
105 				if (copy_to_user(buf, &val, 1))
106 					return -EFAULT;
107 			}
108 
109 			filled = 1;
110 		} else {
111 			/* Fill reads with -1, drop writes */
112 			filled = min(count, (size_t)(x_end - off));
113 			if (!iswrite) {
114 				u8 val = 0xFF;
115 				size_t i;
116 
117 				for (i = 0; i < filled; i++)
118 					if (copy_to_user(buf + i, &val, 1))
119 						return -EFAULT;
120 			}
121 		}
122 
123 		count -= filled;
124 		done += filled;
125 		off += filled;
126 		buf += filled;
127 	}
128 
129 	return done;
130 }
131 
132 static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
133 {
134 	struct pci_dev *pdev = vdev->pdev;
135 	int ret;
136 	void __iomem *io;
137 
138 	if (vdev->barmap[bar])
139 		return 0;
140 
141 	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
142 	if (ret)
143 		return ret;
144 
145 	io = pci_iomap(pdev, bar, 0);
146 	if (!io) {
147 		pci_release_selected_regions(pdev, 1 << bar);
148 		return -ENOMEM;
149 	}
150 
151 	vdev->barmap[bar] = io;
152 
153 	return 0;
154 }
155 
156 ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
157 			size_t count, loff_t *ppos, bool iswrite)
158 {
159 	struct pci_dev *pdev = vdev->pdev;
160 	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
161 	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
162 	size_t x_start = 0, x_end = 0;
163 	resource_size_t end;
164 	void __iomem *io;
165 	struct resource *res = &vdev->pdev->resource[bar];
166 	ssize_t done;
167 
168 	if (pci_resource_start(pdev, bar))
169 		end = pci_resource_len(pdev, bar);
170 	else if (bar == PCI_ROM_RESOURCE &&
171 		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
172 		end = 0x20000;
173 	else
174 		return -EINVAL;
175 
176 	if (pos >= end)
177 		return -EINVAL;
178 
179 	count = min(count, (size_t)(end - pos));
180 
181 	if (res->flags & IORESOURCE_MEM) {
182 		down_read(&vdev->memory_lock);
183 		if (!__vfio_pci_memory_enabled(vdev)) {
184 			up_read(&vdev->memory_lock);
185 			return -EIO;
186 		}
187 	}
188 
189 	if (bar == PCI_ROM_RESOURCE) {
190 		/*
191 		 * The ROM can fill less space than the BAR, so we start the
192 		 * excluded range at the end of the actual ROM.  This makes
193 		 * filling large ROM BARs much faster.
194 		 */
195 		io = pci_map_rom(pdev, &x_start);
196 		if (!io) {
197 			done = -ENOMEM;
198 			goto out;
199 		}
200 		x_end = end;
201 	} else {
202 		int ret = vfio_pci_setup_barmap(vdev, bar);
203 		if (ret) {
204 			done = ret;
205 			goto out;
206 		}
207 
208 		io = vdev->barmap[bar];
209 	}
210 
211 	if (bar == vdev->msix_bar) {
212 		x_start = vdev->msix_offset;
213 		x_end = vdev->msix_offset + vdev->msix_size;
214 	}
215 
216 	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);
217 
218 	if (done >= 0)
219 		*ppos += done;
220 
221 	if (bar == PCI_ROM_RESOURCE)
222 		pci_unmap_rom(pdev, io);
223 out:
224 	if (res->flags & IORESOURCE_MEM)
225 		up_read(&vdev->memory_lock);
226 
227 	return done;
228 }
229 
230 ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
231 			       size_t count, loff_t *ppos, bool iswrite)
232 {
233 	int ret;
234 	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
235 	void __iomem *iomem = NULL;
236 	unsigned int rsrc;
237 	bool is_ioport;
238 	ssize_t done;
239 
240 	if (!vdev->has_vga)
241 		return -EINVAL;
242 
243 	if (pos > 0xbfffful)
244 		return -EINVAL;
245 
246 	switch ((u32)pos) {
247 	case 0xa0000 ... 0xbffff:
248 		count = min(count, (size_t)(0xc0000 - pos));
249 		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
250 		off = pos - 0xa0000;
251 		rsrc = VGA_RSRC_LEGACY_MEM;
252 		is_ioport = false;
253 		break;
254 	case 0x3b0 ... 0x3bb:
255 		count = min(count, (size_t)(0x3bc - pos));
256 		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
257 		off = pos - 0x3b0;
258 		rsrc = VGA_RSRC_LEGACY_IO;
259 		is_ioport = true;
260 		break;
261 	case 0x3c0 ... 0x3df:
262 		count = min(count, (size_t)(0x3e0 - pos));
263 		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
264 		off = pos - 0x3c0;
265 		rsrc = VGA_RSRC_LEGACY_IO;
266 		is_ioport = true;
267 		break;
268 	default:
269 		return -EINVAL;
270 	}
271 
272 	if (!iomem)
273 		return -ENOMEM;
274 
275 	ret = vga_get_interruptible(vdev->pdev, rsrc);
276 	if (ret) {
277 		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
278 		return ret;
279 	}
280 
281 	done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);
282 
283 	vga_put(vdev->pdev, rsrc);
284 
285 	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
286 
287 	if (done >= 0)
288 		*ppos += done;
289 
290 	return done;
291 }
292 
293 static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
294 {
295 	struct vfio_pci_ioeventfd *ioeventfd = opaque;
296 
297 	switch (ioeventfd->count) {
298 	case 1:
299 		vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
300 		break;
301 	case 2:
302 		vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
303 		break;
304 	case 4:
305 		vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
306 		break;
307 #ifdef iowrite64
308 	case 8:
309 		vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
310 		break;
311 #endif
312 	}
313 
314 	return 0;
315 }
316 
317 long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
318 			uint64_t data, int count, int fd)
319 {
320 	struct pci_dev *pdev = vdev->pdev;
321 	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
322 	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
323 	struct vfio_pci_ioeventfd *ioeventfd;
324 
325 	/* Only support ioeventfds into BARs */
326 	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
327 		return -EINVAL;
328 
329 	if (pos + count > pci_resource_len(pdev, bar))
330 		return -EINVAL;
331 
332 	/* Disallow ioeventfds working around MSI-X table writes */
333 	if (bar == vdev->msix_bar &&
334 	    !(pos + count <= vdev->msix_offset ||
335 	      pos >= vdev->msix_offset + vdev->msix_size))
336 		return -EINVAL;
337 
338 #ifndef iowrite64
339 	if (count == 8)
340 		return -EINVAL;
341 #endif
342 
343 	ret = vfio_pci_setup_barmap(vdev, bar);
344 	if (ret)
345 		return ret;
346 
347 	mutex_lock(&vdev->ioeventfds_lock);
348 
349 	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
350 		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
351 		    ioeventfd->data == data && ioeventfd->count == count) {
352 			if (fd == -1) {
353 				vfio_virqfd_disable(&ioeventfd->virqfd);
354 				list_del(&ioeventfd->next);
355 				vdev->ioeventfds_nr--;
356 				kfree(ioeventfd);
357 				ret = 0;
358 			} else
359 				ret = -EEXIST;
360 
361 			goto out_unlock;
362 		}
363 	}
364 
365 	if (fd < 0) {
366 		ret = -ENODEV;
367 		goto out_unlock;
368 	}
369 
370 	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
371 		ret = -ENOSPC;
372 		goto out_unlock;
373 	}
374 
375 	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
376 	if (!ioeventfd) {
377 		ret = -ENOMEM;
378 		goto out_unlock;
379 	}
380 
381 	ioeventfd->addr = vdev->barmap[bar] + pos;
382 	ioeventfd->data = data;
383 	ioeventfd->pos = pos;
384 	ioeventfd->bar = bar;
385 	ioeventfd->count = count;
386 
387 	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
388 				 NULL, NULL, &ioeventfd->virqfd, fd);
389 	if (ret) {
390 		kfree(ioeventfd);
391 		goto out_unlock;
392 	}
393 
394 	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
395 	vdev->ioeventfds_nr++;
396 
397 out_unlock:
398 	mutex_unlock(&vdev->ioeventfds_lock);
399 
400 	return ret;
401 }
402