/*
 * VFIO PCI I/O Port & MMIO access
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *	Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/vfio.h>
#include <linux/vgaarb.h>

#include "vfio_pci_private.h"

#ifdef __LITTLE_ENDIAN
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#else
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#endif
#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8

/*
 * Read or write from an __iomem region (MMIO or I/O port) with an excluded
 * range which is inaccessible. The excluded range drops writes and fills
 * reads with -1. This is intended for handling MSI-X vector tables and
 * leftover space for ROM BARs.
 */
static ssize_t do_io_rw(void __iomem *io, char __user *buf,
			loff_t off, size_t count, size_t x_start,
			size_t x_end, bool iswrite)
{
	ssize_t done = 0;

	while (count) {
		size_t fillable, filled;

		if (off < x_start)
			fillable = min(count, (size_t)(x_start - off));
		else if (off >= x_end)
			fillable = count;
		else
			fillable = 0;

		if (fillable >= 4 && !(off % 4)) {
			u32 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 4))
					return -EFAULT;

				vfio_iowrite32(val, io + off);
			} else {
				val = vfio_ioread32(io + off);

				if (copy_to_user(buf, &val, 4))
					return -EFAULT;
			}

			filled = 4;
		} else if (fillable >= 2 && !(off % 2)) {
			u16 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 2))
					return -EFAULT;

				vfio_iowrite16(val, io + off);
			} else {
				val = vfio_ioread16(io + off);

				if (copy_to_user(buf, &val, 2))
					return -EFAULT;
			}

			filled = 2;
		} else if (fillable) {
			u8 val;

			if (iswrite) {
				if (copy_from_user(&val, buf, 1))
					return -EFAULT;

				vfio_iowrite8(val, io + off);
			} else {
				val = vfio_ioread8(io + off);

				if (copy_to_user(buf, &val, 1))
					return -EFAULT;
			}

			filled = 1;
		} else {
			/* Fill reads with -1, drop writes */
			filled = min(count, (size_t)(x_end - off));
			if (!iswrite) {
				u8 val = 0xFF;
				size_t i;

				for (i = 0; i < filled; i++)
					if (copy_to_user(buf + i, &val, 1))
						return -EFAULT;
			}
		}

		count -= filled;
		done += filled;
		off += filled;
		buf += filled;
	}

	return done;
}
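/*
 * For illustration: with no excluded range, a 7-byte read starting at
 * offset 0x2 is serviced by the loop above as a 2-byte access at 0x2,
 * a 4-byte access at 0x4, and a 1-byte access at 0x6, so every
 * ioread/iowrite stays naturally aligned to its own width.
 */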
static int vfio_pci_setup_barmap(struct vfio_pci_device *vdev, int bar)
{
	struct pci_dev *pdev = vdev->pdev;
	int ret;
	void __iomem *io;

	if (vdev->barmap[bar])
		return 0;

	ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
	if (ret)
		return ret;

	io = pci_iomap(pdev, bar, 0);
	if (!io) {
		pci_release_selected_regions(pdev, 1 << bar);
		return -ENOMEM;
	}

	vdev->barmap[bar] = io;

	return 0;
}

ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	size_t x_start = 0, x_end = 0;
	resource_size_t end;
	void __iomem *io;
	ssize_t done;

	if (pci_resource_start(pdev, bar))
		end = pci_resource_len(pdev, bar);
	else if (bar == PCI_ROM_RESOURCE &&
		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
		end = 0x20000;
	else
		return -EINVAL;

	if (pos >= end)
		return -EINVAL;

	count = min(count, (size_t)(end - pos));

	if (bar == PCI_ROM_RESOURCE) {
		/*
		 * The ROM can fill less space than the BAR, so we start the
		 * excluded range at the end of the actual ROM. This makes
		 * filling large ROM BARs much faster.
		 */
		io = pci_map_rom(pdev, &x_start);
		if (!io)
			return -ENOMEM;
		x_end = end;
	} else {
		int ret = vfio_pci_setup_barmap(vdev, bar);
		if (ret)
			return ret;

		io = vdev->barmap[bar];
	}

	if (bar == vdev->msix_bar) {
		x_start = vdev->msix_offset;
		x_end = vdev->msix_offset + vdev->msix_size;
	}

	done = do_io_rw(io, buf, pos, count, x_start, x_end, iswrite);

	if (done >= 0)
		*ppos += done;

	if (bar == PCI_ROM_RESOURCE)
		pci_unmap_rom(pdev, io);

	return done;
}
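/*
 * For illustration: the region index rides in the high bits of the file
 * offset. Assuming the VFIO_PCI_OFFSET_SHIFT of 40 from
 * vfio_pci_private.h, a pread() at *ppos == (2ULL << 40) | 0x10 decodes
 * above to bar == 2 via VFIO_PCI_OFFSET_TO_INDEX() and pos == 0x10 via
 * VFIO_PCI_OFFSET_MASK.
 */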
ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	int ret;
	loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	switch ((u32)pos) {
	case 0xa0000 ... 0xbffff:
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
		break;
	case 0x3b0 ... 0x3bb:
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	case 0x3c0 ... 0x3df:
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
		break;
	default:
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
		return ret;
	}

	done = do_io_rw(iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	is_ioport ? ioport_unmap(iomem) : iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}

static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
{
	struct vfio_pci_ioeventfd *ioeventfd = opaque;

	switch (ioeventfd->count) {
	case 1:
		vfio_iowrite8(ioeventfd->data, ioeventfd->addr);
		break;
	case 2:
		vfio_iowrite16(ioeventfd->data, ioeventfd->addr);
		break;
	case 4:
		vfio_iowrite32(ioeventfd->data, ioeventfd->addr);
		break;
#ifdef iowrite64
	case 8:
		vfio_iowrite64(ioeventfd->data, ioeventfd->addr);
		break;
#endif
	}

	return 0;
}

long vfio_pci_ioeventfd(struct vfio_pci_device *vdev, loff_t offset,
			uint64_t data, int count, int fd)
{
	struct pci_dev *pdev = vdev->pdev;
	loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
	int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
	struct vfio_pci_ioeventfd *ioeventfd;

	/* Only support ioeventfds into BARs */
	if (bar > VFIO_PCI_BAR5_REGION_INDEX)
		return -EINVAL;

	if (pos + count > pci_resource_len(pdev, bar))
		return -EINVAL;

	/* Disallow ioeventfds working around MSI-X table writes */
	if (bar == vdev->msix_bar &&
	    !(pos + count <= vdev->msix_offset ||
	      pos >= vdev->msix_offset + vdev->msix_size))
		return -EINVAL;

#ifndef iowrite64
	if (count == 8)
		return -EINVAL;
#endif

	ret = vfio_pci_setup_barmap(vdev, bar);
	if (ret)
		return ret;

	mutex_lock(&vdev->ioeventfds_lock);

	list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
		if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
		    ioeventfd->data == data && ioeventfd->count == count) {
			if (fd == -1) {
				vfio_virqfd_disable(&ioeventfd->virqfd);
				list_del(&ioeventfd->next);
				vdev->ioeventfds_nr--;
				kfree(ioeventfd);
				ret = 0;
			} else
				ret = -EEXIST;

			goto out_unlock;
		}
	}

	if (fd < 0) {
		ret = -ENODEV;
		goto out_unlock;
	}

	if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
		ret = -ENOSPC;
		goto out_unlock;
	}

	ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
	if (!ioeventfd) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	ioeventfd->addr = vdev->barmap[bar] + pos;
	ioeventfd->data = data;
	ioeventfd->pos = pos;
	ioeventfd->bar = bar;
	ioeventfd->count = count;

	ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
				 NULL, NULL, &ioeventfd->virqfd, fd);
	if (ret) {
		kfree(ioeventfd);
		goto out_unlock;
	}

	list_add(&ioeventfd->next, &vdev->ioeventfds_list);
	vdev->ioeventfds_nr++;

out_unlock:
	mutex_unlock(&vdev->ioeventfds_lock);

	return ret;
}
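/*
 * Userspace sketch (for illustration only, not part of this file): a VMM
 * that wants a 4-byte write of 0x1 to a doorbell at BAR0 offset 0x1000 to
 * be posted from an eventfd could use the VFIO_DEVICE_IOEVENTFD ioctl
 * roughly as below, where device_fd and the BAR0 region offset
 * bar0_offset (queried via VFIO_DEVICE_GET_REGION_INFO) are assumed to
 * already be in hand:
 *
 *	struct vfio_device_ioeventfd ioeventfd = {
 *		.argsz	= sizeof(ioeventfd),
 *		.flags	= VFIO_DEVICE_IOEVENTFD_32,
 *		.offset	= bar0_offset + 0x1000,
 *		.data	= 0x1,
 *		.fd	= eventfd(0, EFD_CLOEXEC),
 *	};
 *
 *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioeventfd);
 *
 * Calling again with .fd = -1 and the same offset/data/size tears the
 * ioeventfd down, matching the list walk in vfio_pci_ioeventfd() above.
 */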