1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtio PCI driver - modern (virtio 1.0) device support 4 * 5 * This module allows virtio devices to be used over a virtual PCI device. 6 * This can be used with QEMU based VMMs like KVM or Xen. 7 * 8 * Copyright IBM Corp. 2007 9 * Copyright Red Hat, Inc. 2014 10 * 11 * Authors: 12 * Anthony Liguori <aliguori@us.ibm.com> 13 * Rusty Russell <rusty@rustcorp.com.au> 14 * Michael S. Tsirkin <mst@redhat.com> 15 */ 16 17 #include <linux/delay.h> 18 #define VIRTIO_PCI_NO_LEGACY 19 #define VIRTIO_RING_NO_LEGACY 20 #include "virtio_pci_common.h" 21 22 static u64 vp_get_features(struct virtio_device *vdev) 23 { 24 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 25 26 return vp_modern_get_features(&vp_dev->mdev); 27 } 28 29 static void vp_transport_features(struct virtio_device *vdev, u64 features) 30 { 31 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 32 struct pci_dev *pci_dev = vp_dev->pci_dev; 33 34 if ((features & BIT_ULL(VIRTIO_F_SR_IOV)) && 35 pci_find_ext_capability(pci_dev, PCI_EXT_CAP_ID_SRIOV)) 36 __virtio_set_bit(vdev, VIRTIO_F_SR_IOV); 37 38 if (features & BIT_ULL(VIRTIO_F_RING_RESET)) 39 __virtio_set_bit(vdev, VIRTIO_F_RING_RESET); 40 } 41 42 /* virtio config->finalize_features() implementation */ 43 static int vp_finalize_features(struct virtio_device *vdev) 44 { 45 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 46 u64 features = vdev->features; 47 48 /* Give virtio_ring a chance to accept features. */ 49 vring_transport_features(vdev); 50 51 /* Give virtio_pci a chance to accept features. */ 52 vp_transport_features(vdev, features); 53 54 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { 55 dev_err(&vdev->dev, "virtio: device uses modern interface " 56 "but does not have VIRTIO_F_VERSION_1\n"); 57 return -EINVAL; 58 } 59 60 vp_modern_set_features(&vp_dev->mdev, vdev->features); 61 62 return 0; 63 } 64 65 /* virtio config->get() implementation */ 66 static void vp_get(struct virtio_device *vdev, unsigned int offset, 67 void *buf, unsigned int len) 68 { 69 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 70 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 71 void __iomem *device = mdev->device; 72 u8 b; 73 __le16 w; 74 __le32 l; 75 76 BUG_ON(offset + len > mdev->device_len); 77 78 switch (len) { 79 case 1: 80 b = ioread8(device + offset); 81 memcpy(buf, &b, sizeof b); 82 break; 83 case 2: 84 w = cpu_to_le16(ioread16(device + offset)); 85 memcpy(buf, &w, sizeof w); 86 break; 87 case 4: 88 l = cpu_to_le32(ioread32(device + offset)); 89 memcpy(buf, &l, sizeof l); 90 break; 91 case 8: 92 l = cpu_to_le32(ioread32(device + offset)); 93 memcpy(buf, &l, sizeof l); 94 l = cpu_to_le32(ioread32(device + offset + sizeof l)); 95 memcpy(buf + sizeof l, &l, sizeof l); 96 break; 97 default: 98 BUG(); 99 } 100 } 101 102 /* the config->set() implementation. it's symmetric to the config->get() 103 * implementation */ 104 static void vp_set(struct virtio_device *vdev, unsigned int offset, 105 const void *buf, unsigned int len) 106 { 107 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 108 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 109 void __iomem *device = mdev->device; 110 u8 b; 111 __le16 w; 112 __le32 l; 113 114 BUG_ON(offset + len > mdev->device_len); 115 116 switch (len) { 117 case 1: 118 memcpy(&b, buf, sizeof b); 119 iowrite8(b, device + offset); 120 break; 121 case 2: 122 memcpy(&w, buf, sizeof w); 123 iowrite16(le16_to_cpu(w), device + offset); 124 break; 125 case 4: 126 memcpy(&l, buf, sizeof l); 127 iowrite32(le32_to_cpu(l), device + offset); 128 break; 129 case 8: 130 memcpy(&l, buf, sizeof l); 131 iowrite32(le32_to_cpu(l), device + offset); 132 memcpy(&l, buf + sizeof l, sizeof l); 133 iowrite32(le32_to_cpu(l), device + offset + sizeof l); 134 break; 135 default: 136 BUG(); 137 } 138 } 139 140 static u32 vp_generation(struct virtio_device *vdev) 141 { 142 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 143 144 return vp_modern_generation(&vp_dev->mdev); 145 } 146 147 /* config->{get,set}_status() implementations */ 148 static u8 vp_get_status(struct virtio_device *vdev) 149 { 150 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 151 152 return vp_modern_get_status(&vp_dev->mdev); 153 } 154 155 static void vp_set_status(struct virtio_device *vdev, u8 status) 156 { 157 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 158 159 /* We should never be setting status to 0. */ 160 BUG_ON(status == 0); 161 vp_modern_set_status(&vp_dev->mdev, status); 162 } 163 164 static void vp_reset(struct virtio_device *vdev) 165 { 166 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 167 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 168 169 /* 0 status means a reset. */ 170 vp_modern_set_status(mdev, 0); 171 /* After writing 0 to device_status, the driver MUST wait for a read of 172 * device_status to return 0 before reinitializing the device. 173 * This will flush out the status write, and flush in device writes, 174 * including MSI-X interrupts, if any. 175 */ 176 while (vp_modern_get_status(mdev)) 177 msleep(1); 178 /* Flush pending VQ/configuration callbacks. */ 179 vp_synchronize_vectors(vdev); 180 } 181 182 static int vp_active_vq(struct virtqueue *vq, u16 msix_vec) 183 { 184 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 185 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 186 unsigned long index; 187 188 index = vq->index; 189 190 /* activate the queue */ 191 vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq)); 192 vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq), 193 virtqueue_get_avail_addr(vq), 194 virtqueue_get_used_addr(vq)); 195 196 if (msix_vec != VIRTIO_MSI_NO_VECTOR) { 197 msix_vec = vp_modern_queue_vector(mdev, index, msix_vec); 198 if (msix_vec == VIRTIO_MSI_NO_VECTOR) 199 return -EBUSY; 200 } 201 202 return 0; 203 } 204 205 static int vp_modern_disable_vq_and_reset(struct virtqueue *vq) 206 { 207 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 208 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 209 struct virtio_pci_vq_info *info; 210 unsigned long flags; 211 212 if (!virtio_has_feature(vq->vdev, VIRTIO_F_RING_RESET)) 213 return -ENOENT; 214 215 vp_modern_set_queue_reset(mdev, vq->index); 216 217 info = vp_dev->vqs[vq->index]; 218 219 /* delete vq from irq handler */ 220 spin_lock_irqsave(&vp_dev->lock, flags); 221 list_del(&info->node); 222 spin_unlock_irqrestore(&vp_dev->lock, flags); 223 224 INIT_LIST_HEAD(&info->node); 225 226 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 227 __virtqueue_break(vq); 228 #endif 229 230 /* For the case where vq has an exclusive irq, call synchronize_irq() to 231 * wait for completion. 232 * 233 * note: We can't use disable_irq() since it conflicts with the affinity 234 * managed IRQ that is used by some drivers. 235 */ 236 if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR) 237 synchronize_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector)); 238 239 vq->reset = true; 240 241 return 0; 242 } 243 244 static int vp_modern_enable_vq_after_reset(struct virtqueue *vq) 245 { 246 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 247 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 248 struct virtio_pci_vq_info *info; 249 unsigned long flags, index; 250 int err; 251 252 if (!vq->reset) 253 return -EBUSY; 254 255 index = vq->index; 256 info = vp_dev->vqs[index]; 257 258 if (vp_modern_get_queue_reset(mdev, index)) 259 return -EBUSY; 260 261 if (vp_modern_get_queue_enable(mdev, index)) 262 return -EBUSY; 263 264 err = vp_active_vq(vq, info->msix_vector); 265 if (err) 266 return err; 267 268 if (vq->callback) { 269 spin_lock_irqsave(&vp_dev->lock, flags); 270 list_add(&info->node, &vp_dev->virtqueues); 271 spin_unlock_irqrestore(&vp_dev->lock, flags); 272 } else { 273 INIT_LIST_HEAD(&info->node); 274 } 275 276 #ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION 277 __virtqueue_unbreak(vq); 278 #endif 279 280 vp_modern_set_queue_enable(&vp_dev->mdev, index, true); 281 vq->reset = false; 282 283 return 0; 284 } 285 286 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) 287 { 288 return vp_modern_config_vector(&vp_dev->mdev, vector); 289 } 290 291 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, 292 struct virtio_pci_vq_info *info, 293 unsigned int index, 294 void (*callback)(struct virtqueue *vq), 295 const char *name, 296 u32 size, 297 bool ctx, 298 u16 msix_vec) 299 { 300 301 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 302 struct virtqueue *vq; 303 u16 num; 304 int err; 305 306 if (index >= vp_modern_get_num_queues(mdev)) 307 return ERR_PTR(-ENOENT); 308 309 /* Check if queue is either not available or already active. */ 310 num = vp_modern_get_queue_size(mdev, index); 311 if (!num || vp_modern_get_queue_enable(mdev, index)) 312 return ERR_PTR(-ENOENT); 313 314 if (!size || size > num) 315 size = num; 316 317 if (size & (size - 1)) { 318 dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", size); 319 return ERR_PTR(-EINVAL); 320 } 321 322 info->msix_vector = msix_vec; 323 324 /* create the vring */ 325 vq = vring_create_virtqueue(index, size, 326 SMP_CACHE_BYTES, &vp_dev->vdev, 327 true, true, ctx, 328 vp_notify, callback, name); 329 if (!vq) 330 return ERR_PTR(-ENOMEM); 331 332 vq->num_max = num; 333 334 err = vp_active_vq(vq, msix_vec); 335 if (err) 336 goto err; 337 338 vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL); 339 if (!vq->priv) { 340 err = -ENOMEM; 341 goto err; 342 } 343 344 return vq; 345 346 err: 347 vring_del_virtqueue(vq); 348 return ERR_PTR(err); 349 } 350 351 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned int nvqs, 352 struct virtqueue *vqs[], 353 vq_callback_t *callbacks[], 354 const char * const names[], 355 u32 sizes[], 356 const bool *ctx, 357 struct irq_affinity *desc) 358 { 359 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 360 struct virtqueue *vq; 361 int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names, sizes, ctx, 362 desc); 363 364 if (rc) 365 return rc; 366 367 /* Select and activate all queues. Has to be done last: once we do 368 * this, there's no way to go back except reset. 369 */ 370 list_for_each_entry(vq, &vdev->vqs, list) 371 vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true); 372 373 return 0; 374 } 375 376 static void del_vq(struct virtio_pci_vq_info *info) 377 { 378 struct virtqueue *vq = info->vq; 379 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 380 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 381 382 if (vp_dev->msix_enabled) 383 vp_modern_queue_vector(mdev, vq->index, 384 VIRTIO_MSI_NO_VECTOR); 385 386 if (!mdev->notify_base) 387 pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv); 388 389 vring_del_virtqueue(vq); 390 } 391 392 static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, 393 u8 *bar, u64 *offset, u64 *len) 394 { 395 int pos; 396 397 for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0; 398 pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { 399 u8 type, cap_len, id, res_bar; 400 u32 tmp32; 401 u64 res_offset, res_length; 402 403 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, 404 cfg_type), &type); 405 if (type != VIRTIO_PCI_CAP_SHARED_MEMORY_CFG) 406 continue; 407 408 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, 409 cap_len), &cap_len); 410 if (cap_len != sizeof(struct virtio_pci_cap64)) { 411 dev_err(&dev->dev, "%s: shm cap with bad size offset:" 412 " %d size: %d\n", __func__, pos, cap_len); 413 continue; 414 } 415 416 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, 417 id), &id); 418 if (id != required_id) 419 continue; 420 421 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, 422 bar), &res_bar); 423 if (res_bar >= PCI_STD_NUM_BARS) 424 continue; 425 426 /* Type and ID match, and the BAR value isn't reserved. 427 * Looks good. 428 */ 429 430 /* Read the lower 32bit of length and offset */ 431 pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, 432 offset), &tmp32); 433 res_offset = tmp32; 434 pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, 435 length), &tmp32); 436 res_length = tmp32; 437 438 /* and now the top half */ 439 pci_read_config_dword(dev, 440 pos + offsetof(struct virtio_pci_cap64, 441 offset_hi), &tmp32); 442 res_offset |= ((u64)tmp32) << 32; 443 pci_read_config_dword(dev, 444 pos + offsetof(struct virtio_pci_cap64, 445 length_hi), &tmp32); 446 res_length |= ((u64)tmp32) << 32; 447 448 *bar = res_bar; 449 *offset = res_offset; 450 *len = res_length; 451 452 return pos; 453 } 454 return 0; 455 } 456 457 static bool vp_get_shm_region(struct virtio_device *vdev, 458 struct virtio_shm_region *region, u8 id) 459 { 460 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 461 struct pci_dev *pci_dev = vp_dev->pci_dev; 462 u8 bar; 463 u64 offset, len; 464 phys_addr_t phys_addr; 465 size_t bar_len; 466 467 if (!virtio_pci_find_shm_cap(pci_dev, id, &bar, &offset, &len)) 468 return false; 469 470 phys_addr = pci_resource_start(pci_dev, bar); 471 bar_len = pci_resource_len(pci_dev, bar); 472 473 if ((offset + len) < offset) { 474 dev_err(&pci_dev->dev, "%s: cap offset+len overflow detected\n", 475 __func__); 476 return false; 477 } 478 479 if (offset + len > bar_len) { 480 dev_err(&pci_dev->dev, "%s: bar shorter than cap offset+len\n", 481 __func__); 482 return false; 483 } 484 485 region->len = len; 486 region->addr = (u64) phys_addr + offset; 487 488 return true; 489 } 490 491 static const struct virtio_config_ops virtio_pci_config_nodev_ops = { 492 .get = NULL, 493 .set = NULL, 494 .generation = vp_generation, 495 .get_status = vp_get_status, 496 .set_status = vp_set_status, 497 .reset = vp_reset, 498 .find_vqs = vp_modern_find_vqs, 499 .del_vqs = vp_del_vqs, 500 .synchronize_cbs = vp_synchronize_vectors, 501 .get_features = vp_get_features, 502 .finalize_features = vp_finalize_features, 503 .bus_name = vp_bus_name, 504 .set_vq_affinity = vp_set_vq_affinity, 505 .get_vq_affinity = vp_get_vq_affinity, 506 .get_shm_region = vp_get_shm_region, 507 .disable_vq_and_reset = vp_modern_disable_vq_and_reset, 508 .enable_vq_after_reset = vp_modern_enable_vq_after_reset, 509 }; 510 511 static const struct virtio_config_ops virtio_pci_config_ops = { 512 .get = vp_get, 513 .set = vp_set, 514 .generation = vp_generation, 515 .get_status = vp_get_status, 516 .set_status = vp_set_status, 517 .reset = vp_reset, 518 .find_vqs = vp_modern_find_vqs, 519 .del_vqs = vp_del_vqs, 520 .synchronize_cbs = vp_synchronize_vectors, 521 .get_features = vp_get_features, 522 .finalize_features = vp_finalize_features, 523 .bus_name = vp_bus_name, 524 .set_vq_affinity = vp_set_vq_affinity, 525 .get_vq_affinity = vp_get_vq_affinity, 526 .get_shm_region = vp_get_shm_region, 527 .disable_vq_and_reset = vp_modern_disable_vq_and_reset, 528 .enable_vq_after_reset = vp_modern_enable_vq_after_reset, 529 }; 530 531 /* the PCI probing function */ 532 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) 533 { 534 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 535 struct pci_dev *pci_dev = vp_dev->pci_dev; 536 int err; 537 538 mdev->pci_dev = pci_dev; 539 540 err = vp_modern_probe(mdev); 541 if (err) 542 return err; 543 544 if (mdev->device) 545 vp_dev->vdev.config = &virtio_pci_config_ops; 546 else 547 vp_dev->vdev.config = &virtio_pci_config_nodev_ops; 548 549 vp_dev->config_vector = vp_config_vector; 550 vp_dev->setup_vq = setup_vq; 551 vp_dev->del_vq = del_vq; 552 vp_dev->isr = mdev->isr; 553 vp_dev->vdev.id = mdev->id; 554 555 return 0; 556 } 557 558 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) 559 { 560 struct virtio_pci_modern_device *mdev = &vp_dev->mdev; 561 562 vp_modern_remove(mdev); 563 } 564