1 /* 2 * MSI-X device support 3 * 4 * This module includes support for MSI-X in pci devices. 5 * 6 * Author: Michael S. Tsirkin <mst@redhat.com> 7 * 8 * Copyright (c) 2009, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com) 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2. See 11 * the COPYING file in the top-level directory. 12 * 13 * Contributions after 2012-01-13 are licensed under the terms of the 14 * GNU GPL, version 2 or (at your option) any later version. 15 */ 16 17 #include "hw/hw.h" 18 #include "hw/pci/msi.h" 19 #include "hw/pci/msix.h" 20 #include "hw/pci/pci.h" 21 #include "qemu/range.h" 22 23 #define MSIX_CAP_LENGTH 12 24 25 /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */ 26 #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1) 27 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) 28 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) 29 30 MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) 31 { 32 uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; 33 MSIMessage msg; 34 35 msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR); 36 msg.data = pci_get_long(table_entry + PCI_MSIX_ENTRY_DATA); 37 return msg; 38 } 39 40 /* 41 * Special API for POWER to configure the vectors through 42 * a side channel. Should never be used by devices. 43 */ 44 void msix_set_message(PCIDevice *dev, int vector, struct MSIMessage msg) 45 { 46 uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; 47 48 pci_set_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR, msg.address); 49 pci_set_long(table_entry + PCI_MSIX_ENTRY_DATA, msg.data); 50 table_entry[PCI_MSIX_ENTRY_VECTOR_CTRL] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; 51 } 52 53 static uint8_t msix_pending_mask(int vector) 54 { 55 return 1 << (vector % 8); 56 } 57 58 static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) 59 { 60 return dev->msix_pba + vector / 8; 61 } 62 63 static int msix_is_pending(PCIDevice *dev, int vector) 64 { 65 return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); 66 } 67 68 void msix_set_pending(PCIDevice *dev, unsigned int vector) 69 { 70 *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); 71 } 72 73 static void msix_clr_pending(PCIDevice *dev, int vector) 74 { 75 *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); 76 } 77 78 static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask) 79 { 80 unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; 81 return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT; 82 } 83 84 bool msix_is_masked(PCIDevice *dev, unsigned int vector) 85 { 86 return msix_vector_masked(dev, vector, dev->msix_function_masked); 87 } 88 89 static void msix_fire_vector_notifier(PCIDevice *dev, 90 unsigned int vector, bool is_masked) 91 { 92 MSIMessage msg; 93 int ret; 94 95 if (!dev->msix_vector_use_notifier) { 96 return; 97 } 98 if (is_masked) { 99 dev->msix_vector_release_notifier(dev, vector); 100 } else { 101 msg = msix_get_message(dev, vector); 102 ret = dev->msix_vector_use_notifier(dev, vector, msg); 103 assert(ret >= 0); 104 } 105 } 106 107 static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) 108 { 109 bool is_masked = msix_is_masked(dev, vector); 110 111 if (is_masked == was_masked) { 112 return; 113 } 114 115 msix_fire_vector_notifier(dev, vector, is_masked); 116 117 if (!is_masked && msix_is_pending(dev, vector)) { 118 msix_clr_pending(dev, vector); 119 msix_notify(dev, vector); 120 } 121 } 122 123 static void msix_update_function_masked(PCIDevice *dev) 124 { 125 dev->msix_function_masked = !msix_enabled(dev) || 126 (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK); 127 } 128 129 /* Handle MSI-X capability config write. */ 130 void msix_write_config(PCIDevice *dev, uint32_t addr, 131 uint32_t val, int len) 132 { 133 unsigned enable_pos = dev->msix_cap + MSIX_CONTROL_OFFSET; 134 int vector; 135 bool was_masked; 136 137 if (!msix_present(dev) || !range_covers_byte(addr, len, enable_pos)) { 138 return; 139 } 140 141 was_masked = dev->msix_function_masked; 142 msix_update_function_masked(dev); 143 144 if (!msix_enabled(dev)) { 145 return; 146 } 147 148 pci_device_deassert_intx(dev); 149 150 if (dev->msix_function_masked == was_masked) { 151 return; 152 } 153 154 for (vector = 0; vector < dev->msix_entries_nr; ++vector) { 155 msix_handle_mask_update(dev, vector, 156 msix_vector_masked(dev, vector, was_masked)); 157 } 158 } 159 160 static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, 161 unsigned size) 162 { 163 PCIDevice *dev = opaque; 164 165 return pci_get_long(dev->msix_table + addr); 166 } 167 168 static void msix_table_mmio_write(void *opaque, hwaddr addr, 169 uint64_t val, unsigned size) 170 { 171 PCIDevice *dev = opaque; 172 int vector = addr / PCI_MSIX_ENTRY_SIZE; 173 bool was_masked; 174 175 was_masked = msix_is_masked(dev, vector); 176 pci_set_long(dev->msix_table + addr, val); 177 msix_handle_mask_update(dev, vector, was_masked); 178 } 179 180 static const MemoryRegionOps msix_table_mmio_ops = { 181 .read = msix_table_mmio_read, 182 .write = msix_table_mmio_write, 183 .endianness = DEVICE_LITTLE_ENDIAN, 184 .valid = { 185 .min_access_size = 4, 186 .max_access_size = 4, 187 }, 188 }; 189 190 static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, 191 unsigned size) 192 { 193 PCIDevice *dev = opaque; 194 if (dev->msix_vector_poll_notifier) { 195 unsigned vector_start = addr * 8; 196 unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); 197 dev->msix_vector_poll_notifier(dev, vector_start, vector_end); 198 } 199 200 return pci_get_long(dev->msix_pba + addr); 201 } 202 203 static void msix_pba_mmio_write(void *opaque, hwaddr addr, 204 uint64_t val, unsigned size) 205 { 206 } 207 208 static const MemoryRegionOps msix_pba_mmio_ops = { 209 .read = msix_pba_mmio_read, 210 .write = msix_pba_mmio_write, 211 .endianness = DEVICE_LITTLE_ENDIAN, 212 .valid = { 213 .min_access_size = 4, 214 .max_access_size = 4, 215 }, 216 }; 217 218 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) 219 { 220 int vector; 221 222 for (vector = 0; vector < nentries; ++vector) { 223 unsigned offset = 224 vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; 225 bool was_masked = msix_is_masked(dev, vector); 226 227 dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; 228 msix_handle_mask_update(dev, vector, was_masked); 229 } 230 } 231 232 /* Initialize the MSI-X structures */ 233 int msix_init(struct PCIDevice *dev, unsigned short nentries, 234 MemoryRegion *table_bar, uint8_t table_bar_nr, 235 unsigned table_offset, MemoryRegion *pba_bar, 236 uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos) 237 { 238 int cap; 239 unsigned table_size, pba_size; 240 uint8_t *config; 241 242 /* Nothing to do if MSI is not supported by interrupt controller */ 243 if (!msi_supported) { 244 return -ENOTSUP; 245 } 246 247 if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) { 248 return -EINVAL; 249 } 250 251 table_size = nentries * PCI_MSIX_ENTRY_SIZE; 252 pba_size = QEMU_ALIGN_UP(nentries, 64) / 8; 253 254 /* Sanity test: table & pba don't overlap, fit within BARs, min aligned */ 255 if ((table_bar_nr == pba_bar_nr && 256 ranges_overlap(table_offset, table_size, pba_offset, pba_size)) || 257 table_offset + table_size > memory_region_size(table_bar) || 258 pba_offset + pba_size > memory_region_size(pba_bar) || 259 (table_offset | pba_offset) & PCI_MSIX_FLAGS_BIRMASK) { 260 return -EINVAL; 261 } 262 263 cap = pci_add_capability(dev, PCI_CAP_ID_MSIX, cap_pos, MSIX_CAP_LENGTH); 264 if (cap < 0) { 265 return cap; 266 } 267 268 dev->msix_cap = cap; 269 dev->cap_present |= QEMU_PCI_CAP_MSIX; 270 config = dev->config + cap; 271 272 pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1); 273 dev->msix_entries_nr = nentries; 274 dev->msix_function_masked = true; 275 276 pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar_nr); 277 pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar_nr); 278 279 /* Make flags bit writable. */ 280 dev->wmask[cap + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK | 281 MSIX_MASKALL_MASK; 282 283 dev->msix_table = g_malloc0(table_size); 284 dev->msix_pba = g_malloc0(pba_size); 285 dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used); 286 287 msix_mask_all(dev, nentries); 288 289 memory_region_init_io(&dev->msix_table_mmio, OBJECT(dev), &msix_table_mmio_ops, dev, 290 "msix-table", table_size); 291 memory_region_add_subregion(table_bar, table_offset, &dev->msix_table_mmio); 292 memory_region_init_io(&dev->msix_pba_mmio, OBJECT(dev), &msix_pba_mmio_ops, dev, 293 "msix-pba", pba_size); 294 memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); 295 296 return 0; 297 } 298 299 int msix_init_exclusive_bar(PCIDevice *dev, unsigned short nentries, 300 uint8_t bar_nr) 301 { 302 int ret; 303 char *name; 304 uint32_t bar_size = 4096; 305 uint32_t bar_pba_offset = bar_size / 2; 306 uint32_t bar_pba_size = (nentries / 8 + 1) * 8; 307 308 /* 309 * Migration compatibility dictates that this remains a 4k 310 * BAR with the vector table in the lower half and PBA in 311 * the upper half for nentries which is lower or equal to 128. 312 * No need to care about using more than 65 entries for legacy 313 * machine types who has at most 64 queues. 314 */ 315 if (nentries * PCI_MSIX_ENTRY_SIZE > bar_pba_offset) { 316 bar_pba_offset = nentries * PCI_MSIX_ENTRY_SIZE; 317 } 318 319 if (bar_pba_offset + bar_pba_size > 4096) { 320 bar_size = bar_pba_offset + bar_pba_size; 321 } 322 323 bar_size = pow2ceil(bar_size); 324 325 name = g_strdup_printf("%s-msix", dev->name); 326 memory_region_init(&dev->msix_exclusive_bar, OBJECT(dev), name, bar_size); 327 g_free(name); 328 329 ret = msix_init(dev, nentries, &dev->msix_exclusive_bar, bar_nr, 330 0, &dev->msix_exclusive_bar, 331 bar_nr, bar_pba_offset, 332 0); 333 if (ret) { 334 return ret; 335 } 336 337 pci_register_bar(dev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, 338 &dev->msix_exclusive_bar); 339 340 return 0; 341 } 342 343 static void msix_free_irq_entries(PCIDevice *dev) 344 { 345 int vector; 346 347 for (vector = 0; vector < dev->msix_entries_nr; ++vector) { 348 dev->msix_entry_used[vector] = 0; 349 msix_clr_pending(dev, vector); 350 } 351 } 352 353 static void msix_clear_all_vectors(PCIDevice *dev) 354 { 355 int vector; 356 357 for (vector = 0; vector < dev->msix_entries_nr; ++vector) { 358 msix_clr_pending(dev, vector); 359 } 360 } 361 362 /* Clean up resources for the device. */ 363 void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) 364 { 365 if (!msix_present(dev)) { 366 return; 367 } 368 pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); 369 dev->msix_cap = 0; 370 msix_free_irq_entries(dev); 371 dev->msix_entries_nr = 0; 372 memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio); 373 g_free(dev->msix_pba); 374 dev->msix_pba = NULL; 375 memory_region_del_subregion(table_bar, &dev->msix_table_mmio); 376 g_free(dev->msix_table); 377 dev->msix_table = NULL; 378 g_free(dev->msix_entry_used); 379 dev->msix_entry_used = NULL; 380 dev->cap_present &= ~QEMU_PCI_CAP_MSIX; 381 } 382 383 void msix_uninit_exclusive_bar(PCIDevice *dev) 384 { 385 if (msix_present(dev)) { 386 msix_uninit(dev, &dev->msix_exclusive_bar, &dev->msix_exclusive_bar); 387 } 388 } 389 390 void msix_save(PCIDevice *dev, QEMUFile *f) 391 { 392 unsigned n = dev->msix_entries_nr; 393 394 if (!msix_present(dev)) { 395 return; 396 } 397 398 qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); 399 qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8); 400 } 401 402 /* Should be called after restoring the config space. */ 403 void msix_load(PCIDevice *dev, QEMUFile *f) 404 { 405 unsigned n = dev->msix_entries_nr; 406 unsigned int vector; 407 408 if (!msix_present(dev)) { 409 return; 410 } 411 412 msix_clear_all_vectors(dev); 413 qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); 414 qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8); 415 msix_update_function_masked(dev); 416 417 for (vector = 0; vector < n; vector++) { 418 msix_handle_mask_update(dev, vector, true); 419 } 420 } 421 422 /* Does device support MSI-X? */ 423 int msix_present(PCIDevice *dev) 424 { 425 return dev->cap_present & QEMU_PCI_CAP_MSIX; 426 } 427 428 /* Is MSI-X enabled? */ 429 int msix_enabled(PCIDevice *dev) 430 { 431 return (dev->cap_present & QEMU_PCI_CAP_MSIX) && 432 (dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 433 MSIX_ENABLE_MASK); 434 } 435 436 /* Send an MSI-X message */ 437 void msix_notify(PCIDevice *dev, unsigned vector) 438 { 439 MSIMessage msg; 440 441 if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) 442 return; 443 if (msix_is_masked(dev, vector)) { 444 msix_set_pending(dev, vector); 445 return; 446 } 447 448 msg = msix_get_message(dev, vector); 449 450 msi_send_message(dev, msg); 451 } 452 453 void msix_reset(PCIDevice *dev) 454 { 455 if (!msix_present(dev)) { 456 return; 457 } 458 msix_clear_all_vectors(dev); 459 dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= 460 ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; 461 memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); 462 memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8); 463 msix_mask_all(dev, dev->msix_entries_nr); 464 } 465 466 /* PCI spec suggests that devices make it possible for software to configure 467 * less vectors than supported by the device, but does not specify a standard 468 * mechanism for devices to do so. 469 * 470 * We support this by asking devices to declare vectors software is going to 471 * actually use, and checking this on the notification path. Devices that 472 * don't want to follow the spec suggestion can declare all vectors as used. */ 473 474 /* Mark vector as used. */ 475 int msix_vector_use(PCIDevice *dev, unsigned vector) 476 { 477 if (vector >= dev->msix_entries_nr) 478 return -EINVAL; 479 dev->msix_entry_used[vector]++; 480 return 0; 481 } 482 483 /* Mark vector as unused. */ 484 void msix_vector_unuse(PCIDevice *dev, unsigned vector) 485 { 486 if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { 487 return; 488 } 489 if (--dev->msix_entry_used[vector]) { 490 return; 491 } 492 msix_clr_pending(dev, vector); 493 } 494 495 void msix_unuse_all_vectors(PCIDevice *dev) 496 { 497 if (!msix_present(dev)) { 498 return; 499 } 500 msix_free_irq_entries(dev); 501 } 502 503 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) 504 { 505 return dev->msix_entries_nr; 506 } 507 508 static int msix_set_notifier_for_vector(PCIDevice *dev, unsigned int vector) 509 { 510 MSIMessage msg; 511 512 if (msix_is_masked(dev, vector)) { 513 return 0; 514 } 515 msg = msix_get_message(dev, vector); 516 return dev->msix_vector_use_notifier(dev, vector, msg); 517 } 518 519 static void msix_unset_notifier_for_vector(PCIDevice *dev, unsigned int vector) 520 { 521 if (msix_is_masked(dev, vector)) { 522 return; 523 } 524 dev->msix_vector_release_notifier(dev, vector); 525 } 526 527 int msix_set_vector_notifiers(PCIDevice *dev, 528 MSIVectorUseNotifier use_notifier, 529 MSIVectorReleaseNotifier release_notifier, 530 MSIVectorPollNotifier poll_notifier) 531 { 532 int vector, ret; 533 534 assert(use_notifier && release_notifier); 535 536 dev->msix_vector_use_notifier = use_notifier; 537 dev->msix_vector_release_notifier = release_notifier; 538 dev->msix_vector_poll_notifier = poll_notifier; 539 540 if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 541 (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { 542 for (vector = 0; vector < dev->msix_entries_nr; vector++) { 543 ret = msix_set_notifier_for_vector(dev, vector); 544 if (ret < 0) { 545 goto undo; 546 } 547 } 548 } 549 if (dev->msix_vector_poll_notifier) { 550 dev->msix_vector_poll_notifier(dev, 0, dev->msix_entries_nr); 551 } 552 return 0; 553 554 undo: 555 while (--vector >= 0) { 556 msix_unset_notifier_for_vector(dev, vector); 557 } 558 dev->msix_vector_use_notifier = NULL; 559 dev->msix_vector_release_notifier = NULL; 560 return ret; 561 } 562 563 void msix_unset_vector_notifiers(PCIDevice *dev) 564 { 565 int vector; 566 567 assert(dev->msix_vector_use_notifier && 568 dev->msix_vector_release_notifier); 569 570 if ((dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & 571 (MSIX_ENABLE_MASK | MSIX_MASKALL_MASK)) == MSIX_ENABLE_MASK) { 572 for (vector = 0; vector < dev->msix_entries_nr; vector++) { 573 msix_unset_notifier_for_vector(dev, vector); 574 } 575 } 576 dev->msix_vector_use_notifier = NULL; 577 dev->msix_vector_release_notifier = NULL; 578 dev->msix_vector_poll_notifier = NULL; 579 } 580 581 static void put_msix_state(QEMUFile *f, void *pv, size_t size) 582 { 583 msix_save(pv, f); 584 } 585 586 static int get_msix_state(QEMUFile *f, void *pv, size_t size) 587 { 588 msix_load(pv, f); 589 return 0; 590 } 591 592 static VMStateInfo vmstate_info_msix = { 593 .name = "msix state", 594 .get = get_msix_state, 595 .put = put_msix_state, 596 }; 597 598 const VMStateDescription vmstate_msix = { 599 .name = "msix", 600 .fields = (VMStateField[]) { 601 { 602 .name = "msix", 603 .version_id = 0, 604 .field_exists = NULL, 605 .size = 0, /* ouch */ 606 .info = &vmstate_info_msix, 607 .flags = VMS_SINGLE, 608 .offset = 0, 609 }, 610 VMSTATE_END_OF_LIST() 611 } 612 }; 613