/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */
static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    uint64_t state[2] = { 0 };
    int ret;

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, errno,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
}

typedef struct {
    XiveTCTX *tctx;
    Error *err;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
}

void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .err = NULL,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    if (s.err) {
        error_propagate(errp, s.err);
        return;
    }
}

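/*
 * Illustrative sketch, not part of the model: a typical caller of
 * kvmppc_xive_cpu_synchronize_state() walks every vCPU before dumping
 * or saving the thread contexts.  The xive_tctx_of() helper below is
 * hypothetical and only stands for "get the XiveTCTX of this vCPU";
 * the real lookup depends on how the machine wires its presenters.
 *
 *     CPUState *cs;
 *     Error *local_err = NULL;
 *
 *     CPU_FOREACH(cs) {
 *         kvmppc_xive_cpu_synchronize_state(xive_tctx_of(cs), &local_err);
 *         if (local_err) {
 *             error_report_err(local_err);
 *             break;
 *         }
 *     }
 */
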
void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
    unsigned long vcpu_id;
    int ret;

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(tctx->cs)) {
        return;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        error_setg(errp, "XIVE: unable to connect CPU%ld to KVM device: %s",
                   vcpu_id, strerror(errno));
        return;
    }

    kvm_cpu_enable(tctx->cs);
}

/*
 * XIVE Interrupt Source (KVM)
 */

void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                   Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;
    Error *local_err = NULL;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
        KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
        KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
        KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
        KVM_XIVE_SOURCE_EISN_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                      &kvm_src, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

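/*
 * For reference only: the 64-bit value handed to
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG above packs the routing information
 * of one source using the KVM_XIVE_SOURCE_* shifts and masks from the
 * kernel UAPI headers, roughly:
 *
 *     priority -> KVM_XIVE_SOURCE_PRIORITY_SHIFT / _MASK
 *     server   -> KVM_XIVE_SOURCE_SERVER_SHIFT   / _MASK
 *     masked   -> KVM_XIVE_SOURCE_MASKED_SHIFT   / _MASK
 *     eisn     -> KVM_XIVE_SOURCE_EISN_SHIFT     / _MASK
 *
 * The exact bit positions are owned by the kernel ABI and are not
 * repeated here.
 */
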
void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                      true, errp);
}

void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        Error *local_err = NULL;

        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}

static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);

    *addr = 0x0;
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        /* Perform a load without side effect to retrieve the PQ bits */
        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;
    struct kvm_irq_level args;
    int rc;

    args.irq = srcno;
    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
        args.level = KVM_INTERRUPT_SET;
    } else {
        if (val) {
            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
            args.level = KVM_INTERRUPT_SET_LEVEL;
        } else {
            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
            args.level = KVM_INTERRUPT_UNSET;
        }
    }
    rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
    if (rc < 0) {
        error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno));
    }
}

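/*
 * Illustrative only: device models do not call kvmppc_xive_source_set_irq()
 * directly.  The sPAPR machine wraps the source numbers in qemu_irq lines,
 * so raising an MSI from a device ends up being something along the lines
 * of the (simplified) sequence below.
 *
 *     qemu_irq irq = spapr_qirq(spapr, lisn);
 *
 *     qemu_set_irq(irq, 1);
 */
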
/*
 * sPAPR XIVE interrupt controller (KVM)
 */
void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    assert(xive_end_is_valid(end));

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, false, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
}

void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    Error *local_err = NULL;
    int i;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                     &xive->endt[i], &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

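/*
 * Worked example, for illustration only: END_W0_QSIZE encodes the event
 * queue size as 2^(QSIZE + 12) bytes, which is why
 * kvmppc_xive_set_queue_config() converts it with "+ 12" into the page
 * shift expected by KVM.  A 64K queue would therefore be described as:
 *
 *     kvm_eq.qshift = 4 + 12;               QSIZE 4, i.e. 2^16 bytes
 *     kvm_eq.qaddr  = xive_end_qaddr(end);  guest address of the queue
 *
 * The values above are made up; the real ones come from the END table
 * programmed by the guest through the H_INT_SET_QUEUE_CONFIG hcall.
 */
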
/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notification and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, int running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq = xive_source_esb_get(xsrc, i);
            uint8_t old_pq;

            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (a hotplug event
         * for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notification that should be enqueued in the EQs and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case else we will override
     * the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                              Error **errp)
{
    void *addr;
    uint32_t page_shift = 16; /* TODO: fix page_shift */

    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
                pgoff << page_shift);
    if (addr == MAP_FAILED) {
        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
        return NULL;
    }

    return addr;
}

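/*
 * For illustration only: the KVM XIVE device exposes its MMIO regions at
 * fixed page offsets on the device fd, and kvmppc_xive_mmap() turns a page
 * offset into a byte offset using the (hardcoded) 64K page shift.  Mapping
 * the TIMA, for instance, boils down to:
 *
 *     tm_mmap = mmap(NULL, 4ull << TM_SHIFT, PROT_READ | PROT_WRITE,
 *                    MAP_SHARED, xive->fd,
 *                    KVM_XIVE_TIMA_PAGE_OFFSET << 16);
 *
 * which is what kvmppc_xive_connect() below does through this helper.
 */
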
/*
 * All the XIVE memory regions are now backed by mappings from the KVM
 * XIVE device.
 */
void kvmppc_xive_connect(SpaprXive *xive, Error **errp)
{
    XiveSource *xsrc = &xive->source;
    XiveENDSource *end_xsrc = &xive->end_source;
    Error *local_err = NULL;
    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
    size_t tima_len = 4ull << TM_SHIFT;

    if (!kvmppc_has_cap_xive()) {
        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
        return;
    }

    /* First, create the KVM XIVE device */
    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
    if (xive->fd < 0) {
        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
        return;
    }

    /*
     * 1. Source ESB pages - KVM mapping
     */
    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
                                      &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    memory_region_init_ram_device_ptr(&xsrc->esb_mmio, OBJECT(xsrc),
                                      "xive.esb", esb_len, xsrc->esb_mmap);
    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xsrc->esb_mmio);

    /*
     * 2. END ESB pages (No KVM support yet)
     */
    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &end_xsrc->esb_mmio);

    /*
     * 3. TIMA pages - KVM mapping
     */
    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
                                     &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
    memory_region_init_ram_device_ptr(&xive->tm_mmio, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    sysbus_init_mmio(SYS_BUS_DEVICE(xive), &xive->tm_mmio);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;

    /* Map all regions */
    spapr_xive_map_mmio(xive);
}

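/*
 * For orientation only: everything in this file operates on the KVM device
 * fd created by kvmppc_xive_connect() above, so that call has to come
 * first.  The machine code (not shown here) then connects each vCPU with
 * kvmppc_xive_cpu_connect() and declares the sources with
 * kvmppc_xive_source_reset(); each step takes an Error ** that must be
 * checked.  The exact call sites and ordering are owned by the sPAPR
 * machine and spapr_xive code.
 */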