/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(unsigned long vcpu_id)
{
    KVMEnabledCPU *enabled_cpu;

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

static void kvm_cpu_disable_all(void)
{
    KVMEnabledCPU *enabled_cpu, *next;

    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
        QLIST_REMOVE(enabled_cpu, node);
        g_free(enabled_cpu);
    }
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */

int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2];
    int ret;

    assert(xive->fd != -1);

    /* word0 and word1 of the OS ring. */
    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);

    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not restore KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    return 0;
}

int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2] = { 0 };
    int ret;

    assert(xive->fd != -1);

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];

    return 0;
}

typedef struct {
    XiveTCTX *tctx;
    Error **errp;
    int ret;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp);
}

int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .errp = errp,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    return s.ret;
}

/*
 * Allocate the vCPU IPIs from the vCPU context. This will allocate
 * the XIVE IPI interrupt on the chip on which the vCPU is running.
 * This gives a better distribution of IPIs when the guest has a lot
 * of vCPUs. When the vCPUs are pinned, this will make the IPI local
 * to the chip of the vCPU. It reduces rerouting between interrupt
 * controllers and gives better performance.
 */
typedef struct {
    SpaprXive *xive;
    Error *err;
    int rc;
} XiveInitIPI;

static void kvmppc_xive_reset_ipi_on_cpu(CPUState *cs, run_on_cpu_data arg)
{
    unsigned long ipi = kvm_arch_vcpu_id(cs);
    XiveInitIPI *s = arg.host_ptr;
    uint64_t state = 0;

    s->rc = kvm_device_access(s->xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi,
                              &state, true, &s->err);
}

static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp)
{
    XiveInitIPI s = {
        .xive = xive,
        .err = NULL,
        .rc = 0,
    };

    run_on_cpu(cs, kvmppc_xive_reset_ipi_on_cpu, RUN_ON_CPU_HOST_PTR(&s));
    if (s.err) {
        error_propagate(errp, s.err);
    }
    return s.rc;
}

int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    ERRP_GUARD();
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    unsigned long vcpu_id;
    int ret;

    assert(xive->fd != -1);

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(kvm_arch_vcpu_id(tctx->cs))) {
        return 0;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: unable to connect CPU%ld to KVM device",
                         vcpu_id);
        if (ret == -ENOSPC) {
            error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n",
                              MACHINE(qdev_get_machine())->smp.max_cpus);
        }
        return ret;
    }

    /* Create/reset the vCPU IPI */
    ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp);
    if (ret < 0) {
        return ret;
    }

    kvm_cpu_enable(tctx->cs);
    return 0;
}

/*
 * XIVE Interrupt Source (KVM)
 */

int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                  Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
        KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
        KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
        KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
        KVM_XIVE_SOURCE_EISN_MASK;

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                             &kvm_src, true, errp);
}

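/*
 * Sync a source in KVM, to flush any in-flight event notification to
 * the Event Queue.
 */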
void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    assert(xive->fd != -1);

    /*
     * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect()
     * and not with all sources in kvmppc_xive_source_reset()
     */
    assert(srcno >= SPAPR_XIRQ_BASE);

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                             true, errp);
}

/*
 * To be valid, a source must have been claimed by the machine (valid
 * entry in the EAS table) and if it is a vCPU IPI, the vCPU should
 * have been enabled, which means the IPI has been allocated in
 * kvmppc_xive_cpu_connect().
 */
static bool xive_source_is_valid(SpaprXive *xive, int i)
{
    return xive_eas_is_valid(&xive->eat[i]) &&
        (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i));
}

static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    /*
     * Skip the vCPU IPIs. These are created/reset when the vCPUs are
     * connected in kvmppc_xive_cpu_connect()
     */
    for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) {
        int ret;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        ret = kvmppc_xive_source_reset_one(xsrc, i, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}

static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);

    *addr = 0x0;
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

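/*
 * Read the PQ state of all valid sources, without side effect, and
 * save it in the XiveSource model.
 */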
static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_source_is_valid(xive, i)) {
            continue;
        }

        /* Perform a load without side effect to retrieve the PQ bits */
        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;

    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
    } else {
        if (val) {
            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
        } else {
            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
        }
    }

    xive_esb_trigger(xsrc, srcno);
}

/*
 * sPAPR XIVE interrupt controller (KVM)
 */
int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    int ret;

    assert(xive_end_is_valid(end));

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                            &kvm_eq, false, errp);
    if (ret < 0) {
        return ret;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);

    return 0;
}

int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    return
        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                          &kvm_eq, true, errp);
}

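/*
 * Reset the XIVE controller state in KVM.
 */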
void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

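/*
 * Retrieve from KVM the EQ state (toggle bit and index) of all valid
 * ENDs.
 */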
static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    int i;
    int ret;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notification and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, int running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq;
            uint8_t old_pq;

            if (!xive_source_is_valid(xive, i)) {
                continue;
            }

            pq = xive_source_esb_get(xsrc, i);
            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_source_is_valid(xive, i)) {
            continue;
        }

        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (hotplug event
         * for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notification that should be enqueued in the EQs and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    assert(xive->fd != -1);

    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case else we will override
     * the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

/*
 * The SpaprXive 'pre_save' method is called by the vmstate handler of
 * the SpaprXive model, after the XIVE controller is synced in the VM
 * change handler.
 */
int kvmppc_xive_pre_save(SpaprXive *xive)
{
    Error *local_err = NULL;
    int ret;

    assert(xive->fd != -1);

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    ret = kvmppc_xive_get_queues(xive, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        return ret;
    }

    return 0;
}

/*
 * The SpaprXive 'post_load' method is not called by a vmstate
 * handler. It is called at the sPAPR machine level at the end of the
 * migration sequence by the sPAPR IRQ backend 'post_load' method,
 * when all XIVE states have been transferred and loaded.
 */
int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
{
    Error *local_err = NULL;
    CPUState *cs;
    int i;
    int ret;

    /* The KVM XIVE device should be in use */
    assert(xive->fd != -1);

    /* Restore the ENDT first. The targeting depends on it. */
    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * We can only restore the source config if the source has been
     * previously set in KVM. Since we don't do that at reset time
     * when restoring a VM, let's do it now.
     */
    ret = kvmppc_xive_source_reset(&xive->source, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* Restore the EAT */
    for (i = 0; i < xive->nr_irqs; i++) {
        if (!xive_source_is_valid(xive, i)) {
            continue;
        }

        ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * Restore the thread interrupt contexts of initial CPUs.
740 * 741 * The context of hotplugged CPUs is restored later, by the 742 * 'post_load' handler of the XiveTCTX model because they are not 743 * available at the time the SpaprXive 'post_load' method is 744 * called. We can not restore the context of all CPUs in the 745 * 'post_load' handler of XiveTCTX because the machine is not 746 * necessarily connected to the KVM device at that time. 747 */ 748 CPU_FOREACH(cs) { 749 PowerPCCPU *cpu = POWERPC_CPU(cs); 750 751 ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); 752 if (ret < 0) { 753 goto fail; 754 } 755 } 756 757 /* The source states will be restored when the machine starts running */ 758 return 0; 759 760 fail: 761 error_report_err(local_err); 762 return ret; 763 } 764 765 /* Returns MAP_FAILED on error and sets errno */ 766 static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, 767 Error **errp) 768 { 769 void *addr; 770 uint32_t page_shift = 16; /* TODO: fix page_shift */ 771 772 addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd, 773 pgoff << page_shift); 774 if (addr == MAP_FAILED) { 775 error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); 776 } 777 778 return addr; 779 } 780 781 /* 782 * All the XIVE memory regions are now backed by mappings from the KVM 783 * XIVE device. 784 */ 785 int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, 786 Error **errp) 787 { 788 SpaprXive *xive = SPAPR_XIVE(intc); 789 XiveSource *xsrc = &xive->source; 790 size_t esb_len = xive_source_esb_len(xsrc); 791 size_t tima_len = 4ull << TM_SHIFT; 792 CPUState *cs; 793 int fd; 794 void *addr; 795 int ret; 796 797 /* 798 * The KVM XIVE device already in use. This is the case when 799 * rebooting under the XIVE-only interrupt mode. 800 */ 801 if (xive->fd != -1) { 802 return 0; 803 } 804 805 if (!kvmppc_has_cap_xive()) { 806 error_setg(errp, "IRQ_XIVE capability must be present for KVM"); 807 return -1; 808 } 809 810 /* First, create the KVM XIVE device */ 811 fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); 812 if (fd < 0) { 813 error_setg_errno(errp, -fd, "XIVE: error creating KVM device"); 814 return -1; 815 } 816 xive->fd = fd; 817 818 /* Tell KVM about the # of VCPUs we may have */ 819 if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL, 820 KVM_DEV_XIVE_NR_SERVERS)) { 821 ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, 822 KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true, 823 errp); 824 if (ret < 0) { 825 goto fail; 826 } 827 } 828 829 /* 830 * 1. Source ESB pages - KVM mapping 831 */ 832 addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp); 833 if (addr == MAP_FAILED) { 834 goto fail; 835 } 836 xsrc->esb_mmap = addr; 837 838 memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc), 839 "xive.esb-kvm", esb_len, xsrc->esb_mmap); 840 memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0, 841 &xsrc->esb_mmio_kvm, 1); 842 843 /* 844 * 2. END ESB pages (No KVM support yet) 845 */ 846 847 /* 848 * 3. 
     */
    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp);
    if (addr == MAP_FAILED) {
        goto fail;
    }
    xive->tm_mmap = addr;

    memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
                                        &xive->tm_mmio_kvm, 1);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    /* Connect the presenters to the initial VCPUs of the machine */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Update the KVM sources */
    ret = kvmppc_xive_source_reset(xsrc, errp);
    if (ret < 0) {
        goto fail;
    }

    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;
    return 0;

fail:
    kvmppc_xive_disconnect(intc);
    return -1;
}

void kvmppc_xive_disconnect(SpaprInterruptController *intc)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc;
    size_t esb_len;

    assert(xive->fd != -1);

    /* Clear the KVM mapping */
    xsrc = &xive->source;
    esb_len = xive_source_esb_len(xsrc);

    if (xsrc->esb_mmap) {
        memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
        object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
        munmap(xsrc->esb_mmap, esb_len);
        xsrc->esb_mmap = NULL;
    }

    if (xive->tm_mmap) {
        memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
        object_unparent(OBJECT(&xive->tm_mmio_kvm));
        munmap(xive->tm_mmap, 4ull << TM_SHIFT);
        xive->tm_mmap = NULL;
    }

    /*
     * When the KVM device fd is closed, the KVM device is destroyed
     * and removed from the list of devices of the VM. The VCPU
     * presenters are also detached from the device.
     */
    close(xive->fd);
    xive->fd = -1;

    kvm_kernel_irqchip = false;
    kvm_msi_via_irqfd_allowed = false;
    kvm_gsi_direct_mapping = false;

    /* Clear the local list of presenters (hotplug) */
    kvm_cpu_disable_all();

    /* VM Change state handler is not needed anymore */
    if (xive->change) {
        qemu_del_vm_change_state_handler(xive->change);
        xive->change = NULL;
    }
}