1 /* 2 * QEMU PowerPC sPAPR XIVE interrupt controller model 3 * 4 * Copyright (c) 2017-2019, IBM Corporation. 5 * 6 * This code is licensed under the GPL version 2 or later. See the 7 * COPYING file in the top-level directory. 8 */ 9 10 #include "qemu/osdep.h" 11 #include "qemu/log.h" 12 #include "qemu/error-report.h" 13 #include "qapi/error.h" 14 #include "target/ppc/cpu.h" 15 #include "sysemu/cpus.h" 16 #include "sysemu/kvm.h" 17 #include "sysemu/runstate.h" 18 #include "hw/ppc/spapr.h" 19 #include "hw/ppc/spapr_cpu_core.h" 20 #include "hw/ppc/spapr_xive.h" 21 #include "hw/ppc/xive.h" 22 #include "kvm_ppc.h" 23 24 #include <sys/ioctl.h> 25 26 /* 27 * Helpers for CPU hotplug 28 * 29 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE 30 */ 31 typedef struct KVMEnabledCPU { 32 unsigned long vcpu_id; 33 QLIST_ENTRY(KVMEnabledCPU) node; 34 } KVMEnabledCPU; 35 36 static QLIST_HEAD(, KVMEnabledCPU) 37 kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus); 38 39 static bool kvm_cpu_is_enabled(unsigned long vcpu_id) 40 { 41 KVMEnabledCPU *enabled_cpu; 42 43 QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) { 44 if (enabled_cpu->vcpu_id == vcpu_id) { 45 return true; 46 } 47 } 48 return false; 49 } 50 51 static void kvm_cpu_enable(CPUState *cs) 52 { 53 KVMEnabledCPU *enabled_cpu; 54 unsigned long vcpu_id = kvm_arch_vcpu_id(cs); 55 56 enabled_cpu = g_malloc(sizeof(*enabled_cpu)); 57 enabled_cpu->vcpu_id = vcpu_id; 58 QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node); 59 } 60 61 static void kvm_cpu_disable_all(void) 62 { 63 KVMEnabledCPU *enabled_cpu, *next; 64 65 QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) { 66 QLIST_REMOVE(enabled_cpu, node); 67 g_free(enabled_cpu); 68 } 69 } 70 71 /* 72 * XIVE Thread Interrupt Management context (KVM) 73 */ 74 75 int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp) 76 { 77 SpaprXive *xive = SPAPR_XIVE(tctx->xptr); 78 uint64_t state[2]; 79 int ret; 80 81 assert(xive->fd != -1); 82 83 /* word0 and word1 of the OS ring. */ 84 state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]); 85 86 ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); 87 if (ret != 0) { 88 error_setg_errno(errp, -ret, 89 "XIVE: could not restore KVM state of CPU %ld", 90 kvm_arch_vcpu_id(tctx->cs)); 91 return ret; 92 } 93 94 return 0; 95 } 96 97 int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp) 98 { 99 SpaprXive *xive = SPAPR_XIVE(tctx->xptr); 100 uint64_t state[2] = { 0 }; 101 int ret; 102 103 assert(xive->fd != -1); 104 105 ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state); 106 if (ret != 0) { 107 error_setg_errno(errp, -ret, 108 "XIVE: could not capture KVM state of CPU %ld", 109 kvm_arch_vcpu_id(tctx->cs)); 110 return ret; 111 } 112 113 /* word0 and word1 of the OS ring. */ 114 *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0]; 115 116 return 0; 117 } 118 119 typedef struct { 120 XiveTCTX *tctx; 121 Error **errp; 122 int ret; 123 } XiveCpuGetState; 124 125 static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu, 126 run_on_cpu_data arg) 127 { 128 XiveCpuGetState *s = arg.host_ptr; 129 130 s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp); 131 } 132 133 int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp) 134 { 135 XiveCpuGetState s = { 136 .tctx = tctx, 137 .errp = errp, 138 }; 139 140 /* 141 * Kick the vCPU to make sure they are available for the KVM ioctl. 142 */ 143 run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state, 144 RUN_ON_CPU_HOST_PTR(&s)); 145 146 return s.ret; 147 } 148 149 static int kvmppc_xive_reset_ipi(SpaprXive *xive, CPUState *cs, Error **errp) 150 { 151 unsigned long ipi = kvm_arch_vcpu_id(cs); 152 uint64_t state = 0; 153 154 return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, ipi, 155 &state, true, errp); 156 } 157 158 int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp) 159 { 160 ERRP_GUARD(); 161 SpaprXive *xive = SPAPR_XIVE(tctx->xptr); 162 unsigned long vcpu_id; 163 int ret; 164 165 assert(xive->fd != -1); 166 167 /* Check if CPU was hot unplugged and replugged. */ 168 if (kvm_cpu_is_enabled(kvm_arch_vcpu_id(tctx->cs))) { 169 return 0; 170 } 171 172 vcpu_id = kvm_arch_vcpu_id(tctx->cs); 173 174 ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd, 175 vcpu_id, 0); 176 if (ret < 0) { 177 error_setg_errno(errp, -ret, 178 "XIVE: unable to connect CPU%ld to KVM device", 179 vcpu_id); 180 if (ret == -ENOSPC) { 181 error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n", 182 MACHINE(qdev_get_machine())->smp.max_cpus); 183 } 184 return ret; 185 } 186 187 /* Create/reset the vCPU IPI */ 188 ret = kvmppc_xive_reset_ipi(xive, tctx->cs, errp); 189 if (ret < 0) { 190 return ret; 191 } 192 193 kvm_cpu_enable(tctx->cs); 194 return 0; 195 } 196 197 /* 198 * XIVE Interrupt Source (KVM) 199 */ 200 201 int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas, 202 Error **errp) 203 { 204 uint32_t end_idx; 205 uint32_t end_blk; 206 uint8_t priority; 207 uint32_t server; 208 bool masked; 209 uint32_t eisn; 210 uint64_t kvm_src; 211 212 assert(xive_eas_is_valid(eas)); 213 214 end_idx = xive_get_field64(EAS_END_INDEX, eas->w); 215 end_blk = xive_get_field64(EAS_END_BLOCK, eas->w); 216 eisn = xive_get_field64(EAS_END_DATA, eas->w); 217 masked = xive_eas_is_masked(eas); 218 219 spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); 220 221 kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT & 222 KVM_XIVE_SOURCE_PRIORITY_MASK; 223 kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT & 224 KVM_XIVE_SOURCE_SERVER_MASK; 225 kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) & 226 KVM_XIVE_SOURCE_MASKED_MASK; 227 kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) & 228 KVM_XIVE_SOURCE_EISN_MASK; 229 230 return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn, 231 &kvm_src, true, errp); 232 } 233 234 void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp) 235 { 236 kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn, 237 NULL, true, errp); 238 } 239 240 /* 241 * At reset, the interrupt sources are simply created and MASKED. We 242 * only need to inform the KVM XIVE device about their type: LSI or 243 * MSI. 244 */ 245 int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp) 246 { 247 SpaprXive *xive = SPAPR_XIVE(xsrc->xive); 248 uint64_t state = 0; 249 250 assert(xive->fd != -1); 251 252 /* 253 * The vCPU IPIs are now allocated in kvmppc_xive_cpu_connect() 254 * and not with all sources in kvmppc_xive_source_reset() 255 */ 256 assert(srcno >= SPAPR_XIRQ_BASE); 257 258 if (xive_source_irq_is_lsi(xsrc, srcno)) { 259 state |= KVM_XIVE_LEVEL_SENSITIVE; 260 if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { 261 state |= KVM_XIVE_LEVEL_ASSERTED; 262 } 263 } 264 265 return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state, 266 true, errp); 267 } 268 269 /* 270 * To be valid, a source must have been claimed by the machine (valid 271 * entry in the EAS table) and if it is a vCPU IPI, the vCPU should 272 * have been enabled, which means the IPI has been allocated in 273 * kvmppc_xive_cpu_connect(). 274 */ 275 static bool xive_source_is_valid(SpaprXive *xive, int i) 276 { 277 return xive_eas_is_valid(&xive->eat[i]) && 278 (i >= SPAPR_XIRQ_BASE || kvm_cpu_is_enabled(i)); 279 } 280 281 static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp) 282 { 283 SpaprXive *xive = SPAPR_XIVE(xsrc->xive); 284 int i; 285 286 /* 287 * Skip the vCPU IPIs. These are created/reset when the vCPUs are 288 * connected in kvmppc_xive_cpu_connect() 289 */ 290 for (i = SPAPR_XIRQ_BASE; i < xsrc->nr_irqs; i++) { 291 int ret; 292 293 if (!xive_eas_is_valid(&xive->eat[i])) { 294 continue; 295 } 296 297 ret = kvmppc_xive_source_reset_one(xsrc, i, errp); 298 if (ret < 0) { 299 return ret; 300 } 301 } 302 303 return 0; 304 } 305 306 /* 307 * This is used to perform the magic loads on the ESB pages, described 308 * in xive.h. 309 * 310 * Memory barriers should not be needed for loads (no store for now). 311 */ 312 static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, 313 uint64_t data, bool write) 314 { 315 uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) + 316 offset; 317 318 if (write) { 319 *addr = cpu_to_be64(data); 320 return -1; 321 } else { 322 /* Prevent the compiler from optimizing away the load */ 323 volatile uint64_t value = be64_to_cpu(*addr); 324 return value; 325 } 326 } 327 328 static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset) 329 { 330 return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3; 331 } 332 333 static void xive_esb_trigger(XiveSource *xsrc, int srcno) 334 { 335 uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno); 336 337 *addr = 0x0; 338 } 339 340 uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset, 341 uint64_t data, bool write) 342 { 343 if (write) { 344 return xive_esb_rw(xsrc, srcno, offset, data, 1); 345 } 346 347 /* 348 * Special Load EOI handling for LSI sources. Q bit is never set 349 * and the interrupt should be re-triggered if the level is still 350 * asserted. 351 */ 352 if (xive_source_irq_is_lsi(xsrc, srcno) && 353 offset == XIVE_ESB_LOAD_EOI) { 354 xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00); 355 if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) { 356 xive_esb_trigger(xsrc, srcno); 357 } 358 return 0; 359 } else { 360 return xive_esb_rw(xsrc, srcno, offset, 0, 0); 361 } 362 } 363 364 static void kvmppc_xive_source_get_state(XiveSource *xsrc) 365 { 366 SpaprXive *xive = SPAPR_XIVE(xsrc->xive); 367 int i; 368 369 for (i = 0; i < xsrc->nr_irqs; i++) { 370 uint8_t pq; 371 372 if (!xive_source_is_valid(xive, i)) { 373 continue; 374 } 375 376 /* Perform a load without side effect to retrieve the PQ bits */ 377 pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); 378 379 /* and save PQ locally */ 380 xive_source_esb_set(xsrc, i, pq); 381 } 382 } 383 384 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val) 385 { 386 XiveSource *xsrc = opaque; 387 388 if (!xive_source_irq_is_lsi(xsrc, srcno)) { 389 if (!val) { 390 return; 391 } 392 } else { 393 if (val) { 394 xsrc->status[srcno] |= XIVE_STATUS_ASSERTED; 395 } else { 396 xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED; 397 } 398 } 399 400 xive_esb_trigger(xsrc, srcno); 401 } 402 403 /* 404 * sPAPR XIVE interrupt controller (KVM) 405 */ 406 int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk, 407 uint32_t end_idx, XiveEND *end, 408 Error **errp) 409 { 410 struct kvm_ppc_xive_eq kvm_eq = { 0 }; 411 uint64_t kvm_eq_idx; 412 uint8_t priority; 413 uint32_t server; 414 int ret; 415 416 assert(xive_end_is_valid(end)); 417 418 /* Encode the tuple (server, prio) as a KVM EQ index */ 419 spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); 420 421 kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & 422 KVM_XIVE_EQ_PRIORITY_MASK; 423 kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & 424 KVM_XIVE_EQ_SERVER_MASK; 425 426 ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, 427 &kvm_eq, false, errp); 428 if (ret < 0) { 429 return ret; 430 } 431 432 /* 433 * The EQ index and toggle bit are updated by HW. These are the 434 * only fields from KVM we want to update QEMU with. The other END 435 * fields should already be in the QEMU END table. 436 */ 437 end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) | 438 xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex); 439 440 return 0; 441 } 442 443 int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk, 444 uint32_t end_idx, XiveEND *end, 445 Error **errp) 446 { 447 struct kvm_ppc_xive_eq kvm_eq = { 0 }; 448 uint64_t kvm_eq_idx; 449 uint8_t priority; 450 uint32_t server; 451 452 /* 453 * Build the KVM state from the local END structure. 454 */ 455 456 kvm_eq.flags = 0; 457 if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) { 458 kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; 459 } 460 461 /* 462 * If the hcall is disabling the EQ, set the size and page address 463 * to zero. When migrating, only valid ENDs are taken into 464 * account. 465 */ 466 if (xive_end_is_valid(end)) { 467 kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12; 468 kvm_eq.qaddr = xive_end_qaddr(end); 469 /* 470 * The EQ toggle bit and index should only be relevant when 471 * restoring the EQ state 472 */ 473 kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1); 474 kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1); 475 } else { 476 kvm_eq.qshift = 0; 477 kvm_eq.qaddr = 0; 478 } 479 480 /* Encode the tuple (server, prio) as a KVM EQ index */ 481 spapr_xive_end_to_target(end_blk, end_idx, &server, &priority); 482 483 kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT & 484 KVM_XIVE_EQ_PRIORITY_MASK; 485 kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT & 486 KVM_XIVE_EQ_SERVER_MASK; 487 488 return 489 kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx, 490 &kvm_eq, true, errp); 491 } 492 493 void kvmppc_xive_reset(SpaprXive *xive, Error **errp) 494 { 495 kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET, 496 NULL, true, errp); 497 } 498 499 static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) 500 { 501 int i; 502 int ret; 503 504 for (i = 0; i < xive->nr_ends; i++) { 505 if (!xive_end_is_valid(&xive->endt[i])) { 506 continue; 507 } 508 509 ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, 510 &xive->endt[i], errp); 511 if (ret < 0) { 512 return ret; 513 } 514 } 515 516 return 0; 517 } 518 519 /* 520 * The primary goal of the XIVE VM change handler is to mark the EQ 521 * pages dirty when all XIVE event notifications have stopped. 522 * 523 * Whenever the VM is stopped, the VM change handler sets the source 524 * PQs to PENDING to stop the flow of events and to possibly catch a 525 * triggered interrupt occuring while the VM is stopped. The previous 526 * state is saved in anticipation of a migration. The XIVE controller 527 * is then synced through KVM to flush any in-flight event 528 * notification and stabilize the EQs. 529 * 530 * At this stage, we can mark the EQ page dirty and let a migration 531 * sequence transfer the EQ pages to the destination, which is done 532 * just after the stop state. 533 * 534 * The previous configuration of the sources is restored when the VM 535 * runs again. If an interrupt was queued while the VM was stopped, 536 * simply generate a trigger. 537 */ 538 static void kvmppc_xive_change_state_handler(void *opaque, int running, 539 RunState state) 540 { 541 SpaprXive *xive = opaque; 542 XiveSource *xsrc = &xive->source; 543 Error *local_err = NULL; 544 int i; 545 546 /* 547 * Restore the sources to their initial state. This is called when 548 * the VM resumes after a stop or a migration. 549 */ 550 if (running) { 551 for (i = 0; i < xsrc->nr_irqs; i++) { 552 uint8_t pq; 553 uint8_t old_pq; 554 555 if (!xive_source_is_valid(xive, i)) { 556 continue; 557 } 558 559 pq = xive_source_esb_get(xsrc, i); 560 old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8)); 561 562 /* 563 * An interrupt was queued while the VM was stopped, 564 * generate a trigger. 565 */ 566 if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) { 567 xive_esb_trigger(xsrc, i); 568 } 569 } 570 571 return; 572 } 573 574 /* 575 * Mask the sources, to stop the flow of event notifications, and 576 * save the PQs locally in the XiveSource object. The XiveSource 577 * state will be collected later on by its vmstate handler if a 578 * migration is in progress. 579 */ 580 for (i = 0; i < xsrc->nr_irqs; i++) { 581 uint8_t pq; 582 583 if (!xive_source_is_valid(xive, i)) { 584 continue; 585 } 586 587 pq = xive_esb_read(xsrc, i, XIVE_ESB_GET); 588 589 /* 590 * PQ is set to PENDING to possibly catch a triggered 591 * interrupt occuring while the VM is stopped (hotplug event 592 * for instance) . 593 */ 594 if (pq != XIVE_ESB_OFF) { 595 pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10); 596 } 597 xive_source_esb_set(xsrc, i, pq); 598 } 599 600 /* 601 * Sync the XIVE controller in KVM, to flush in-flight event 602 * notification that should be enqueued in the EQs and mark the 603 * XIVE EQ pages dirty to collect all updates. 604 */ 605 kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, 606 KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err); 607 if (local_err) { 608 error_report_err(local_err); 609 return; 610 } 611 } 612 613 void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp) 614 { 615 assert(xive->fd != -1); 616 617 /* 618 * When the VM is stopped, the sources are masked and the previous 619 * state is saved in anticipation of a migration. We should not 620 * synchronize the source state in that case else we will override 621 * the saved state. 622 */ 623 if (runstate_is_running()) { 624 kvmppc_xive_source_get_state(&xive->source); 625 } 626 627 /* EAT: there is no extra state to query from KVM */ 628 629 /* ENDT */ 630 kvmppc_xive_get_queues(xive, errp); 631 } 632 633 /* 634 * The SpaprXive 'pre_save' method is called by the vmstate handler of 635 * the SpaprXive model, after the XIVE controller is synced in the VM 636 * change handler. 637 */ 638 int kvmppc_xive_pre_save(SpaprXive *xive) 639 { 640 Error *local_err = NULL; 641 int ret; 642 643 assert(xive->fd != -1); 644 645 /* EAT: there is no extra state to query from KVM */ 646 647 /* ENDT */ 648 ret = kvmppc_xive_get_queues(xive, &local_err); 649 if (ret < 0) { 650 error_report_err(local_err); 651 return ret; 652 } 653 654 return 0; 655 } 656 657 /* 658 * The SpaprXive 'post_load' method is not called by a vmstate 659 * handler. It is called at the sPAPR machine level at the end of the 660 * migration sequence by the sPAPR IRQ backend 'post_load' method, 661 * when all XIVE states have been transferred and loaded. 662 */ 663 int kvmppc_xive_post_load(SpaprXive *xive, int version_id) 664 { 665 Error *local_err = NULL; 666 CPUState *cs; 667 int i; 668 int ret; 669 670 /* The KVM XIVE device should be in use */ 671 assert(xive->fd != -1); 672 673 /* Restore the ENDT first. The targetting depends on it. */ 674 for (i = 0; i < xive->nr_ends; i++) { 675 if (!xive_end_is_valid(&xive->endt[i])) { 676 continue; 677 } 678 679 ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i, 680 &xive->endt[i], &local_err); 681 if (ret < 0) { 682 goto fail; 683 } 684 } 685 686 /* 687 * We can only restore the source config if the source has been 688 * previously set in KVM. Since we don't do that at reset time 689 * when restoring a VM, let's do it now. 690 */ 691 ret = kvmppc_xive_source_reset(&xive->source, &local_err); 692 if (ret < 0) { 693 goto fail; 694 } 695 696 /* Restore the EAT */ 697 for (i = 0; i < xive->nr_irqs; i++) { 698 if (!xive_source_is_valid(xive, i)) { 699 continue; 700 } 701 702 ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err); 703 if (ret < 0) { 704 goto fail; 705 } 706 } 707 708 /* 709 * Restore the thread interrupt contexts of initial CPUs. 710 * 711 * The context of hotplugged CPUs is restored later, by the 712 * 'post_load' handler of the XiveTCTX model because they are not 713 * available at the time the SpaprXive 'post_load' method is 714 * called. We can not restore the context of all CPUs in the 715 * 'post_load' handler of XiveTCTX because the machine is not 716 * necessarily connected to the KVM device at that time. 717 */ 718 CPU_FOREACH(cs) { 719 PowerPCCPU *cpu = POWERPC_CPU(cs); 720 721 ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err); 722 if (ret < 0) { 723 goto fail; 724 } 725 } 726 727 /* The source states will be restored when the machine starts running */ 728 return 0; 729 730 fail: 731 error_report_err(local_err); 732 return ret; 733 } 734 735 /* Returns MAP_FAILED on error and sets errno */ 736 static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len, 737 Error **errp) 738 { 739 void *addr; 740 uint32_t page_shift = 16; /* TODO: fix page_shift */ 741 742 addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd, 743 pgoff << page_shift); 744 if (addr == MAP_FAILED) { 745 error_setg_errno(errp, errno, "XIVE: unable to set memory mapping"); 746 } 747 748 return addr; 749 } 750 751 /* 752 * All the XIVE memory regions are now backed by mappings from the KVM 753 * XIVE device. 754 */ 755 int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers, 756 Error **errp) 757 { 758 SpaprXive *xive = SPAPR_XIVE(intc); 759 XiveSource *xsrc = &xive->source; 760 size_t esb_len = xive_source_esb_len(xsrc); 761 size_t tima_len = 4ull << TM_SHIFT; 762 CPUState *cs; 763 int fd; 764 void *addr; 765 int ret; 766 767 /* 768 * The KVM XIVE device already in use. This is the case when 769 * rebooting under the XIVE-only interrupt mode. 770 */ 771 if (xive->fd != -1) { 772 return 0; 773 } 774 775 if (!kvmppc_has_cap_xive()) { 776 error_setg(errp, "IRQ_XIVE capability must be present for KVM"); 777 return -1; 778 } 779 780 /* First, create the KVM XIVE device */ 781 fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false); 782 if (fd < 0) { 783 error_setg_errno(errp, -fd, "XIVE: error creating KVM device"); 784 return -1; 785 } 786 xive->fd = fd; 787 788 /* Tell KVM about the # of VCPUs we may have */ 789 if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL, 790 KVM_DEV_XIVE_NR_SERVERS)) { 791 ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, 792 KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true, 793 errp); 794 if (ret < 0) { 795 goto fail; 796 } 797 } 798 799 /* 800 * 1. Source ESB pages - KVM mapping 801 */ 802 addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp); 803 if (addr == MAP_FAILED) { 804 goto fail; 805 } 806 xsrc->esb_mmap = addr; 807 808 memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc), 809 "xive.esb-kvm", esb_len, xsrc->esb_mmap); 810 memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0, 811 &xsrc->esb_mmio_kvm, 1); 812 813 /* 814 * 2. END ESB pages (No KVM support yet) 815 */ 816 817 /* 818 * 3. TIMA pages - KVM mapping 819 */ 820 addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp); 821 if (addr == MAP_FAILED) { 822 goto fail; 823 } 824 xive->tm_mmap = addr; 825 826 memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive), 827 "xive.tima", tima_len, xive->tm_mmap); 828 memory_region_add_subregion_overlap(&xive->tm_mmio, 0, 829 &xive->tm_mmio_kvm, 1); 830 831 xive->change = qemu_add_vm_change_state_handler( 832 kvmppc_xive_change_state_handler, xive); 833 834 /* Connect the presenters to the initial VCPUs of the machine */ 835 CPU_FOREACH(cs) { 836 PowerPCCPU *cpu = POWERPC_CPU(cs); 837 838 ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp); 839 if (ret < 0) { 840 goto fail; 841 } 842 } 843 844 /* Update the KVM sources */ 845 ret = kvmppc_xive_source_reset(xsrc, errp); 846 if (ret < 0) { 847 goto fail; 848 } 849 850 kvm_kernel_irqchip = true; 851 kvm_msi_via_irqfd_allowed = true; 852 kvm_gsi_direct_mapping = true; 853 return 0; 854 855 fail: 856 kvmppc_xive_disconnect(intc); 857 return -1; 858 } 859 860 void kvmppc_xive_disconnect(SpaprInterruptController *intc) 861 { 862 SpaprXive *xive = SPAPR_XIVE(intc); 863 XiveSource *xsrc; 864 size_t esb_len; 865 866 assert(xive->fd != -1); 867 868 /* Clear the KVM mapping */ 869 xsrc = &xive->source; 870 esb_len = xive_source_esb_len(xsrc); 871 872 if (xsrc->esb_mmap) { 873 memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm); 874 object_unparent(OBJECT(&xsrc->esb_mmio_kvm)); 875 munmap(xsrc->esb_mmap, esb_len); 876 xsrc->esb_mmap = NULL; 877 } 878 879 if (xive->tm_mmap) { 880 memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm); 881 object_unparent(OBJECT(&xive->tm_mmio_kvm)); 882 munmap(xive->tm_mmap, 4ull << TM_SHIFT); 883 xive->tm_mmap = NULL; 884 } 885 886 /* 887 * When the KVM device fd is closed, the KVM device is destroyed 888 * and removed from the list of devices of the VM. The VCPU 889 * presenters are also detached from the device. 890 */ 891 close(xive->fd); 892 xive->fd = -1; 893 894 kvm_kernel_irqchip = false; 895 kvm_msi_via_irqfd_allowed = false; 896 kvm_gsi_direct_mapping = false; 897 898 /* Clear the local list of presenter (hotplug) */ 899 kvm_cpu_disable_all(); 900 901 /* VM Change state handler is not needed anymore */ 902 if (xive->change) { 903 qemu_del_vm_change_state_handler(xive->change); 904 xive->change = NULL; 905 } 906 } 907