/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

static void kvm_cpu_disable_all(void)
{
    KVMEnabledCPU *enabled_cpu, *next;

    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
        QLIST_REMOVE(enabled_cpu, node);
        g_free(enabled_cpu);
    }
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */

void kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
    uint64_t state[2];
    int ret;

    /* The KVM XIVE device is not in use yet */
    if (xive->fd == -1) {
        return;
    }

    /* word0 and word1 of the OS ring. */
    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);

    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, errno,
                         "XIVE: could not restore KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
    }
}

void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_MACHINE(qdev_get_machine())->xive;
    uint64_t state[2] = { 0 };
    int ret;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, errno,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
}
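
/*
 * Capturing the OS ring state is a vCPU ioctl (KVM_REG_PPC_VP_STATE), so
 * the capture below is scheduled on the target vCPU thread with
 * run_on_cpu().
 */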

typedef struct {
    XiveTCTX *tctx;
    Error *err;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
}

void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .err = NULL,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    if (s.err) {
        error_propagate(errp, s.err);
        return;
    }
}

void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    SpaprXive *xive = SPAPR_MACHINE(ms)->xive;
    unsigned long vcpu_id;
    int ret;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(tctx->cs)) {
        return;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        Error *local_err = NULL;

        error_setg(&local_err,
                   "XIVE: unable to connect CPU%ld to KVM device: %s",
                   vcpu_id, strerror(errno));
        if (errno == ENOSPC) {
            error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n",
                              ms->smp.max_cpus);
        }
        error_propagate(errp, local_err);
        return;
    }

    kvm_cpu_enable(tctx->cs);
}

/*
 * XIVE Interrupt Source (KVM)
 */

void kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                   Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;
    Error *local_err = NULL;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
        KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
        KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
        KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
        KVM_XIVE_SOURCE_EISN_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                      &kvm_src, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}
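
/*
 * Ask KVM to perform a XIVE sync on the source: in-flight event
 * notifications for this LISN are flushed to their Event Queue.
 */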
void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return -ENODEV;
    }

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                             true, errp);
}

static void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        Error *local_err = NULL;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        kvmppc_xive_source_reset_one(xsrc, i, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}
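
/*
 * A store to the ESB trigger page injects a trigger for the source, as if
 * the device had raised its interrupt line.
 */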
static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);

    *addr = 0x0;
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /* Perform a load without side effect to retrieve the PQ bits */
        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;

    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
    } else {
        if (val) {
            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
        } else {
            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
        }
    }

    xive_esb_trigger(xsrc, srcno);
}

/*
 * sPAPR XIVE interrupt controller (KVM)
 */
void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    assert(xive_end_is_valid(end));

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, false, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
}
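
/*
 * Configure in KVM the EQ associated with the (server, priority) tuple
 * derived from this END, or reset it when the END is invalid.
 */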
void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    Error *local_err = NULL;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                      &kvm_eq, true, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }
}

void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    Error *local_err = NULL;
    int i;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                     &xive->endt[i], &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}
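
/*
 * A reminder on the ESB PQ bits manipulated below (see xive.h): P means an
 * event notification was sent, Q means a further trigger arrived while the
 * source was pending. PQ=00 is the enabled/reset state, 01 is off (masked),
 * 10 is pending and 11 is queued.
 */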

/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notification and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, int running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq;
            uint8_t old_pq;

            if (!xive_eas_is_valid(&xive->eat[i])) {
                continue;
            }

            pq = xive_source_esb_get(xsrc, i);
            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (hotplug event
         * for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notification that should be enqueued in the EQs and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return;
    }

    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case else we will override
     * the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

/*
 * The SpaprXive 'pre_save' method is called by the vmstate handler of
 * the SpaprXive model, after the XIVE controller is synced in the VM
 * change handler.
 */
int kvmppc_xive_pre_save(SpaprXive *xive)
{
    Error *local_err = NULL;

    /* The KVM XIVE device is not in use */
    if (xive->fd == -1) {
        return 0;
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return -1;
    }

    return 0;
}

/*
 * The SpaprXive 'post_load' method is not called by a vmstate
 * handler. It is called at the sPAPR machine level at the end of the
 * migration sequence by the sPAPR IRQ backend 'post_load' method,
 * when all XIVE states have been transferred and loaded.
 */
int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
{
    Error *local_err = NULL;
    CPUState *cs;
    int i;

    /* The KVM XIVE device should be in use */
    assert(xive->fd != -1);

    /* Restore the ENDT first. The targeting depends on it. */
    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                     &xive->endt[i], &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /* Restore the EAT */
    for (i = 0; i < xive->nr_irqs; i++) {
        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /*
         * We can only restore the source config if the source has been
         * previously set in KVM. Since we don't do that for all interrupts
         * at reset time anymore, let's do it now.
         */
        kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }

        kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /*
     * Restore the thread interrupt contexts of initial CPUs.
     *
     * The context of hotplugged CPUs is restored later, by the
     * 'post_load' handler of the XiveTCTX model because they are not
     * available at the time the SpaprXive 'post_load' method is
     * called. We cannot restore the context of all CPUs in the
     * 'post_load' handler of XiveTCTX because the machine is not
     * necessarily connected to the KVM device at that time.
     */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
        if (local_err) {
            error_report_err(local_err);
            return -1;
        }
    }

    /* The source states will be restored when the machine starts running */
    return 0;
}

static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                              Error **errp)
{
    void *addr;
    uint32_t page_shift = 16; /* TODO: fix page_shift */

    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
                pgoff << page_shift);
    if (addr == MAP_FAILED) {
        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
        return NULL;
    }

    return addr;
}

/*
 * All the XIVE memory regions are now backed by mappings from the KVM
 * XIVE device.
 */
int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
                        Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    size_t esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;
    size_t tima_len = 4ull << TM_SHIFT;
    CPUState *cs;

    /*
     * The KVM XIVE device is already in use. This is the case when
     * rebooting under the XIVE-only interrupt mode.
     */
    if (xive->fd != -1) {
        return 0;
    }

    if (!kvmppc_has_cap_xive()) {
        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
        return -1;
    }

    /* First, create the KVM XIVE device */
    xive->fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
    if (xive->fd < 0) {
        error_setg_errno(errp, -xive->fd, "XIVE: error creating KVM device");
        return -1;
    }

    /* Tell KVM about the # of VCPUs we may have */
    if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS)) {
        if (kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
                              &local_err)) {
            goto fail;
        }
    }

    /*
     * 1. Source ESB pages - KVM mapping
     */
    xsrc->esb_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len,
                                      &local_err);
    if (local_err) {
        goto fail;
    }

    memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
                                      "xive.esb", esb_len, xsrc->esb_mmap);
    memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
                                        &xsrc->esb_mmio_kvm, 1);

    /*
     * 2. END ESB pages (No KVM support yet)
     */

    /*
     * 3. TIMA pages - KVM mapping
     */
    xive->tm_mmap = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len,
                                     &local_err);
    if (local_err) {
        goto fail;
    }
    memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
                                        &xive->tm_mmio_kvm, 1);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    /* Connect the presenters to the initial VCPUs of the machine */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, &local_err);
        if (local_err) {
            goto fail;
        }
    }

    /* Update the KVM sources */
    kvmppc_xive_source_reset(xsrc, &local_err);
    if (local_err) {
        goto fail;
    }

    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;
    return 0;

fail:
    error_propagate(errp, local_err);
    kvmppc_xive_disconnect(intc);
    return -1;
}

void kvmppc_xive_disconnect(SpaprInterruptController *intc)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc;
    size_t esb_len;

    /* The KVM XIVE device is not in use */
    if (!xive || xive->fd == -1) {
        return;
    }

    /* Clear the KVM mapping */
    xsrc = &xive->source;
    esb_len = (1ull << xsrc->esb_shift) * xsrc->nr_irqs;

    if (xsrc->esb_mmap) {
        memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
        object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
        munmap(xsrc->esb_mmap, esb_len);
        xsrc->esb_mmap = NULL;
    }

    if (xive->tm_mmap) {
        memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
        object_unparent(OBJECT(&xive->tm_mmio_kvm));
        munmap(xive->tm_mmap, 4ull << TM_SHIFT);
        xive->tm_mmap = NULL;
    }

    /*
     * When the KVM device fd is closed, the KVM device is destroyed
     * and removed from the list of devices of the VM. The VCPU
     * presenters are also detached from the device.
     */
    if (xive->fd != -1) {
        close(xive->fd);
        xive->fd = -1;
    }

    kvm_kernel_irqchip = false;
    kvm_msi_via_irqfd_allowed = false;
    kvm_gsi_direct_mapping = false;

    /* Clear the local list of presenters (hotplug) */
    kvm_cpu_disable_all();

    /* VM Change state handler is not needed anymore */
    if (xive->change) {
        qemu_del_vm_change_state_handler(xive->change);
        xive->change = NULL;
    }
}