/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "sysemu/runstate.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/spapr_xive.h"
#include "hw/ppc/xive.h"
#include "kvm_ppc.h"
#include "trace.h"

#include <sys/ioctl.h>

/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
typedef struct KVMEnabledCPU {
    unsigned long vcpu_id;
    QLIST_ENTRY(KVMEnabledCPU) node;
} KVMEnabledCPU;

static QLIST_HEAD(, KVMEnabledCPU)
    kvm_enabled_cpus = QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus);

static bool kvm_cpu_is_enabled(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    QLIST_FOREACH(enabled_cpu, &kvm_enabled_cpus, node) {
        if (enabled_cpu->vcpu_id == vcpu_id) {
            return true;
        }
    }
    return false;
}

static void kvm_cpu_enable(CPUState *cs)
{
    KVMEnabledCPU *enabled_cpu;
    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);

    enabled_cpu = g_malloc(sizeof(*enabled_cpu));
    enabled_cpu->vcpu_id = vcpu_id;
    QLIST_INSERT_HEAD(&kvm_enabled_cpus, enabled_cpu, node);
}

static void kvm_cpu_disable_all(void)
{
    KVMEnabledCPU *enabled_cpu, *next;

    QLIST_FOREACH_SAFE(enabled_cpu, &kvm_enabled_cpus, node, next) {
        QLIST_REMOVE(enabled_cpu, node);
        g_free(enabled_cpu);
    }
}

/*
 * XIVE Thread Interrupt Management context (KVM)
 */

int kvmppc_xive_cpu_set_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2];
    int ret;

    assert(xive->fd != -1);

    /* word0 and word1 of the OS ring. */
    state[0] = *((uint64_t *) &tctx->regs[TM_QW1_OS]);

    ret = kvm_set_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not restore KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    return 0;
}

int kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    uint64_t state[2] = { 0 };
    int ret;

    assert(xive->fd != -1);

    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
    if (ret != 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: could not capture KVM state of CPU %ld",
                         kvm_arch_vcpu_id(tctx->cs));
        return ret;
    }

    /* word0 and word1 of the OS ring. */
    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];

    return 0;
}

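/*
 * Note on the state[] buffer used above: KVM_REG_PPC_VP_STATE is a
 * 128-bit register but only state[0] is relevant here. It holds word0
 * and word1 of the OS ring of the TIMA, i.e. the NSR, CPPR, IPB and
 * LSMFB bytes among others (going by the TIMA layout in xive_regs.h).
 */
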
typedef struct {
    XiveTCTX *tctx;
    Error **errp;
    int ret;
} XiveCpuGetState;

static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
                                                 run_on_cpu_data arg)
{
    XiveCpuGetState *s = arg.host_ptr;

    s->ret = kvmppc_xive_cpu_get_state(s->tctx, s->errp);
}

int kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
{
    XiveCpuGetState s = {
        .tctx = tctx,
        .errp = errp,
    };

    /*
     * Kick the vCPU to make sure it is available for the KVM ioctl.
     */
    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
               RUN_ON_CPU_HOST_PTR(&s));

    return s.ret;
}

int kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
{
    ERRP_GUARD();
    SpaprXive *xive = SPAPR_XIVE(tctx->xptr);
    unsigned long vcpu_id;
    int ret;

    assert(xive->fd != -1);

    /* Check if CPU was hot unplugged and replugged. */
    if (kvm_cpu_is_enabled(tctx->cs)) {
        return 0;
    }

    vcpu_id = kvm_arch_vcpu_id(tctx->cs);

    trace_kvm_xive_cpu_connect(vcpu_id);

    ret = kvm_vcpu_enable_cap(tctx->cs, KVM_CAP_PPC_IRQ_XIVE, 0, xive->fd,
                              vcpu_id, 0);
    if (ret < 0) {
        error_setg_errno(errp, -ret,
                         "XIVE: unable to connect CPU%ld to KVM device",
                         vcpu_id);
        if (ret == -ENOSPC) {
            error_append_hint(errp, "Try -smp maxcpus=N with N < %u\n",
                              MACHINE(qdev_get_machine())->smp.max_cpus);
        }
        return ret;
    }

    kvm_cpu_enable(tctx->cs);
    return 0;
}

/*
 * XIVE Interrupt Source (KVM)
 */

int kvmppc_xive_set_source_config(SpaprXive *xive, uint32_t lisn, XiveEAS *eas,
                                  Error **errp)
{
    uint32_t end_idx;
    uint32_t end_blk;
    uint8_t priority;
    uint32_t server;
    bool masked;
    uint32_t eisn;
    uint64_t kvm_src;

    assert(xive_eas_is_valid(eas));

    end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
    end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
    eisn = xive_get_field64(EAS_END_DATA, eas->w);
    masked = xive_eas_is_masked(eas);

    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_src = priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT &
        KVM_XIVE_SOURCE_PRIORITY_MASK;
    kvm_src |= server << KVM_XIVE_SOURCE_SERVER_SHIFT &
        KVM_XIVE_SOURCE_SERVER_MASK;
    kvm_src |= ((uint64_t) masked << KVM_XIVE_SOURCE_MASKED_SHIFT) &
        KVM_XIVE_SOURCE_MASKED_MASK;
    kvm_src |= ((uint64_t)eisn << KVM_XIVE_SOURCE_EISN_SHIFT) &
        KVM_XIVE_SOURCE_EISN_MASK;

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_CONFIG, lisn,
                             &kvm_src, true, errp);
}

void kvmppc_xive_sync_source(SpaprXive *xive, uint32_t lisn, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE_SYNC, lisn,
                      NULL, true, errp);
}

/*
 * At reset, the interrupt sources are simply created and MASKED. We
 * only need to inform the KVM XIVE device about their type: LSI or
 * MSI.
 */
int kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    uint64_t state = 0;

    assert(xive->fd != -1);

    if (xive_source_irq_is_lsi(xsrc, srcno)) {
        state |= KVM_XIVE_LEVEL_SENSITIVE;
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            state |= KVM_XIVE_LEVEL_ASSERTED;
        }
    }

    return kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_SOURCE, srcno, &state,
                             true, errp);
}

static int kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        int ret;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        ret = kvmppc_xive_source_reset_one(xsrc, i, errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

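/*
 * Note that reset only pushes the source type (LSI or MSI) to KVM, so
 * a freshly reset source stays MASKED. Its routing (server, priority,
 * EISN) is pushed later with kvmppc_xive_set_source_config(),
 * typically from the H_INT_SET_SOURCE_CONFIG hcall path or from
 * 'post_load' below.
 */
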
/*
 * This is used to perform the magic loads on the ESB pages, described
 * in xive.h.
 *
 * Memory barriers should not be needed for loads (no store for now).
 */
static uint64_t xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_mgmt(xsrc, srcno) +
        offset;

    if (write) {
        *addr = cpu_to_be64(data);
        return -1;
    } else {
        /* Prevent the compiler from optimizing away the load */
        volatile uint64_t value = be64_to_cpu(*addr);
        return value;
    }
}

static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
{
    return xive_esb_rw(xsrc, srcno, offset, 0, 0) & 0x3;
}

static void xive_esb_trigger(XiveSource *xsrc, int srcno)
{
    uint64_t *addr = xsrc->esb_mmap + xive_source_esb_page(xsrc, srcno);

    *addr = 0x0;
}

uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
                            uint64_t data, bool write)
{
    if (write) {
        return xive_esb_rw(xsrc, srcno, offset, data, 1);
    }

    trace_kvm_xive_source_reset(srcno);

    /*
     * Special Load EOI handling for LSI sources. Q bit is never set
     * and the interrupt should be re-triggered if the level is still
     * asserted.
     */
    if (xive_source_irq_is_lsi(xsrc, srcno) &&
        offset == XIVE_ESB_LOAD_EOI) {
        xive_esb_read(xsrc, srcno, XIVE_ESB_SET_PQ_00);
        if (xsrc->status[srcno] & XIVE_STATUS_ASSERTED) {
            xive_esb_trigger(xsrc, srcno);
        }
        return 0;
    } else {
        return xive_esb_rw(xsrc, srcno, offset, 0, 0);
    }
}

static void kvmppc_xive_source_get_state(XiveSource *xsrc)
{
    SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
    int i;

    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /* Perform a load without side effect to retrieve the PQ bits */
        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /* and save PQ locally */
        xive_source_esb_set(xsrc, i, pq);
    }
}

void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
{
    XiveSource *xsrc = opaque;

    if (!xive_source_irq_is_lsi(xsrc, srcno)) {
        if (!val) {
            return;
        }
    } else {
        if (val) {
            xsrc->status[srcno] |= XIVE_STATUS_ASSERTED;
        } else {
            xsrc->status[srcno] &= ~XIVE_STATUS_ASSERTED;
        }
    }

    xive_esb_trigger(xsrc, srcno);
}

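/*
 * A quick reminder of the ESB accesses used above (xive.h has the
 * authoritative description):
 *
 *   XIVE_ESB_GET          load, returns the PQ bits without changing them
 *   XIVE_ESB_SET_PQ_xx    load, sets PQ to 'xx' and returns the previous
 *                         value (the offsets are 0x100 apart, hence the
 *                         'XIVE_ESB_SET_PQ_00 + (pq << 8)' construct used
 *                         by the VM change state handler below)
 *   XIVE_ESB_LOAD_EOI     load, EOIs the interrupt
 *   trigger page store    injects an event (xive_esb_trigger)
 *
 * The PQ bits roughly encode: 00 reset/ready, 10 pending, 11 queued,
 * 01 off (masked).
 */
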
/*
 * sPAPR XIVE interrupt controller (KVM)
 */
int kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;
    int ret;

    assert(xive_end_is_valid(end));

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                            &kvm_eq, false, errp);
    if (ret < 0) {
        return ret;
    }

    /*
     * The EQ index and toggle bit are updated by HW. These are the
     * only fields from KVM we want to update QEMU with. The other END
     * fields should already be in the QEMU END table.
     */
    end->w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
        xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);

    return 0;
}

int kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
                                 uint32_t end_idx, XiveEND *end,
                                 Error **errp)
{
    struct kvm_ppc_xive_eq kvm_eq = { 0 };
    uint64_t kvm_eq_idx;
    uint8_t priority;
    uint32_t server;

    /*
     * Build the KVM state from the local END structure.
     */

    kvm_eq.flags = 0;
    if (xive_get_field32(END_W0_UCOND_NOTIFY, end->w0)) {
        kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
    }

    /*
     * If the hcall is disabling the EQ, set the size and page address
     * to zero. When migrating, only valid ENDs are taken into
     * account.
     */
    if (xive_end_is_valid(end)) {
        kvm_eq.qshift = xive_get_field32(END_W0_QSIZE, end->w0) + 12;
        kvm_eq.qaddr = xive_end_qaddr(end);
        /*
         * The EQ toggle bit and index should only be relevant when
         * restoring the EQ state
         */
        kvm_eq.qtoggle = xive_get_field32(END_W1_GENERATION, end->w1);
        kvm_eq.qindex = xive_get_field32(END_W1_PAGE_OFF, end->w1);
    } else {
        kvm_eq.qshift = 0;
        kvm_eq.qaddr = 0;
    }

    /* Encode the tuple (server, prio) as a KVM EQ index */
    spapr_xive_end_to_target(end_blk, end_idx, &server, &priority);

    kvm_eq_idx = priority << KVM_XIVE_EQ_PRIORITY_SHIFT &
        KVM_XIVE_EQ_PRIORITY_MASK;
    kvm_eq_idx |= server << KVM_XIVE_EQ_SERVER_SHIFT &
        KVM_XIVE_EQ_SERVER_MASK;

    return
        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ_CONFIG, kvm_eq_idx,
                          &kvm_eq, true, errp);
}

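/*
 * For reference, END_W0_QSIZE encodes the EQ size as a power of two
 * starting at 4K, which is why 12 is added when converting to
 * kvm_eq.qshift above: a QSIZE of 0 is a 4K queue (qshift = 12), a
 * QSIZE of 4 a 64K queue (qshift = 16).
 */
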
void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
{
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL, KVM_DEV_XIVE_RESET,
                      NULL, true, errp);
}

static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
{
    int i;
    int ret;

    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], errp);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

/*
 * The primary goal of the XIVE VM change handler is to mark the EQ
 * pages dirty when all XIVE event notifications have stopped.
 *
 * Whenever the VM is stopped, the VM change handler sets the source
 * PQs to PENDING to stop the flow of events and to possibly catch a
 * triggered interrupt occurring while the VM is stopped. The previous
 * state is saved in anticipation of a migration. The XIVE controller
 * is then synced through KVM to flush any in-flight event
 * notification and stabilize the EQs.
 *
 * At this stage, we can mark the EQ page dirty and let a migration
 * sequence transfer the EQ pages to the destination, which is done
 * just after the stop state.
 *
 * The previous configuration of the sources is restored when the VM
 * runs again. If an interrupt was queued while the VM was stopped,
 * simply generate a trigger.
 */
static void kvmppc_xive_change_state_handler(void *opaque, bool running,
                                             RunState state)
{
    SpaprXive *xive = opaque;
    XiveSource *xsrc = &xive->source;
    Error *local_err = NULL;
    int i;

    /*
     * Restore the sources to their initial state. This is called when
     * the VM resumes after a stop or a migration.
     */
    if (running) {
        for (i = 0; i < xsrc->nr_irqs; i++) {
            uint8_t pq;
            uint8_t old_pq;

            if (!xive_eas_is_valid(&xive->eat[i])) {
                continue;
            }

            pq = xive_source_esb_get(xsrc, i);
            old_pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_00 + (pq << 8));

            /*
             * An interrupt was queued while the VM was stopped,
             * generate a trigger.
             */
            if (pq == XIVE_ESB_RESET && old_pq == XIVE_ESB_QUEUED) {
                xive_esb_trigger(xsrc, i);
            }
        }

        return;
    }

    /*
     * Mask the sources, to stop the flow of event notifications, and
     * save the PQs locally in the XiveSource object. The XiveSource
     * state will be collected later on by its vmstate handler if a
     * migration is in progress.
     */
    for (i = 0; i < xsrc->nr_irqs; i++) {
        uint8_t pq;

        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);

        /*
         * PQ is set to PENDING to possibly catch a triggered
         * interrupt occurring while the VM is stopped (a hotplug
         * event, for instance).
         */
        if (pq != XIVE_ESB_OFF) {
            pq = xive_esb_read(xsrc, i, XIVE_ESB_SET_PQ_10);
        }
        xive_source_esb_set(xsrc, i, pq);
    }

    /*
     * Sync the XIVE controller in KVM, to flush in-flight event
     * notification that should be enqueued in the EQs and mark the
     * XIVE EQ pages dirty to collect all updates.
     */
    kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                      KVM_DEV_XIVE_EQ_SYNC, NULL, true, &local_err);
    if (local_err) {
        error_report_err(local_err);
        return;
    }
}

void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
{
    assert(xive->fd != -1);

    /*
     * When the VM is stopped, the sources are masked and the previous
     * state is saved in anticipation of a migration. We should not
     * synchronize the source state in that case, otherwise we would
     * override the saved state.
     */
    if (runstate_is_running()) {
        kvmppc_xive_source_get_state(&xive->source);
    }

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    kvmppc_xive_get_queues(xive, errp);
}

/*
 * The SpaprXive 'pre_save' method is called by the vmstate handler of
 * the SpaprXive model, after the XIVE controller is synced in the VM
 * change handler.
 */
int kvmppc_xive_pre_save(SpaprXive *xive)
{
    Error *local_err = NULL;
    int ret;

    assert(xive->fd != -1);

    /* EAT: there is no extra state to query from KVM */

    /* ENDT */
    ret = kvmppc_xive_get_queues(xive, &local_err);
    if (ret < 0) {
        error_report_err(local_err);
        return ret;
    }

    return 0;
}

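/*
 * Putting the pieces above and below together, the save/restore
 * sequence on the KVM side is roughly: the VM change state handler
 * masks the sources and syncs the EQs when the VM stops, 'pre_save'
 * then captures the ENDT from KVM, and on the destination 'post_load'
 * restores the ENDT, the EAT and the thread contexts. The source PQ
 * states themselves are re-applied by the VM change state handler
 * when the VM resumes.
 */
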
/*
 * The SpaprXive 'post_load' method is not called by a vmstate
 * handler. It is called at the sPAPR machine level at the end of the
 * migration sequence by the sPAPR IRQ backend 'post_load' method,
 * when all XIVE states have been transferred and loaded.
 */
int kvmppc_xive_post_load(SpaprXive *xive, int version_id)
{
    Error *local_err = NULL;
    CPUState *cs;
    int i;
    int ret;

    /* The KVM XIVE device should be in use */
    assert(xive->fd != -1);

    /* Restore the ENDT first. The targeting depends on it. */
    for (i = 0; i < xive->nr_ends; i++) {
        if (!xive_end_is_valid(&xive->endt[i])) {
            continue;
        }

        ret = kvmppc_xive_set_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
                                           &xive->endt[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Restore the EAT */
    for (i = 0; i < xive->nr_irqs; i++) {
        if (!xive_eas_is_valid(&xive->eat[i])) {
            continue;
        }

        /*
         * We can only restore the source config if the source has been
         * previously set in KVM. Since we don't do that for all interrupts
         * at reset time anymore, let's do it now.
         */
        ret = kvmppc_xive_source_reset_one(&xive->source, i, &local_err);
        if (ret < 0) {
            goto fail;
        }

        ret = kvmppc_xive_set_source_config(xive, i, &xive->eat[i], &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * Restore the thread interrupt contexts of initial CPUs.
     *
     * The context of hotplugged CPUs is restored later, by the
     * 'post_load' handler of the XiveTCTX model because they are not
     * available at the time the SpaprXive 'post_load' method is
     * called. We cannot restore the context of all CPUs in the
     * 'post_load' handler of XiveTCTX because the machine is not
     * necessarily connected to the KVM device at that time.
     */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        ret = kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu)->tctx, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* The source states will be restored when the machine starts running */
    return 0;

fail:
    error_report_err(local_err);
    return ret;
}

/* Returns MAP_FAILED on error and sets errno */
static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                              Error **errp)
{
    void *addr;
    uint32_t page_shift = 16; /* TODO: fix page_shift */

    addr = mmap(NULL, len, PROT_WRITE | PROT_READ, MAP_SHARED, xive->fd,
                pgoff << page_shift);
    if (addr == MAP_FAILED) {
        error_setg_errno(errp, errno, "XIVE: unable to set memory mapping");
    }

    return addr;
}

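/*
 * The mmap offset above is expressed in 64K units: the ESB and TIMA
 * pages exposed by the KVM XIVE device are 64K pages, hence the
 * hardcoded shift (which the TODO presumably wants derived from the
 * device or host page size instead of being hardcoded).
 */
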
/*
 * All the XIVE memory regions are now backed by mappings from the KVM
 * XIVE device.
 */
int kvmppc_xive_connect(SpaprInterruptController *intc, uint32_t nr_servers,
                        Error **errp)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc = &xive->source;
    size_t esb_len = xive_source_esb_len(xsrc);
    size_t tima_len = 4ull << TM_SHIFT;
    CPUState *cs;
    int fd;
    void *addr;
    int ret;

    /*
     * The KVM XIVE device is already in use. This is the case when
     * rebooting under the XIVE-only interrupt mode.
     */
    if (xive->fd != -1) {
        return 0;
    }

    if (!kvmppc_has_cap_xive()) {
        error_setg(errp, "IRQ_XIVE capability must be present for KVM");
        return -1;
    }

    /* First, create the KVM XIVE device */
    fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_XIVE, false);
    if (fd < 0) {
        error_setg_errno(errp, -fd, "XIVE: error creating KVM device");
        return -1;
    }
    xive->fd = fd;

    /* Tell KVM about the # of VCPUs we may have */
    if (kvm_device_check_attr(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                              KVM_DEV_XIVE_NR_SERVERS)) {
        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_CTRL,
                                KVM_DEV_XIVE_NR_SERVERS, &nr_servers, true,
                                errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /*
     * 1. Source ESB pages - KVM mapping
     */
    addr = kvmppc_xive_mmap(xive, KVM_XIVE_ESB_PAGE_OFFSET, esb_len, errp);
    if (addr == MAP_FAILED) {
        goto fail;
    }
    xsrc->esb_mmap = addr;

    memory_region_init_ram_device_ptr(&xsrc->esb_mmio_kvm, OBJECT(xsrc),
                                      "xive.esb-kvm", esb_len, xsrc->esb_mmap);
    memory_region_add_subregion_overlap(&xsrc->esb_mmio, 0,
                                        &xsrc->esb_mmio_kvm, 1);

    /*
     * 2. END ESB pages (No KVM support yet)
     */

    /*
     * 3. TIMA pages - KVM mapping
     */
    addr = kvmppc_xive_mmap(xive, KVM_XIVE_TIMA_PAGE_OFFSET, tima_len, errp);
    if (addr == MAP_FAILED) {
        goto fail;
    }
    xive->tm_mmap = addr;

    memory_region_init_ram_device_ptr(&xive->tm_mmio_kvm, OBJECT(xive),
                                      "xive.tima", tima_len, xive->tm_mmap);
    memory_region_add_subregion_overlap(&xive->tm_mmio, 0,
                                        &xive->tm_mmio_kvm, 1);

    xive->change = qemu_add_vm_change_state_handler(
        kvmppc_xive_change_state_handler, xive);

    /* Connect the presenters to the initial VCPUs of the machine */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        ret = kvmppc_xive_cpu_connect(spapr_cpu_state(cpu)->tctx, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Update the KVM sources */
    ret = kvmppc_xive_source_reset(xsrc, errp);
    if (ret < 0) {
        goto fail;
    }

    kvm_kernel_irqchip = true;
    kvm_msi_via_irqfd_allowed = true;
    kvm_gsi_direct_mapping = true;
    return 0;

fail:
    kvmppc_xive_disconnect(intc);
    return -1;
}

void kvmppc_xive_disconnect(SpaprInterruptController *intc)
{
    SpaprXive *xive = SPAPR_XIVE(intc);
    XiveSource *xsrc;
    size_t esb_len;

    assert(xive->fd != -1);

    /* Clear the KVM mapping */
    xsrc = &xive->source;
    esb_len = xive_source_esb_len(xsrc);

    if (xsrc->esb_mmap) {
        memory_region_del_subregion(&xsrc->esb_mmio, &xsrc->esb_mmio_kvm);
        object_unparent(OBJECT(&xsrc->esb_mmio_kvm));
        munmap(xsrc->esb_mmap, esb_len);
        xsrc->esb_mmap = NULL;
    }

    if (xive->tm_mmap) {
        memory_region_del_subregion(&xive->tm_mmio, &xive->tm_mmio_kvm);
        object_unparent(OBJECT(&xive->tm_mmio_kvm));
        munmap(xive->tm_mmap, 4ull << TM_SHIFT);
        xive->tm_mmap = NULL;
    }

    /*
     * When the KVM device fd is closed, the KVM device is destroyed
     * and removed from the list of devices of the VM. The VCPU
     * presenters are also detached from the device.
     */
    close(xive->fd);
    xive->fd = -1;

    kvm_kernel_irqchip = false;
    kvm_msi_via_irqfd_allowed = false;
    kvm_gsi_direct_mapping = false;

    /* Clear the local list of presenters (hotplug) */
    kvm_cpu_disable_all();

    /* VM Change state handler is not needed anymore */
    if (xive->change) {
        qemu_del_vm_change_state_handler(xive->change);
        xive->change = NULL;
    }
}