// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (kvmppc_xive_has_single_escalation(xc->xive))
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}
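
/*
 * Connect a vCPU to the XIVE native device: compute a VP id for the
 * server, retrieve the VP CAM line from OPAL, enable the VP (single
 * escalation mode changes the escalation interrupt numbering) and
 * prime the fields used by the assembly push/pull code.
 *
 * Userspace is expected to reach this path through the KVM_ENABLE_CAP
 * vcpu ioctl with KVM_CAP_PPC_IRQ_XIVE, passing the device fd and the
 * server number (illustrative note, the cap handling is not in this
 * file).
 */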
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	if (!kvmppc_xive_check_save_restore(vcpu)) {
		pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
		rc = -EIO;
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the escalation interrupt numbering.
	 */
	rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};
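
/*
 * The ESB space of the device is exposed to userspace as two pages per
 * guest interrupt number: the even page is the trigger page and the
 * odd page is the EOI/management page (see the fault handler below).
 * The mmap offset of source 'irq' is therefore
 * KVM_XIVE_ESB_PAGE_OFFSET + irq * 2, matching what
 * kvmppc_xive_native_reset_mapped() unmaps above.
 */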
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one page for trigger
	 * and one for EOI.
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}
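
/*
 * KVM_DEV_XIVE_GRP_SOURCE: 'attr' is the guest IRQ number and 'addr'
 * points to a 64-bit word carrying the KVM_XIVE_LEVEL_SENSITIVE and
 * KVM_XIVE_LEVEL_ASSERTED bits. A minimal userspace sketch (assuming a
 * device fd 'xive_fd' and an IRQ number 'girq', neither defined here):
 *
 *	__u64 src = KVM_XIVE_LEVEL_SENSITIVE;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_SOURCE,
 *		.attr  = girq,
 *		.addr  = (__u64)(uintptr_t)&src,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 */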
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}
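
/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG: 'attr' is the guest IRQ number and
 * 'addr' points to a 64-bit word packing the EISN, masked bit, server
 * and priority with the KVM_XIVE_SOURCE_* masks/shifts of the uapi
 * headers. The word is unpacked below and applied to the underlying
 * hardware interrupt (IPI or passed-through source).
 */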
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}
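
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): 'attr' encodes the (server,
 * priority) pair and 'addr' points to a struct kvm_ppc_xive_eq.
 * Illustrative userspace sketch (xive_fd, server, priority and
 * guest_queue_gpa are assumptions, not defined here):
 *
 *	struct kvm_ppc_xive_eq eq = {
 *		.flags  = KVM_XIVE_EQ_ALWAYS_NOTIFY,
 *		.qshift = 16,
 *		.qaddr  = guest_queue_gpa,
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
 *		.attr  = (server << KVM_XIVE_EQ_SERVER_SHIFT) |
 *			 (priority << KVM_XIVE_EQ_PRIORITY_SHIFT),
 *		.addr  = (__u64)(uintptr_t)&eq,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * A qshift of 16 selects the only queue size supported here (64K).
 */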
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address; it is used to mark
	 * the EQ page dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
						(__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state (toggle bit and index) when
	 * needed, i.e. when migrating. A guest H_INT_SET_QUEUE_CONFIG
	 * hcall leaves them at their default values (1 and 0).
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   kvmppc_xive_has_single_escalation(xive));
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}
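
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): return the current EQ
 * configuration, including the toggle bit and index retrieved from
 * the XIVE IC, so that userspace can capture it, typically at
 * migration time.
 */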
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}
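
/*
 * KVM_DEV_XIVE_RESET: mask all configured sources (PQ set to '01') and
 * tear down the vCPU escalation interrupts and queues. Typically
 * driven by userspace when the guest renegotiates its interrupt mode
 * (illustrative note, the caller lives outside this file).
 */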
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}
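
/*
 * The attribute groups below are reached through the generic
 * KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR / KVM_HAS_DEVICE_ATTR
 * ioctls issued on the XIVE device fd.
 */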
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore none of the device attribute set/get,
	 * mmap, or page fault functions can be running concurrently,
	 * and similarly, the connect_vcpu and set/clr_mapped
	 * functions cannot be running either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be done. Holding
		 * the vcpu->mutex also means that the vcpu cannot be
		 * executing the KVM_RUN ioctl, and therefore it cannot be
		 * executing the XIVE push or pull code or accessing the
		 * XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive structure is kept under the
	 * xive_devices struct of the machine for reuse. For now, it is
	 * only freed when the VM is destroyed, until all the execution
	 * paths are fixed.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	if (xive_native_has_single_escalation())
		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;

	if (xive_native_has_save_restore())
		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;

	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the IPB register backed up in the NVT structure and
	 * merge it into our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
			   "    NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id, xc->vp_chip_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   be64_to_cpu(vcpu->arch.xive_saved_state.w01),
			   be32_to_cpu(vcpu->arch.xive_cam_word));

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	seq_puts(m, "=========\nSources\n=========\n");

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_debug_show_sources(m, sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created\n", __func__);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};