// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect escalation interrupts numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}
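/*
 * Usage sketch (an assumption about the caller, not derived from this
 * file alone): userspace typically reaches the connect path above
 * through the vcpu KVM_ENABLE_CAP ioctl with KVM_CAP_PPC_IRQ_XIVE,
 * passing the XIVE device fd in cap->args[0] and the server number in
 * cap->args[1]. See Documentation/virt/kvm/devices/xive.rst for the
 * authoritative description of the device API.
 */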
/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}
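/*
 * Layout sketch of the ESB mapping handled above (illustrative only):
 * each guest IRQ owns a pair of pages starting at page offset
 * KVM_XIVE_ESB_PAGE_OFFSET + girq * 2 within the device mapping:
 *
 *	trigger page:	vm_pgoff + girq * 2	(even index)
 *	EOI/mgmt page:	vm_pgoff + girq * 2 + 1	(odd index)
 *
 * which is why kvmppc_xive_native_reset_mapped() unmaps exactly two
 * pages per interrupt and the fault handler recovers the guest IRQ
 * number with page_offset / 2.
 */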
static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}
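/*
 * Illustrative userspace sketch (not compiled here; xive_fd and girq
 * are hypothetical names): a source is created or restored with a
 * KVM_SET_DEVICE_ATTR ioctl on the XIVE device fd, the attribute being
 * the guest IRQ number and the payload a u64 state word, e.g. for a
 * level-sensitive interrupt:
 *
 *	__u64 state = KVM_XIVE_LEVEL_SENSITIVE;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_SOURCE,
 *		.attr  = girq,
 *		.addr  = (__u64)(uintptr_t)&state,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 */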
static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}
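/*
 * Illustrative only: the 64-bit word decoded above is expected to be
 * packed by userspace with the uapi masks/shifts, along the lines of:
 *
 *	kvm_cfg = ((__u64)priority << KVM_XIVE_SOURCE_PRIORITY_SHIFT) |
 *		  ((__u64)server   << KVM_XIVE_SOURCE_SERVER_SHIFT)   |
 *		  ((__u64)masked   << KVM_XIVE_SOURCE_MASKED_SHIFT)   |
 *		  ((__u64)eisn     << KVM_XIVE_SOURCE_EISN_SHIFT);
 *
 * and passed through KVM_SET_DEVICE_ATTR with the
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG group and the guest IRQ number as the
 * attribute.
 */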
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
						(__be32 *) qaddr,
						kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not be restored.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}
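/*
 * Illustrative only (guest_ring_gpa is a hypothetical name): configuring
 * or restoring an EQ from userspace means filling a struct
 * kvm_ppc_xive_eq and encoding the server/priority pair in the
 * attribute, roughly:
 *
 *	struct kvm_ppc_xive_eq kvm_eq = {
 *		.flags  = KVM_XIVE_EQ_ALWAYS_NOTIFY,
 *		.qshift = 16,			// 64K queue, see validation above
 *		.qaddr  = guest_ring_gpa,	// 64K-aligned guest address
 *	};
 *	__u64 eq_idx = ((__u64)server << KVM_XIVE_EQ_SERVER_SHIFT) |
 *		       ((__u64)priority << KVM_XIVE_EQ_PRIORITY_SHIFT);
 *
 * passed via KVM_SET_DEVICE_ATTR with group KVM_DEV_XIVE_GRP_EQ_CONFIG,
 * attr = eq_idx and addr pointing at kvm_eq. A qshift of 0 resets the
 * queue, as handled at the top of the function.
 */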
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}
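/*
 * Illustrative only: the reset above is exposed to userspace as the
 * KVM_DEV_XIVE_RESET control of the KVM_DEV_XIVE_GRP_CTRL group (see
 * kvmppc_xive_native_set_attr() further down), i.e. a
 * KVM_SET_DEVICE_ATTR ioctl with no payload. It returns every valid
 * source to the masked state and tears down the per-vcpu queues and
 * escalation interrupts.
 */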
static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is done before calling the
		 * KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}
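/*
 * Migration note (summary of the sync helpers above, hedged as it
 * describes expected userspace behaviour rather than this file): the
 * KVM_DEV_XIVE_EQ_SYNC control is expected to be issued when the VM is
 * stopped, so that in-flight interrupts are pushed to the in-memory
 * queues and the EQ pages are marked dirty before the final transfer
 * of guest RAM.
 */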
static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be executing.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}
/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}
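/*
 * Context note (an assumption about the caller, not visible in this
 * file): the two helpers above are expected to back the
 * KVM_REG_PPC_VP_STATE one_reg register described in
 * Documentation/virt/kvm/devices/xive.rst. Userspace reads it on the
 * source host to capture the thread interrupt management state (w01,
 * with the IPB backup merged in from OPAL) and writes it back on the
 * destination before resuming the vcpu.
 */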
bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}