// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

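	/*
	 * kvmppc_xive_compute_vp_id() validates the server number,
	 * allocates the VP block on first use and fails if the
	 * resulting VP id is already in use by another vCPU.
	 */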
	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect escalation interrupts numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
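	/*
	 * The targeting (server, priority, EISN) of the source is only
	 * set later on, when the guest configures it through the
	 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG control.
	 */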

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
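		/*
		 * Sync the XIVE IC for this source: the HW interrupt
		 * when the device is passed through, the underlying
		 * IPI otherwise.
		 */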
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address to mark the EQ page
	 * dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
					(__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state (toggle and index) when needed,
	 * i.e. when restoring a migrated guest. The default values used
	 * by the H_INT_SET_QUEUE_CONFIG hcall do not require it.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;
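
			/*
			 * The escalation interrupt is freed here and
			 * re-allocated when the guest reconfigures the
			 * queue with the EQ_CONFIG control.
			 */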

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there cannot be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions cannot be
	 * executing either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
			   " NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id, xc->vp_chip_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   be64_to_cpu(vcpu->arch.xive_saved_state.w01),
			   be32_to_cpu(vcpu->arch.xive_cam_word));

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	seq_puts(m, "=========\nSources\n=========\n");

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_debug_show_sources(m, sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}