// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);
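
	/*
	 * Map the guest server number onto a VP id. This is also where
	 * the VP block gets allocated on the first connection, as noted
	 * in kvmppc_xive_native_create(). Out-of-range servers and VP
	 * ids already in use are rejected.
	 */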
	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect escalation interrupts numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}
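
	/*
	 * Pick which interrupt data to expose: if the source has been
	 * mapped to a passed-through device interrupt, its ESB pages
	 * are used, otherwise the fault is backed by the ESB pages of
	 * the XIVE IPI allocated for the source.
	 */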
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}
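
/*
 * Reconfigure the EAS of a source. When not masked, the source is
 * routed to the event queue of the target vCPU (VP) at the given
 * priority, with 'eisn' as the event data the guest will find in
 * that queue. A masked source is left unrouted.
 */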
static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}
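
/*
 * KVM_DEV_XIVE_GRP_SOURCE_SYNC: ask the XIVE IC to flush any event
 * still in flight from this source, so that it has reached its event
 * queue (if the source is routed) by the time the control returns.
 */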
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}
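
	/*
	 * A non-zero qshift means the guest is configuring (or, at
	 * migration, restoring) a real queue: validate the parameters
	 * and translate the guest page before handing it to OPAL.
	 */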

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Backup the queue page guest address so we can mark the EQ
	 * page dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
					(__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}
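
	/*
	 * On success, the reference taken on the queue page by
	 * gfn_to_page() above now belongs to the queue: it is dropped
	 * when the queue is reconfigured or torn down
	 * (kvmppc_xive_native_configure_queue/cleanup_queue).
	 */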

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_QUEUE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}
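
/*
 * KVM_DEV_XIVE_RESET: return the device to its post-creation state.
 * All sources are masked, the escalation interrupts are freed and the
 * event queues are released. Userspace typically issues this control
 * at machine reset.
 */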
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked setting the PQ bits to
		 * '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}
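
/*
 * The controls below are driven from userspace with KVM_SET_DEVICE_ATTR
 * on the XIVE device fd (the fd returned by KVM_CREATE_DEVICE). As an
 * illustration (simplified, no error handling), resetting the device
 * looks roughly like:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_CTRL,
 *		.attr  = KVM_DEV_XIVE_RESET,
 *	};
 *	ioctl(xive_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * Source and EQ controls pass the target identifier in 'attr' and a
 * pointer to their payload (e.g. a struct kvm_ppc_xive_eq) in 'addr'.
 */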
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore none of the device attribute set/get,
	 * mmap, or page fault functions can be executing concurrently,
	 * and similarly the connect_vcpu and set/clr_mapped functions
	 * cannot be running either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */
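
	/*
	 * The kvm_device itself is freed here: this device implements
	 * the release callback (and no destroy callback), so the KVM
	 * core leaves the final kfree() of 'dev' to us.
	 */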
	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of the IPB register in the NVT structure
	 * and merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}
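
/*
 * Like kvmppc_xive_native_get_vp() above, this backs the
 * KVM_REG_PPC_VP_STATE one_reg that userspace uses to save and restore
 * the vCPU interrupt presenter state across migration.
 */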
int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged into the
	 * KVM VP state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
			   "    NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id, xc->vp_chip_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   be64_to_cpu(vcpu->arch.xive_saved_state.w01),
			   be32_to_cpu(vcpu->arch.xive_cam_word));

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	seq_puts(m, "=========\nSources\n=========\n");

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_debug_show_sources(m, sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};