// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	/*
	 * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10
	 * load operation, so there is no need to enforce load-after-store
	 * ordering.
	 */

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
					      u8 prio, __be32 *qpage,
					      u32 order, bool can_escalate)
{
	int rc;
	__be32 *qpage_prev = q->qpage;

	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
					 can_escalate);
	if (rc)
		return rc;

	if (qpage_prev)
		put_page(virt_to_page(qpage_prev));

	return rc;
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}
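
/*
 * Connect a vcpu to the XIVE native device. This is presumably reached
 * from the KVM_CAP_PPC_IRQ_XIVE vcpu capability (an assumption based on
 * the KVMPPC_IRQ_XIVE interrupt type set below, the capability handling
 * itself lives outside this file): a VP id is computed for the server
 * number, the VP is enabled in OPAL, and the TIMA save area is primed so
 * that the assembly push/pull code can present the context on guest
 * entry.
 */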
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	mutex_lock(&xive->lock);

	rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
	if (rc)
		goto bail;

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the numbering of the escalation interrupts
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};
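
/*
 * ESB page fault handling.
 *
 * Each interrupt source is backed by two pages within the ESB region of
 * the device mapping: the even page triggers the interrupt and the odd
 * page is used for EOI and management. Relative to the start of the
 * region, source 'irq' therefore lives at page offsets 2 * irq (trigger)
 * and 2 * irq + 1 (EOI), which is the same layout used by
 * kvmppc_xive_native_reset_mapped() above when invalidating the pages.
 */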
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];

	/* Some sanity checking */
	if (!state->valid) {
		pr_devel("%s: source %lx invalid !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}
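
/*
 * KVM_DEV_XIVE_GRP_SOURCE (set): create or restore an interrupt source.
 *
 * The 64-bit value passed by userspace carries the source state flags
 * (KVM_XIVE_LEVEL_SENSITIVE, KVM_XIVE_LEVEL_ASSERTED). The source block
 * is created on demand, a host IPI is allocated the first time a source
 * is seen, and the source is left masked; its routing is set up later
 * through KVM_DEV_XIVE_GRP_SOURCE_CONFIG.
 */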
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}
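
/*
 * KVM_DEV_XIVE_GRP_SOURCE_CONFIG (set): route an interrupt source.
 *
 * The 64-bit configuration word packs the target server, the priority,
 * a masked bit and the Effective Interrupt Source Number (EISN) the
 * guest expects to find in its event queue, using the
 * KVM_XIVE_SOURCE_*_MASK/SHIFT definitions decoded below.
 */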
static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}
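
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (set): configure an event queue.
 *
 * The EQ identifier encodes the target server and priority. A zero
 * qshift resets the queue; otherwise the guest queue page is looked up
 * and pinned, the queue is configured in OPAL with unconditional
 * notification, and the saved toggle bit and index are restored when
 * this control is used on the migration restore path.
 */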
static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
							NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);

	page_size = kvm_host_page_size(vcpu, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
					(__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}
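
/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (get): dump the configuration of an event
 * queue in the kvm_ppc_xive_eq format, including the current toggle bit
 * and index retrieved from OPAL, so that userspace can save it for
 * migration.
 */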
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}
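
/*
 * KVM_DEV_XIVE_RESET: bring the device back to its post-creation state.
 * All escalation interrupts and queues of the connected vcpus are freed
 * and every valid source is masked and detached from its target.
 * Userspace is expected to reconfigure sources and queues afterwards.
 */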
static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}
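
/*
 * Device attribute dispatch. The control group (KVM_DEV_XIVE_GRP_CTRL)
 * carries the global commands, while the source, source configuration,
 * EQ configuration and source sync groups take the interrupt or EQ
 * identifier in attr->attr and a pointer to the payload in attr->addr.
 */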
static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		case KVM_DEV_XIVE_NR_SERVERS:
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
		case KVM_DEV_XIVE_NR_SERVERS:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be executing.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. For now
	 * it is only freed when the VM is destroyed, until we fix
	 * all the execution paths.
	 */

	kfree(dev);
}
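
/*
 * For reference, a rough (hypothetical) userspace sequence for driving
 * this device might look like the sketch below. It only illustrates the
 * generic KVM device ioctls and the mmap offsets checked in
 * kvmppc_xive_native_mmap(); error handling and the rest of the setup
 * are omitted.
 *
 *	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_XIVE };
 *	ioctl(vm_fd, KVM_CREATE_DEVICE, &cd);
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XIVE_GRP_CTRL,
 *		.attr  = KVM_DEV_XIVE_RESET,
 *	};
 *	ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 *	// TIMA pages, guest OS page at index 2 (see xive_native_tima_fault)
 *	mmap(NULL, 4 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *	     cd.fd, KVM_XIVE_TIMA_PAGE_OFFSET * page_size);
 */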
/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/* VP allocation is delayed to the first call to connect_vcpu */
	xive->vp_base = XIVE_INVALID_VP;
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
	 * on a POWER9 system.
	 */
	xive->nr_servers = KVM_MAX_VCPUS;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	kvm->arch.xive = xive;
	return 0;
}
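
/*
 * VP state save/restore for migration. These helpers back the vcpu
 * one_reg interface (KVM_REG_PPC_VP_STATE is an assumption here, based
 * on the one_reg references in the release path above): the saved value
 * is the OS CAM thread context word (w01) with the OPAL backup of the
 * IPB register merged in, and only that first word is restored on set.
 */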
/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged into the
	 * KVM VP state when it was captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num, xc->vp_id,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xive_native_debug);

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}