1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2017-2019, IBM Corporation. 4 */ 5 6 #define pr_fmt(fmt) "xive-kvm: " fmt 7 8 #include <linux/kernel.h> 9 #include <linux/kvm_host.h> 10 #include <linux/err.h> 11 #include <linux/gfp.h> 12 #include <linux/spinlock.h> 13 #include <linux/delay.h> 14 #include <linux/file.h> 15 #include <asm/uaccess.h> 16 #include <asm/kvm_book3s.h> 17 #include <asm/kvm_ppc.h> 18 #include <asm/hvcall.h> 19 #include <asm/xive.h> 20 #include <asm/xive-regs.h> 21 #include <asm/debug.h> 22 #include <asm/debugfs.h> 23 #include <asm/opal.h> 24 25 #include <linux/debugfs.h> 26 #include <linux/seq_file.h> 27 28 #include "book3s_xive.h" 29 30 static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) 31 { 32 u64 val; 33 34 if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) 35 offset |= offset << 4; 36 37 val = in_be64(xd->eoi_mmio + offset); 38 return (u8)val; 39 } 40 41 static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) 42 { 43 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 44 struct xive_q *q = &xc->queues[prio]; 45 46 xive_native_disable_queue(xc->vp_id, q, prio); 47 if (q->qpage) { 48 put_page(virt_to_page(q->qpage)); 49 q->qpage = NULL; 50 } 51 } 52 53 static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, 54 u8 prio, __be32 *qpage, 55 u32 order, bool can_escalate) 56 { 57 int rc; 58 __be32 *qpage_prev = q->qpage; 59 60 rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, 61 can_escalate); 62 if (rc) 63 return rc; 64 65 if (qpage_prev) 66 put_page(virt_to_page(qpage_prev)); 67 68 return rc; 69 } 70 71 void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) 72 { 73 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 74 int i; 75 76 if (!kvmppc_xive_enabled(vcpu)) 77 return; 78 79 if (!xc) 80 return; 81 82 pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num); 83 84 /* Ensure no interrupt is still routed to that VP */ 85 xc->valid = false; 86 kvmppc_xive_disable_vcpu_interrupts(vcpu); 87 88 /* Free escalations */ 89 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { 90 /* Free the escalation irq */ 91 if (xc->esc_virq[i]) { 92 if (xc->xive->single_escalation) 93 xive_cleanup_single_escalation(vcpu, xc, 94 xc->esc_virq[i]); 95 free_irq(xc->esc_virq[i], vcpu); 96 irq_dispose_mapping(xc->esc_virq[i]); 97 kfree(xc->esc_virq_names[i]); 98 xc->esc_virq[i] = 0; 99 } 100 } 101 102 /* Disable the VP */ 103 xive_native_disable_vp(xc->vp_id); 104 105 /* Clear the cam word so guest entry won't try to push context */ 106 vcpu->arch.xive_cam_word = 0; 107 108 /* Free the queues */ 109 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { 110 kvmppc_xive_native_cleanup_queue(vcpu, i); 111 } 112 113 /* Free the VP */ 114 kfree(xc); 115 116 /* Cleanup the vcpu */ 117 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; 118 vcpu->arch.xive_vcpu = NULL; 119 } 120 121 int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev, 122 struct kvm_vcpu *vcpu, u32 server_num) 123 { 124 struct kvmppc_xive *xive = dev->private; 125 struct kvmppc_xive_vcpu *xc = NULL; 126 int rc; 127 u32 vp_id; 128 129 pr_devel("native_connect_vcpu(server=%d)\n", server_num); 130 131 if (dev->ops != &kvm_xive_native_ops) { 132 pr_devel("Wrong ops !\n"); 133 return -EPERM; 134 } 135 if (xive->kvm != vcpu->kvm) 136 return -EPERM; 137 if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) 138 return -EBUSY; 139 140 mutex_lock(&xive->lock); 141 142 rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id); 143 if (rc) 144 goto bail; 145 146 xc = kzalloc(sizeof(*xc), GFP_KERNEL); 147 if (!xc) { 148 rc = -ENOMEM; 149 goto bail; 150 } 151 152 vcpu->arch.xive_vcpu = xc; 153 xc->xive = xive; 154 xc->vcpu = vcpu; 155 xc->server_num = server_num; 156 157 xc->vp_id = vp_id; 158 xc->valid = true; 159 vcpu->arch.irq_type = KVMPPC_IRQ_XIVE; 160 161 rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); 162 if (rc) { 163 pr_err("Failed to get VP info from OPAL: %d\n", rc); 164 goto bail; 165 } 166 167 /* 168 * Enable the VP first as the single escalation mode will 169 * affect escalation interrupts numbering 170 */ 171 rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation); 172 if (rc) { 173 pr_err("Failed to enable VP in OPAL: %d\n", rc); 174 goto bail; 175 } 176 177 /* Configure VCPU fields for use by assembly push/pull */ 178 vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); 179 vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); 180 181 /* TODO: reset all queues to a clean state ? */ 182 bail: 183 mutex_unlock(&xive->lock); 184 if (rc) 185 kvmppc_xive_native_cleanup_vcpu(vcpu); 186 187 return rc; 188 } 189 190 /* 191 * Device passthrough support 192 */ 193 static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq) 194 { 195 struct kvmppc_xive *xive = kvm->arch.xive; 196 pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2; 197 198 if (irq >= KVMPPC_XIVE_NR_IRQS) 199 return -EINVAL; 200 201 /* 202 * Clear the ESB pages of the IRQ number being mapped (or 203 * unmapped) into the guest and let the the VM fault handler 204 * repopulate with the appropriate ESB pages (device or IC) 205 */ 206 pr_debug("clearing esb pages for girq 0x%lx\n", irq); 207 mutex_lock(&xive->mapping_lock); 208 if (xive->mapping) 209 unmap_mapping_range(xive->mapping, 210 esb_pgoff << PAGE_SHIFT, 211 2ull << PAGE_SHIFT, 1); 212 mutex_unlock(&xive->mapping_lock); 213 return 0; 214 } 215 216 static struct kvmppc_xive_ops kvmppc_xive_native_ops = { 217 .reset_mapped = kvmppc_xive_native_reset_mapped, 218 }; 219 220 static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) 221 { 222 struct vm_area_struct *vma = vmf->vma; 223 struct kvm_device *dev = vma->vm_file->private_data; 224 struct kvmppc_xive *xive = dev->private; 225 struct kvmppc_xive_src_block *sb; 226 struct kvmppc_xive_irq_state *state; 227 struct xive_irq_data *xd; 228 u32 hw_num; 229 u16 src; 230 u64 page; 231 unsigned long irq; 232 u64 page_offset; 233 234 /* 235 * Linux/KVM uses a two pages ESB setting, one for trigger and 236 * one for EOI 237 */ 238 page_offset = vmf->pgoff - vma->vm_pgoff; 239 irq = page_offset / 2; 240 241 sb = kvmppc_xive_find_source(xive, irq, &src); 242 if (!sb) { 243 pr_devel("%s: source %lx not found !\n", __func__, irq); 244 return VM_FAULT_SIGBUS; 245 } 246 247 state = &sb->irq_state[src]; 248 kvmppc_xive_select_irq(state, &hw_num, &xd); 249 250 arch_spin_lock(&sb->lock); 251 252 /* 253 * first/even page is for trigger 254 * second/odd page is for EOI and management. 255 */ 256 page = page_offset % 2 ? xd->eoi_page : xd->trig_page; 257 arch_spin_unlock(&sb->lock); 258 259 if (WARN_ON(!page)) { 260 pr_err("%s: accessing invalid ESB page for source %lx !\n", 261 __func__, irq); 262 return VM_FAULT_SIGBUS; 263 } 264 265 vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); 266 return VM_FAULT_NOPAGE; 267 } 268 269 static const struct vm_operations_struct xive_native_esb_vmops = { 270 .fault = xive_native_esb_fault, 271 }; 272 273 static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf) 274 { 275 struct vm_area_struct *vma = vmf->vma; 276 277 switch (vmf->pgoff - vma->vm_pgoff) { 278 case 0: /* HW - forbid access */ 279 case 1: /* HV - forbid access */ 280 return VM_FAULT_SIGBUS; 281 case 2: /* OS */ 282 vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT); 283 return VM_FAULT_NOPAGE; 284 case 3: /* USER - TODO */ 285 default: 286 return VM_FAULT_SIGBUS; 287 } 288 } 289 290 static const struct vm_operations_struct xive_native_tima_vmops = { 291 .fault = xive_native_tima_fault, 292 }; 293 294 static int kvmppc_xive_native_mmap(struct kvm_device *dev, 295 struct vm_area_struct *vma) 296 { 297 struct kvmppc_xive *xive = dev->private; 298 299 /* We only allow mappings at fixed offset for now */ 300 if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) { 301 if (vma_pages(vma) > 4) 302 return -EINVAL; 303 vma->vm_ops = &xive_native_tima_vmops; 304 } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) { 305 if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2) 306 return -EINVAL; 307 vma->vm_ops = &xive_native_esb_vmops; 308 } else { 309 return -EINVAL; 310 } 311 312 vma->vm_flags |= VM_IO | VM_PFNMAP; 313 vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot); 314 315 /* 316 * Grab the KVM device file address_space to be able to clear 317 * the ESB pages mapping when a device is passed-through into 318 * the guest. 319 */ 320 xive->mapping = vma->vm_file->f_mapping; 321 return 0; 322 } 323 324 static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq, 325 u64 addr) 326 { 327 struct kvmppc_xive_src_block *sb; 328 struct kvmppc_xive_irq_state *state; 329 u64 __user *ubufp = (u64 __user *) addr; 330 u64 val; 331 u16 idx; 332 int rc; 333 334 pr_devel("%s irq=0x%lx\n", __func__, irq); 335 336 if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS) 337 return -E2BIG; 338 339 sb = kvmppc_xive_find_source(xive, irq, &idx); 340 if (!sb) { 341 pr_debug("No source, creating source block...\n"); 342 sb = kvmppc_xive_create_src_block(xive, irq); 343 if (!sb) { 344 pr_err("Failed to create block...\n"); 345 return -ENOMEM; 346 } 347 } 348 state = &sb->irq_state[idx]; 349 350 if (get_user(val, ubufp)) { 351 pr_err("fault getting user info !\n"); 352 return -EFAULT; 353 } 354 355 arch_spin_lock(&sb->lock); 356 357 /* 358 * If the source doesn't already have an IPI, allocate 359 * one and get the corresponding data 360 */ 361 if (!state->ipi_number) { 362 state->ipi_number = xive_native_alloc_irq(); 363 if (state->ipi_number == 0) { 364 pr_err("Failed to allocate IRQ !\n"); 365 rc = -ENXIO; 366 goto unlock; 367 } 368 xive_native_populate_irq_data(state->ipi_number, 369 &state->ipi_data); 370 pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__, 371 state->ipi_number, irq); 372 } 373 374 /* Restore LSI state */ 375 if (val & KVM_XIVE_LEVEL_SENSITIVE) { 376 state->lsi = true; 377 if (val & KVM_XIVE_LEVEL_ASSERTED) 378 state->asserted = true; 379 pr_devel(" LSI ! Asserted=%d\n", state->asserted); 380 } 381 382 /* Mask IRQ to start with */ 383 state->act_server = 0; 384 state->act_priority = MASKED; 385 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); 386 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); 387 388 /* Increment the number of valid sources and mark this one valid */ 389 if (!state->valid) 390 xive->src_count++; 391 state->valid = true; 392 393 rc = 0; 394 395 unlock: 396 arch_spin_unlock(&sb->lock); 397 398 return rc; 399 } 400 401 static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive, 402 struct kvmppc_xive_src_block *sb, 403 struct kvmppc_xive_irq_state *state, 404 u32 server, u8 priority, bool masked, 405 u32 eisn) 406 { 407 struct kvm *kvm = xive->kvm; 408 u32 hw_num; 409 int rc = 0; 410 411 arch_spin_lock(&sb->lock); 412 413 if (state->act_server == server && state->act_priority == priority && 414 state->eisn == eisn) 415 goto unlock; 416 417 pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n", 418 priority, server, masked, state->act_server, 419 state->act_priority); 420 421 kvmppc_xive_select_irq(state, &hw_num, NULL); 422 423 if (priority != MASKED && !masked) { 424 rc = kvmppc_xive_select_target(kvm, &server, priority); 425 if (rc) 426 goto unlock; 427 428 state->act_priority = priority; 429 state->act_server = server; 430 state->eisn = eisn; 431 432 rc = xive_native_configure_irq(hw_num, 433 kvmppc_xive_vp(xive, server), 434 priority, eisn); 435 } else { 436 state->act_priority = MASKED; 437 state->act_server = 0; 438 state->eisn = 0; 439 440 rc = xive_native_configure_irq(hw_num, 0, MASKED, 0); 441 } 442 443 unlock: 444 arch_spin_unlock(&sb->lock); 445 return rc; 446 } 447 448 static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive, 449 long irq, u64 addr) 450 { 451 struct kvmppc_xive_src_block *sb; 452 struct kvmppc_xive_irq_state *state; 453 u64 __user *ubufp = (u64 __user *) addr; 454 u16 src; 455 u64 kvm_cfg; 456 u32 server; 457 u8 priority; 458 bool masked; 459 u32 eisn; 460 461 sb = kvmppc_xive_find_source(xive, irq, &src); 462 if (!sb) 463 return -ENOENT; 464 465 state = &sb->irq_state[src]; 466 467 if (!state->valid) 468 return -EINVAL; 469 470 if (get_user(kvm_cfg, ubufp)) 471 return -EFAULT; 472 473 pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg); 474 475 priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >> 476 KVM_XIVE_SOURCE_PRIORITY_SHIFT; 477 server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >> 478 KVM_XIVE_SOURCE_SERVER_SHIFT; 479 masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >> 480 KVM_XIVE_SOURCE_MASKED_SHIFT; 481 eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >> 482 KVM_XIVE_SOURCE_EISN_SHIFT; 483 484 if (priority != xive_prio_from_guest(priority)) { 485 pr_err("invalid priority for queue %d for VCPU %d\n", 486 priority, server); 487 return -EINVAL; 488 } 489 490 return kvmppc_xive_native_update_source_config(xive, sb, state, server, 491 priority, masked, eisn); 492 } 493 494 static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive, 495 long irq, u64 addr) 496 { 497 struct kvmppc_xive_src_block *sb; 498 struct kvmppc_xive_irq_state *state; 499 struct xive_irq_data *xd; 500 u32 hw_num; 501 u16 src; 502 int rc = 0; 503 504 pr_devel("%s irq=0x%lx", __func__, irq); 505 506 sb = kvmppc_xive_find_source(xive, irq, &src); 507 if (!sb) 508 return -ENOENT; 509 510 state = &sb->irq_state[src]; 511 512 rc = -EINVAL; 513 514 arch_spin_lock(&sb->lock); 515 516 if (state->valid) { 517 kvmppc_xive_select_irq(state, &hw_num, &xd); 518 xive_native_sync_source(hw_num); 519 rc = 0; 520 } 521 522 arch_spin_unlock(&sb->lock); 523 return rc; 524 } 525 526 static int xive_native_validate_queue_size(u32 qshift) 527 { 528 /* 529 * We only support 64K pages for the moment. This is also 530 * advertised in the DT property "ibm,xive-eq-sizes" 531 */ 532 switch (qshift) { 533 case 0: /* EQ reset */ 534 case 16: 535 return 0; 536 case 12: 537 case 21: 538 case 24: 539 default: 540 return -EINVAL; 541 } 542 } 543 544 static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, 545 long eq_idx, u64 addr) 546 { 547 struct kvm *kvm = xive->kvm; 548 struct kvm_vcpu *vcpu; 549 struct kvmppc_xive_vcpu *xc; 550 void __user *ubufp = (void __user *) addr; 551 u32 server; 552 u8 priority; 553 struct kvm_ppc_xive_eq kvm_eq; 554 int rc; 555 __be32 *qaddr = 0; 556 struct page *page; 557 struct xive_q *q; 558 gfn_t gfn; 559 unsigned long page_size; 560 int srcu_idx; 561 562 /* 563 * Demangle priority/server tuple from the EQ identifier 564 */ 565 priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> 566 KVM_XIVE_EQ_PRIORITY_SHIFT; 567 server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> 568 KVM_XIVE_EQ_SERVER_SHIFT; 569 570 if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq))) 571 return -EFAULT; 572 573 vcpu = kvmppc_xive_find_server(kvm, server); 574 if (!vcpu) { 575 pr_err("Can't find server %d\n", server); 576 return -ENOENT; 577 } 578 xc = vcpu->arch.xive_vcpu; 579 580 if (priority != xive_prio_from_guest(priority)) { 581 pr_err("Trying to restore invalid queue %d for VCPU %d\n", 582 priority, server); 583 return -EINVAL; 584 } 585 q = &xc->queues[priority]; 586 587 pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", 588 __func__, server, priority, kvm_eq.flags, 589 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); 590 591 /* reset queue and disable queueing */ 592 if (!kvm_eq.qshift) { 593 q->guest_qaddr = 0; 594 q->guest_qshift = 0; 595 596 rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, 597 NULL, 0, true); 598 if (rc) { 599 pr_err("Failed to reset queue %d for VCPU %d: %d\n", 600 priority, xc->server_num, rc); 601 return rc; 602 } 603 604 return 0; 605 } 606 607 /* 608 * sPAPR specifies a "Unconditional Notify (n) flag" for the 609 * H_INT_SET_QUEUE_CONFIG hcall which forces notification 610 * without using the coalescing mechanisms provided by the 611 * XIVE END ESBs. This is required on KVM as notification 612 * using the END ESBs is not supported. 613 */ 614 if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) { 615 pr_err("invalid flags %d\n", kvm_eq.flags); 616 return -EINVAL; 617 } 618 619 rc = xive_native_validate_queue_size(kvm_eq.qshift); 620 if (rc) { 621 pr_err("invalid queue size %d\n", kvm_eq.qshift); 622 return rc; 623 } 624 625 if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { 626 pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr, 627 1ull << kvm_eq.qshift); 628 return -EINVAL; 629 } 630 631 srcu_idx = srcu_read_lock(&kvm->srcu); 632 gfn = gpa_to_gfn(kvm_eq.qaddr); 633 634 page_size = kvm_host_page_size(vcpu, gfn); 635 if (1ull << kvm_eq.qshift > page_size) { 636 srcu_read_unlock(&kvm->srcu, srcu_idx); 637 pr_warn("Incompatible host page size %lx!\n", page_size); 638 return -EINVAL; 639 } 640 641 page = gfn_to_page(kvm, gfn); 642 if (is_error_page(page)) { 643 srcu_read_unlock(&kvm->srcu, srcu_idx); 644 pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); 645 return -EINVAL; 646 } 647 648 qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK); 649 srcu_read_unlock(&kvm->srcu, srcu_idx); 650 651 /* 652 * Backup the queue page guest address to the mark EQ page 653 * dirty for migration. 654 */ 655 q->guest_qaddr = kvm_eq.qaddr; 656 q->guest_qshift = kvm_eq.qshift; 657 658 /* 659 * Unconditional Notification is forced by default at the 660 * OPAL level because the use of END ESBs is not supported by 661 * Linux. 662 */ 663 rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, 664 (__be32 *) qaddr, kvm_eq.qshift, true); 665 if (rc) { 666 pr_err("Failed to configure queue %d for VCPU %d: %d\n", 667 priority, xc->server_num, rc); 668 put_page(page); 669 return rc; 670 } 671 672 /* 673 * Only restore the queue state when needed. When doing the 674 * H_INT_SET_SOURCE_CONFIG hcall, it should not. 675 */ 676 if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) { 677 rc = xive_native_set_queue_state(xc->vp_id, priority, 678 kvm_eq.qtoggle, 679 kvm_eq.qindex); 680 if (rc) 681 goto error; 682 } 683 684 rc = kvmppc_xive_attach_escalation(vcpu, priority, 685 xive->single_escalation); 686 error: 687 if (rc) 688 kvmppc_xive_native_cleanup_queue(vcpu, priority); 689 return rc; 690 } 691 692 static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive, 693 long eq_idx, u64 addr) 694 { 695 struct kvm *kvm = xive->kvm; 696 struct kvm_vcpu *vcpu; 697 struct kvmppc_xive_vcpu *xc; 698 struct xive_q *q; 699 void __user *ubufp = (u64 __user *) addr; 700 u32 server; 701 u8 priority; 702 struct kvm_ppc_xive_eq kvm_eq; 703 u64 qaddr; 704 u64 qshift; 705 u64 qeoi_page; 706 u32 escalate_irq; 707 u64 qflags; 708 int rc; 709 710 /* 711 * Demangle priority/server tuple from the EQ identifier 712 */ 713 priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >> 714 KVM_XIVE_EQ_PRIORITY_SHIFT; 715 server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >> 716 KVM_XIVE_EQ_SERVER_SHIFT; 717 718 vcpu = kvmppc_xive_find_server(kvm, server); 719 if (!vcpu) { 720 pr_err("Can't find server %d\n", server); 721 return -ENOENT; 722 } 723 xc = vcpu->arch.xive_vcpu; 724 725 if (priority != xive_prio_from_guest(priority)) { 726 pr_err("invalid priority for queue %d for VCPU %d\n", 727 priority, server); 728 return -EINVAL; 729 } 730 q = &xc->queues[priority]; 731 732 memset(&kvm_eq, 0, sizeof(kvm_eq)); 733 734 if (!q->qpage) 735 return 0; 736 737 rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift, 738 &qeoi_page, &escalate_irq, &qflags); 739 if (rc) 740 return rc; 741 742 kvm_eq.flags = 0; 743 if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY) 744 kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY; 745 746 kvm_eq.qshift = q->guest_qshift; 747 kvm_eq.qaddr = q->guest_qaddr; 748 749 rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle, 750 &kvm_eq.qindex); 751 if (rc) 752 return rc; 753 754 pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n", 755 __func__, server, priority, kvm_eq.flags, 756 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex); 757 758 if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq))) 759 return -EFAULT; 760 761 return 0; 762 } 763 764 static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb) 765 { 766 int i; 767 768 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { 769 struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; 770 771 if (!state->valid) 772 continue; 773 774 if (state->act_priority == MASKED) 775 continue; 776 777 state->eisn = 0; 778 state->act_server = 0; 779 state->act_priority = MASKED; 780 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); 781 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); 782 if (state->pt_number) { 783 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); 784 xive_native_configure_irq(state->pt_number, 785 0, MASKED, 0); 786 } 787 } 788 } 789 790 static int kvmppc_xive_reset(struct kvmppc_xive *xive) 791 { 792 struct kvm *kvm = xive->kvm; 793 struct kvm_vcpu *vcpu; 794 unsigned int i; 795 796 pr_devel("%s\n", __func__); 797 798 mutex_lock(&xive->lock); 799 800 kvm_for_each_vcpu(i, vcpu, kvm) { 801 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 802 unsigned int prio; 803 804 if (!xc) 805 continue; 806 807 kvmppc_xive_disable_vcpu_interrupts(vcpu); 808 809 for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { 810 811 /* Single escalation, no queue 7 */ 812 if (prio == 7 && xive->single_escalation) 813 break; 814 815 if (xc->esc_virq[prio]) { 816 free_irq(xc->esc_virq[prio], vcpu); 817 irq_dispose_mapping(xc->esc_virq[prio]); 818 kfree(xc->esc_virq_names[prio]); 819 xc->esc_virq[prio] = 0; 820 } 821 822 kvmppc_xive_native_cleanup_queue(vcpu, prio); 823 } 824 } 825 826 for (i = 0; i <= xive->max_sbid; i++) { 827 struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; 828 829 if (sb) { 830 arch_spin_lock(&sb->lock); 831 kvmppc_xive_reset_sources(sb); 832 arch_spin_unlock(&sb->lock); 833 } 834 } 835 836 mutex_unlock(&xive->lock); 837 838 return 0; 839 } 840 841 static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb) 842 { 843 int j; 844 845 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { 846 struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; 847 struct xive_irq_data *xd; 848 u32 hw_num; 849 850 if (!state->valid) 851 continue; 852 853 /* 854 * The struct kvmppc_xive_irq_state reflects the state 855 * of the EAS configuration and not the state of the 856 * source. The source is masked setting the PQ bits to 857 * '-Q', which is what is being done before calling 858 * the KVM_DEV_XIVE_EQ_SYNC control. 859 * 860 * If a source EAS is configured, OPAL syncs the XIVE 861 * IC of the source and the XIVE IC of the previous 862 * target if any. 863 * 864 * So it should be fine ignoring MASKED sources as 865 * they have been synced already. 866 */ 867 if (state->act_priority == MASKED) 868 continue; 869 870 kvmppc_xive_select_irq(state, &hw_num, &xd); 871 xive_native_sync_source(hw_num); 872 xive_native_sync_queue(hw_num); 873 } 874 } 875 876 static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu) 877 { 878 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 879 unsigned int prio; 880 int srcu_idx; 881 882 if (!xc) 883 return -ENOENT; 884 885 for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) { 886 struct xive_q *q = &xc->queues[prio]; 887 888 if (!q->qpage) 889 continue; 890 891 /* Mark EQ page dirty for migration */ 892 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 893 mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr)); 894 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 895 } 896 return 0; 897 } 898 899 static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive) 900 { 901 struct kvm *kvm = xive->kvm; 902 struct kvm_vcpu *vcpu; 903 unsigned int i; 904 905 pr_devel("%s\n", __func__); 906 907 mutex_lock(&xive->lock); 908 for (i = 0; i <= xive->max_sbid; i++) { 909 struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; 910 911 if (sb) { 912 arch_spin_lock(&sb->lock); 913 kvmppc_xive_native_sync_sources(sb); 914 arch_spin_unlock(&sb->lock); 915 } 916 } 917 918 kvm_for_each_vcpu(i, vcpu, kvm) { 919 kvmppc_xive_native_vcpu_eq_sync(vcpu); 920 } 921 mutex_unlock(&xive->lock); 922 923 return 0; 924 } 925 926 static int kvmppc_xive_native_set_attr(struct kvm_device *dev, 927 struct kvm_device_attr *attr) 928 { 929 struct kvmppc_xive *xive = dev->private; 930 931 switch (attr->group) { 932 case KVM_DEV_XIVE_GRP_CTRL: 933 switch (attr->attr) { 934 case KVM_DEV_XIVE_RESET: 935 return kvmppc_xive_reset(xive); 936 case KVM_DEV_XIVE_EQ_SYNC: 937 return kvmppc_xive_native_eq_sync(xive); 938 case KVM_DEV_XIVE_NR_SERVERS: 939 return kvmppc_xive_set_nr_servers(xive, attr->addr); 940 } 941 break; 942 case KVM_DEV_XIVE_GRP_SOURCE: 943 return kvmppc_xive_native_set_source(xive, attr->attr, 944 attr->addr); 945 case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: 946 return kvmppc_xive_native_set_source_config(xive, attr->attr, 947 attr->addr); 948 case KVM_DEV_XIVE_GRP_EQ_CONFIG: 949 return kvmppc_xive_native_set_queue_config(xive, attr->attr, 950 attr->addr); 951 case KVM_DEV_XIVE_GRP_SOURCE_SYNC: 952 return kvmppc_xive_native_sync_source(xive, attr->attr, 953 attr->addr); 954 } 955 return -ENXIO; 956 } 957 958 static int kvmppc_xive_native_get_attr(struct kvm_device *dev, 959 struct kvm_device_attr *attr) 960 { 961 struct kvmppc_xive *xive = dev->private; 962 963 switch (attr->group) { 964 case KVM_DEV_XIVE_GRP_EQ_CONFIG: 965 return kvmppc_xive_native_get_queue_config(xive, attr->attr, 966 attr->addr); 967 } 968 return -ENXIO; 969 } 970 971 static int kvmppc_xive_native_has_attr(struct kvm_device *dev, 972 struct kvm_device_attr *attr) 973 { 974 switch (attr->group) { 975 case KVM_DEV_XIVE_GRP_CTRL: 976 switch (attr->attr) { 977 case KVM_DEV_XIVE_RESET: 978 case KVM_DEV_XIVE_EQ_SYNC: 979 case KVM_DEV_XIVE_NR_SERVERS: 980 return 0; 981 } 982 break; 983 case KVM_DEV_XIVE_GRP_SOURCE: 984 case KVM_DEV_XIVE_GRP_SOURCE_CONFIG: 985 case KVM_DEV_XIVE_GRP_SOURCE_SYNC: 986 if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ && 987 attr->attr < KVMPPC_XIVE_NR_IRQS) 988 return 0; 989 break; 990 case KVM_DEV_XIVE_GRP_EQ_CONFIG: 991 return 0; 992 } 993 return -ENXIO; 994 } 995 996 /* 997 * Called when device fd is closed. kvm->lock is held. 998 */ 999 static void kvmppc_xive_native_release(struct kvm_device *dev) 1000 { 1001 struct kvmppc_xive *xive = dev->private; 1002 struct kvm *kvm = xive->kvm; 1003 struct kvm_vcpu *vcpu; 1004 int i; 1005 1006 pr_devel("Releasing xive native device\n"); 1007 1008 /* 1009 * Clear the KVM device file address_space which is used to 1010 * unmap the ESB pages when a device is passed-through. 1011 */ 1012 mutex_lock(&xive->mapping_lock); 1013 xive->mapping = NULL; 1014 mutex_unlock(&xive->mapping_lock); 1015 1016 /* 1017 * Since this is the device release function, we know that 1018 * userspace does not have any open fd or mmap referring to 1019 * the device. Therefore there can not be any of the 1020 * device attribute set/get, mmap, or page fault functions 1021 * being executed concurrently, and similarly, the 1022 * connect_vcpu and set/clr_mapped functions also cannot 1023 * be being executed. 1024 */ 1025 1026 debugfs_remove(xive->dentry); 1027 1028 /* 1029 * We should clean up the vCPU interrupt presenters first. 1030 */ 1031 kvm_for_each_vcpu(i, vcpu, kvm) { 1032 /* 1033 * Take vcpu->mutex to ensure that no one_reg get/set ioctl 1034 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done. 1035 * Holding the vcpu->mutex also means that the vcpu cannot 1036 * be executing the KVM_RUN ioctl, and therefore it cannot 1037 * be executing the XIVE push or pull code or accessing 1038 * the XIVE MMIO regions. 1039 */ 1040 mutex_lock(&vcpu->mutex); 1041 kvmppc_xive_native_cleanup_vcpu(vcpu); 1042 mutex_unlock(&vcpu->mutex); 1043 } 1044 1045 /* 1046 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type 1047 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe 1048 * against xive code getting called during vcpu execution or 1049 * set/get one_reg operations. 1050 */ 1051 kvm->arch.xive = NULL; 1052 1053 for (i = 0; i <= xive->max_sbid; i++) { 1054 if (xive->src_blocks[i]) 1055 kvmppc_xive_free_sources(xive->src_blocks[i]); 1056 kfree(xive->src_blocks[i]); 1057 xive->src_blocks[i] = NULL; 1058 } 1059 1060 if (xive->vp_base != XIVE_INVALID_VP) 1061 xive_native_free_vp_block(xive->vp_base); 1062 1063 /* 1064 * A reference of the kvmppc_xive pointer is now kept under 1065 * the xive_devices struct of the machine for reuse. It is 1066 * freed when the VM is destroyed for now until we fix all the 1067 * execution paths. 1068 */ 1069 1070 kfree(dev); 1071 } 1072 1073 /* 1074 * Create a XIVE device. kvm->lock is held. 1075 */ 1076 static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) 1077 { 1078 struct kvmppc_xive *xive; 1079 struct kvm *kvm = dev->kvm; 1080 1081 pr_devel("Creating xive native device\n"); 1082 1083 if (kvm->arch.xive) 1084 return -EEXIST; 1085 1086 xive = kvmppc_xive_get_device(kvm, type); 1087 if (!xive) 1088 return -ENOMEM; 1089 1090 dev->private = xive; 1091 xive->dev = dev; 1092 xive->kvm = kvm; 1093 mutex_init(&xive->mapping_lock); 1094 mutex_init(&xive->lock); 1095 1096 /* VP allocation is delayed to the first call to connect_vcpu */ 1097 xive->vp_base = XIVE_INVALID_VP; 1098 /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per sockets 1099 * on a POWER9 system. 1100 */ 1101 xive->nr_servers = KVM_MAX_VCPUS; 1102 1103 xive->single_escalation = xive_native_has_single_escalation(); 1104 xive->ops = &kvmppc_xive_native_ops; 1105 1106 kvm->arch.xive = xive; 1107 return 0; 1108 } 1109 1110 /* 1111 * Interrupt Pending Buffer (IPB) offset 1112 */ 1113 #define TM_IPB_SHIFT 40 1114 #define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT) 1115 1116 int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) 1117 { 1118 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 1119 u64 opal_state; 1120 int rc; 1121 1122 if (!kvmppc_xive_enabled(vcpu)) 1123 return -EPERM; 1124 1125 if (!xc) 1126 return -ENOENT; 1127 1128 /* Thread context registers. We only care about IPB and CPPR */ 1129 val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01; 1130 1131 /* Get the VP state from OPAL */ 1132 rc = xive_native_get_vp_state(xc->vp_id, &opal_state); 1133 if (rc) 1134 return rc; 1135 1136 /* 1137 * Capture the backup of IPB register in the NVT structure and 1138 * merge it in our KVM VP state. 1139 */ 1140 val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK); 1141 1142 pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n", 1143 __func__, 1144 vcpu->arch.xive_saved_state.nsr, 1145 vcpu->arch.xive_saved_state.cppr, 1146 vcpu->arch.xive_saved_state.ipb, 1147 vcpu->arch.xive_saved_state.pipr, 1148 vcpu->arch.xive_saved_state.w01, 1149 (u32) vcpu->arch.xive_cam_word, opal_state); 1150 1151 return 0; 1152 } 1153 1154 int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) 1155 { 1156 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 1157 struct kvmppc_xive *xive = vcpu->kvm->arch.xive; 1158 1159 pr_devel("%s w01=%016llx vp=%016llx\n", __func__, 1160 val->xive_timaval[0], val->xive_timaval[1]); 1161 1162 if (!kvmppc_xive_enabled(vcpu)) 1163 return -EPERM; 1164 1165 if (!xc || !xive) 1166 return -ENOENT; 1167 1168 /* We can't update the state of a "pushed" VCPU */ 1169 if (WARN_ON(vcpu->arch.xive_pushed)) 1170 return -EBUSY; 1171 1172 /* 1173 * Restore the thread context registers. IPB and CPPR should 1174 * be the only ones that matter. 1175 */ 1176 vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0]; 1177 1178 /* 1179 * There is no need to restore the XIVE internal state (IPB 1180 * stored in the NVT) as the IPB register was merged in KVM VP 1181 * state when captured. 1182 */ 1183 return 0; 1184 } 1185 1186 bool kvmppc_xive_native_supported(void) 1187 { 1188 return xive_native_has_queue_state_support(); 1189 } 1190 1191 static int xive_native_debug_show(struct seq_file *m, void *private) 1192 { 1193 struct kvmppc_xive *xive = m->private; 1194 struct kvm *kvm = xive->kvm; 1195 struct kvm_vcpu *vcpu; 1196 unsigned int i; 1197 1198 if (!kvm) 1199 return 0; 1200 1201 seq_puts(m, "=========\nVCPU state\n=========\n"); 1202 1203 kvm_for_each_vcpu(i, vcpu, kvm) { 1204 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; 1205 1206 if (!xc) 1207 continue; 1208 1209 seq_printf(m, "cpu server %#x VP=%#x NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n", 1210 xc->server_num, xc->vp_id, 1211 vcpu->arch.xive_saved_state.nsr, 1212 vcpu->arch.xive_saved_state.cppr, 1213 vcpu->arch.xive_saved_state.ipb, 1214 vcpu->arch.xive_saved_state.pipr, 1215 vcpu->arch.xive_saved_state.w01, 1216 (u32) vcpu->arch.xive_cam_word); 1217 1218 kvmppc_xive_debug_show_queues(m, vcpu); 1219 } 1220 1221 return 0; 1222 } 1223 1224 static int xive_native_debug_open(struct inode *inode, struct file *file) 1225 { 1226 return single_open(file, xive_native_debug_show, inode->i_private); 1227 } 1228 1229 static const struct file_operations xive_native_debug_fops = { 1230 .open = xive_native_debug_open, 1231 .read = seq_read, 1232 .llseek = seq_lseek, 1233 .release = single_release, 1234 }; 1235 1236 static void xive_native_debugfs_init(struct kvmppc_xive *xive) 1237 { 1238 char *name; 1239 1240 name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive); 1241 if (!name) { 1242 pr_err("%s: no memory for name\n", __func__); 1243 return; 1244 } 1245 1246 xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root, 1247 xive, &xive_native_debug_fops); 1248 1249 pr_debug("%s: created %s\n", __func__, name); 1250 kfree(name); 1251 } 1252 1253 static void kvmppc_xive_native_init(struct kvm_device *dev) 1254 { 1255 struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private; 1256 1257 /* Register some debug interfaces */ 1258 xive_native_debugfs_init(xive); 1259 } 1260 1261 struct kvm_device_ops kvm_xive_native_ops = { 1262 .name = "kvm-xive-native", 1263 .create = kvmppc_xive_native_create, 1264 .init = kvmppc_xive_native_init, 1265 .release = kvmppc_xive_native_release, 1266 .set_attr = kvmppc_xive_native_set_attr, 1267 .get_attr = kvmppc_xive_native_get_attr, 1268 .has_attr = kvmppc_xive_native_has_attr, 1269 .mmap = kvmppc_xive_native_mmap, 1270 }; 1271 1272 void kvmppc_xive_native_init_module(void) 1273 { 1274 ; 1275 } 1276 1277 void kvmppc_xive_native_exit_module(void) 1278 { 1279 ; 1280 } 1281