// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							       xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = kvmppc_xive_vp(xive, server_num);
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the escalation interrupt numbering
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Backup the queue page guest address so we can mark the EQ
	 * page dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be executing.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	if (ret)
		return ret;

	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of the IPB register in the NVT structure
	 * and merge it into our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}