// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

/*
 * ESB MMIO load on the management page of an interrupt source. The
 * offset shift handles a HW errata (XIVE_IRQ_FLAG_SHIFT_BUG).
 */
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
                offset |= offset << 4;

        val = in_be64(xd->eoi_mmio + offset);
        return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_q *q = &xc->queues[prio];

        xive_native_disable_queue(xc->vp_id, q, prio);
        if (q->qpage) {
                put_page(virt_to_page(q->qpage));
                q->qpage = NULL;
        }
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        int i;

        if (!kvmppc_xive_enabled(vcpu))
                return;

        if (!xc)
                return;

        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

        /* Ensure no interrupt is still routed to that VP */
        xc->valid = false;
        kvmppc_xive_disable_vcpu_interrupts(vcpu);

        /* Disable the VP */
        xive_native_disable_vp(xc->vp_id);

        /* Free the queues & associated interrupts */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                /* Free the escalation irq */
                if (xc->esc_virq[i]) {
                        free_irq(xc->esc_virq[i], vcpu);
                        irq_dispose_mapping(xc->esc_virq[i]);
                        kfree(xc->esc_virq_names[i]);
                        xc->esc_virq[i] = 0;
                }

                /* Free the queue */
                kvmppc_xive_native_cleanup_queue(vcpu, i);
        }

        /* Free the VP */
        kfree(xc);

        /* Cleanup the vcpu */
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
        vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                    struct kvm_vcpu *vcpu, u32 server_num)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;

        pr_devel("native_connect_vcpu(server=%d)\n", server_num);

        if (dev->ops != &kvm_xive_native_ops) {
                pr_devel("Wrong ops !\n");
                return -EPERM;
        }
        if (xive->kvm != vcpu->kvm)
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;
        if (server_num >= KVM_MAX_VCPUS) {
                pr_devel("Out of bounds !\n");
                return -EINVAL;
        }

        mutex_lock(&vcpu->kvm->lock);

        if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
                pr_devel("Duplicate !\n");
                rc = -EEXIST;
                goto bail;
        }

        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
        if (!xc) {
                rc = -ENOMEM;
                goto bail;
        }

        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = server_num;

        xc->vp_id = kvmppc_xive_vp(xive, server_num);
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
        if (rc) {
                pr_err("Failed to get VP info from OPAL: %d\n", rc);
                goto bail;
        }

        /*
         * Enable the VP first as the single escalation mode will
         * affect escalation interrupt numbering
         */
        rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
        if (rc) {
                pr_err("Failed to enable VP in OPAL: %d\n", rc);
                goto bail;
        }

        /* Configure VCPU fields for use by assembly push/pull */
        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

        /* TODO: reset all queues to a clean state ? */
bail:
        mutex_unlock(&vcpu->kvm->lock);
        if (rc)
                kvmppc_xive_native_cleanup_vcpu(vcpu);

        return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
        struct kvmppc_xive *xive = kvm->arch.xive;

        if (irq >= KVMPPC_XIVE_NR_IRQS)
                return -EINVAL;

        /*
         * Clear the ESB pages of the IRQ number being mapped (or
         * unmapped) into the guest and let the VM fault handler
         * repopulate with the appropriate ESB pages (device or IC)
         */
        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
        mutex_lock(&xive->mapping_lock);
        if (xive->mapping)
                unmap_mapping_range(xive->mapping,
                                    irq * (2ull << PAGE_SHIFT),
                                    2ull << PAGE_SHIFT, 1);
        mutex_unlock(&xive->mapping_lock);
        return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
        .reset_mapped = kvmppc_xive_native_reset_mapped,
};
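
/*
 * Fault handler for the ESB pages mapped into the guest: compute the
 * source number from the page offset and install the PFN of its
 * trigger page (even page) or EOI/management page (odd page).
 */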
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct kvm_device *dev = vma->vm_file->private_data;
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        u64 page;
        unsigned long irq;
        u64 page_offset;

        /*
         * Linux/KVM uses a two-page ESB setting, one for trigger and
         * one for EOI
         */
        page_offset = vmf->pgoff - vma->vm_pgoff;
        irq = page_offset / 2;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel("%s: source %lx not found !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        state = &sb->irq_state[src];
        kvmppc_xive_select_irq(state, &hw_num, &xd);

        arch_spin_lock(&sb->lock);

        /*
         * first/even page is for trigger
         * second/odd page is for EOI and management.
         */
        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
        arch_spin_unlock(&sb->lock);

        if (WARN_ON(!page)) {
                pr_err("%s: accessing invalid ESB page for source %lx !\n",
                       __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
        return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
        .fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        switch (vmf->pgoff - vma->vm_pgoff) {
        case 0: /* HW - forbid access */
        case 1: /* HV - forbid access */
                return VM_FAULT_SIGBUS;
        case 2: /* OS */
                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
                return VM_FAULT_NOPAGE;
        case 3: /* USER - TODO */
        default:
                return VM_FAULT_SIGBUS;
        }
}

static const struct vm_operations_struct xive_native_tima_vmops = {
        .fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
                                   struct vm_area_struct *vma)
{
        struct kvmppc_xive *xive = dev->private;

        /* We only allow mappings at fixed offset for now */
        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
                if (vma_pages(vma) > 4)
                        return -EINVAL;
                vma->vm_ops = &xive_native_tima_vmops;
        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
                        return -EINVAL;
                vma->vm_ops = &xive_native_esb_vmops;
        } else {
                return -EINVAL;
        }

        vma->vm_flags |= VM_IO | VM_PFNMAP;
        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

        /*
         * Grab the KVM device file address_space to be able to clear
         * the ESB pages mapping when a device is passed-through into
         * the guest.
         */
        xive->mapping = vma->vm_file->f_mapping;
        return 0;
}
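
/*
 * Create a guest interrupt source: allocate the backing IPI if the
 * source does not have one yet, restore its LSI state and leave it
 * masked until it is configured by KVM_DEV_XIVE_GRP_SOURCE_CONFIG.
 */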
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
                                         u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u64 val;
        u16 idx;
        int rc;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
                return -E2BIG;

        sb = kvmppc_xive_find_source(xive, irq, &idx);
        if (!sb) {
                pr_debug("No source, creating source block...\n");
                sb = kvmppc_xive_create_src_block(xive, irq);
                if (!sb) {
                        pr_err("Failed to create block...\n");
                        return -ENOMEM;
                }
        }
        state = &sb->irq_state[idx];

        if (get_user(val, ubufp)) {
                pr_err("fault getting user info !\n");
                return -EFAULT;
        }

        arch_spin_lock(&sb->lock);

        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
         */
        if (!state->ipi_number) {
                state->ipi_number = xive_native_alloc_irq();
                if (state->ipi_number == 0) {
                        pr_err("Failed to allocate IRQ !\n");
                        rc = -ENXIO;
                        goto unlock;
                }
                xive_native_populate_irq_data(state->ipi_number,
                                              &state->ipi_data);
                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
                         state->ipi_number, irq);
        }

        /* Restore LSI state */
        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
                state->lsi = true;
                if (val & KVM_XIVE_LEVEL_ASSERTED)
                        state->asserted = true;
                pr_devel(" LSI ! Asserted=%d\n", state->asserted);
        }

        /* Mask IRQ to start with */
        state->act_server = 0;
        state->act_priority = MASKED;
        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

        /* Increment the number of valid sources and mark this one valid */
        if (!state->valid)
                xive->src_count++;
        state->valid = true;

        rc = 0;

unlock:
        arch_spin_unlock(&sb->lock);

        return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
                                        struct kvmppc_xive_src_block *sb,
                                        struct kvmppc_xive_irq_state *state,
                                        u32 server, u8 priority, bool masked,
                                        u32 eisn)
{
        struct kvm *kvm = xive->kvm;
        u32 hw_num;
        int rc = 0;

        arch_spin_lock(&sb->lock);

        if (state->act_server == server && state->act_priority == priority &&
            state->eisn == eisn)
                goto unlock;

        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
                 priority, server, masked, state->act_server,
                 state->act_priority);

        kvmppc_xive_select_irq(state, &hw_num, NULL);

        if (priority != MASKED && !masked) {
                rc = kvmppc_xive_select_target(kvm, &server, priority);
                if (rc)
                        goto unlock;

                state->act_priority = priority;
                state->act_server = server;
                state->eisn = eisn;

                rc = xive_native_configure_irq(hw_num,
                                               kvmppc_xive_vp(xive, server),
                                               priority, eisn);
        } else {
                state->act_priority = MASKED;
                state->act_server = 0;
                state->eisn = 0;

                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
        }

unlock:
        arch_spin_unlock(&sb->lock);
        return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
                                                long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u16 src;
        u64 kvm_cfg;
        u32 server;
        u8 priority;
        bool masked;
        u32 eisn;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        if (!state->valid)
                return -EINVAL;

        if (get_user(kvm_cfg, ubufp))
                return -EFAULT;

        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
                KVM_XIVE_SOURCE_SERVER_SHIFT;
        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
                KVM_XIVE_SOURCE_MASKED_SHIFT;
        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
                KVM_XIVE_SOURCE_EISN_SHIFT;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }

        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
                                                       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
                                          long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        int rc = 0;

        pr_devel("%s irq=0x%lx", __func__, irq);

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        rc = -EINVAL;

        arch_spin_lock(&sb->lock);

        if (state->valid) {
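                /* Have OPAL sync the XIVE IC of the source */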
                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                rc = 0;
        }

        arch_spin_unlock(&sb->lock);
        return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
        /*
         * We only support 64K pages for the moment. This is also
         * advertised in the DT property "ibm,xive-eq-sizes"
         */
        switch (qshift) {
        case 0: /* EQ reset */
        case 16:
                return 0;
        case 12:
        case 21:
        case 24:
        default:
                return -EINVAL;
        }
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        int rc;
        __be32 *qaddr = 0;
        struct page *page;
        struct xive_q *q;
        gfn_t gfn;
        unsigned long page_size;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
                return -EFAULT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        /*
         * sPAPR specifies an "Unconditional Notify (n) flag" for the
         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
         * without using the coalescing mechanisms provided by the
         * XIVE END ESBs. This is required on KVM as notification
         * using the END ESBs is not supported.
         */
        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
                pr_err("invalid flags %d\n", kvm_eq.flags);
                return -EINVAL;
        }

        rc = xive_native_validate_queue_size(kvm_eq.qshift);
        if (rc) {
                pr_err("invalid queue size %d\n", kvm_eq.qshift);
                return rc;
        }

        /* reset queue and disable queueing */
        if (!kvm_eq.qshift) {
                q->guest_qaddr = 0;
                q->guest_qshift = 0;

                rc = xive_native_configure_queue(xc->vp_id, q, priority,
                                                 NULL, 0, true);
                if (rc) {
                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
                               priority, xc->server_num, rc);
                        return rc;
                }

                if (q->qpage) {
                        put_page(virt_to_page(q->qpage));
                        q->qpage = NULL;
                }

                return 0;
        }

        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
                       1ull << kvm_eq.qshift);
                return -EINVAL;
        }

        gfn = gpa_to_gfn(kvm_eq.qaddr);
        page = gfn_to_page(kvm, gfn);
        if (is_error_page(page)) {
                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
                return -EINVAL;
        }

        page_size = kvm_host_page_size(kvm, gfn);
        if (1ull << kvm_eq.qshift > page_size) {
                pr_warn("Incompatible host page size %lx!\n", page_size);
                return -EINVAL;
        }

        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);

        /*
         * Back up the queue page guest address to mark the EQ page
         * dirty for migration.
         */
        q->guest_qaddr = kvm_eq.qaddr;
        q->guest_qshift = kvm_eq.qshift;

        /*
         * Unconditional Notification is forced by default at the
         * OPAL level because the use of END ESBs is not supported by
         * Linux.
         */
        rc = xive_native_configure_queue(xc->vp_id, q, priority,
                                         (__be32 *) qaddr, kvm_eq.qshift, true);
        if (rc) {
                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
                       priority, xc->server_num, rc);
                put_page(page);
                return rc;
        }

        /*
         * Only restore the queue state when needed. When doing the
         * H_INT_SET_SOURCE_CONFIG hcall, it should not.
         */
        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
                rc = xive_native_set_queue_state(xc->vp_id, priority,
                                                 kvm_eq.qtoggle,
                                                 kvm_eq.qindex);
                if (rc)
                        goto error;
        }

        rc = kvmppc_xive_attach_escalation(vcpu, priority,
                                           xive->single_escalation);
error:
        if (rc)
                kvmppc_xive_native_cleanup_queue(vcpu, priority);
        return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        struct xive_q *q;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        u64 qaddr;
        u64 qshift;
        u64 qeoi_page;
        u32 escalate_irq;
        u64 qflags;
        int rc;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        memset(&kvm_eq, 0, sizeof(kvm_eq));

        if (!q->qpage)
                return 0;

        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
                                        &qeoi_page, &escalate_irq, &qflags);
        if (rc)
                return rc;

        kvm_eq.flags = 0;
        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

        kvm_eq.qshift = q->guest_qshift;
        kvm_eq.qaddr = q->guest_qaddr;

        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
                                         &kvm_eq.qindex);
        if (rc)
                return rc;

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
                return -EFAULT;

        return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
        int i;

        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

                if (!state->valid)
                        continue;

                if (state->act_priority == MASKED)
                        continue;

                state->eisn = 0;
                state->act_server = 0;
                state->act_priority = MASKED;
                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
                if (state->pt_number) {
                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
                        xive_native_configure_irq(state->pt_number,
                                                  0, MASKED, 0);
                }
        }
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&kvm->lock);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
                unsigned int prio;

                if (!xc)
                        continue;

                kvmppc_xive_disable_vcpu_interrupts(vcpu);

                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

                        /* Single escalation, no queue 7 */
                        if (prio == 7 && xive->single_escalation)
                                break;
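
                        /* Free the escalation irq of the queue, if any */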
                        if (xc->esc_virq[prio]) {
                                free_irq(xc->esc_virq[prio], vcpu);
                                irq_dispose_mapping(xc->esc_virq[prio]);
                                kfree(xc->esc_virq_names[prio]);
                                xc->esc_virq[prio] = 0;
                        }

                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
                }
        }

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_reset_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        mutex_unlock(&kvm->lock);

        return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
        int j;

        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
                struct xive_irq_data *xd;
                u32 hw_num;

                if (!state->valid)
                        continue;

                /*
                 * The struct kvmppc_xive_irq_state reflects the state
                 * of the EAS configuration and not the state of the
                 * source. The source is masked by setting the PQ bits
                 * to '-Q', which is what is done before calling the
                 * KVM_DEV_XIVE_EQ_SYNC control.
                 *
                 * If a source EAS is configured, OPAL syncs the XIVE
                 * IC of the source and the XIVE IC of the previous
                 * target if any.
                 *
                 * So it should be fine ignoring MASKED sources as
                 * they have been synced already.
                 */
                if (state->act_priority == MASKED)
                        continue;

                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                xive_native_sync_queue(hw_num);
        }
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        unsigned int prio;

        if (!xc)
                return -ENOENT;

        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];

                if (!q->qpage)
                        continue;

                /* Mark EQ page dirty for migration */
                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
        }
        return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&kvm->lock);
        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_native_sync_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvmppc_xive_native_vcpu_eq_sync(vcpu);
        }
        mutex_unlock(&kvm->lock);

        return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                        return kvmppc_xive_reset(xive);
                case KVM_DEV_XIVE_EQ_SYNC:
                        return kvmppc_xive_native_eq_sync(xive);
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
                return kvmppc_xive_native_set_source(xive, attr->attr,
                                                     attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
                return kvmppc_xive_native_set_source_config(xive, attr->attr,
                                                            attr->addr);
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
                                                           attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                return kvmppc_xive_native_sync_source(xive, attr->attr,
                                                      attr->addr);
        }
        return -ENXIO;
}
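
/* Only the EQ configuration (KVM_DEV_XIVE_GRP_EQ_CONFIG) can be read back */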
static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
                                                           attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                case KVM_DEV_XIVE_EQ_SYNC:
                        return 0;
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
                    attr->attr < KVMPPC_XIVE_NR_IRQS)
                        return 0;
                break;
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return 0;
        }
        return -ENXIO;
}

/*
 * Called when device fd is closed
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;
        int was_ready;

        debugfs_remove(xive->dentry);

        pr_devel("Releasing xive native device\n");

        /*
         * Clearing mmu_ready temporarily while holding kvm->lock
         * is a way of ensuring that no vcpus can enter the guest
         * until we drop kvm->lock. Doing kick_all_cpus_sync()
         * ensures that any vcpu executing inside the guest has
         * exited the guest. Once kick_all_cpus_sync() has finished,
         * we know that no vcpu can be executing the XIVE push or
         * pull code or accessing the XIVE MMIO regions.
         *
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device. Therefore there cannot be any of the device
         * attribute set/get, mmap, or page fault functions being
         * executed concurrently, and similarly, the connect_vcpu
         * and set/clr_mapped functions cannot be running either.
         */
        was_ready = kvm->arch.mmu_ready;
        kvm->arch.mmu_ready = 0;
        kick_all_cpus_sync();

        /*
         * We should clean up the vCPU interrupt presenters first.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) is in progress.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }

        kvm->arch.xive = NULL;

        for (i = 0; i <= xive->max_sbid; i++) {
                if (xive->src_blocks[i])
                        kvmppc_xive_free_sources(xive->src_blocks[i]);
                kfree(xive->src_blocks[i]);
                xive->src_blocks[i] = NULL;
        }

        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);

        kvm->arch.mmu_ready = was_ready;

        /*
         * A reference to the kvmppc_xive pointer is now kept under
         * the xive_devices struct of the machine for reuse. It is
         * freed when the VM is destroyed for now until we fix all the
         * execution paths.
         */

        kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
        struct kvmppc_xive *xive;
        struct kvm *kvm = dev->kvm;
        int ret = 0;

        pr_devel("Creating xive native device\n");

        if (kvm->arch.xive)
                return -EEXIST;

        xive = kvmppc_xive_get_device(kvm, type);
        if (!xive)
                return -ENOMEM;

        dev->private = xive;
        xive->dev = dev;
        xive->kvm = kvm;
        kvm->arch.xive = xive;
        mutex_init(&xive->mapping_lock);

        /*
         * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
         * a default. Getting the max number of CPUs the VM was
         * configured with would improve our usage of the XIVE VP space.
         */
        xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
        pr_devel("VP_Base=%x\n", xive->vp_base);

        if (xive->vp_base == XIVE_INVALID_VP)
                ret = -ENXIO;

        xive->single_escalation = xive_native_has_single_escalation();
        xive->ops = &kvmppc_xive_native_ops;

        if (ret)
                kfree(xive);

        return ret;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u64 opal_state;
        int rc;

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc)
                return -ENOENT;

        /* Thread context registers. We only care about IPB and CPPR */
        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

        /* Get the VP state from OPAL */
        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
        if (rc)
                return rc;

        /*
         * Capture the backup of the IPB register in the NVT structure
         * and merge it in our KVM VP state.
         */
        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

        pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
                 __func__,
                 vcpu->arch.xive_saved_state.nsr,
                 vcpu->arch.xive_saved_state.cppr,
                 vcpu->arch.xive_saved_state.ipb,
                 vcpu->arch.xive_saved_state.pipr,
                 vcpu->arch.xive_saved_state.w01,
                 (u32) vcpu->arch.xive_cam_word, opal_state);

        return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
                 val->xive_timaval[0], val->xive_timaval[1]);

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc || !xive)
                return -ENOENT;

        /* We can't update the state of a "pushed" VCPU */
        if (WARN_ON(vcpu->arch.xive_pushed))
                return -EBUSY;

        /*
         * Restore the thread context registers. IPB and CPPR should
         * be the only ones that matter.
         */
        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

        /*
         * There is no need to restore the XIVE internal state (IPB
         * stored in the NVT) as the IPB register was merged into the
         * KVM VP state when captured.
         */
        return 0;
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        if (!kvm)
                return 0;

        seq_puts(m, "=========\nVCPU state\n=========\n");

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

                if (!xc)
                        continue;

                seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
                           xc->server_num,
                           vcpu->arch.xive_saved_state.nsr,
                           vcpu->arch.xive_saved_state.cppr,
                           vcpu->arch.xive_saved_state.ipb,
                           vcpu->arch.xive_saved_state.pipr,
                           vcpu->arch.xive_saved_state.w01,
                           (u32) vcpu->arch.xive_cam_word);

                kvmppc_xive_debug_show_queues(m, vcpu);
        }

        return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
        .open = xive_native_debug_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
        char *name;

        name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
        if (!name) {
                pr_err("%s: no memory for name\n", __func__);
                return;
        }

        xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
                                           xive, &xive_native_debug_fops);

        pr_debug("%s: created %s\n", __func__, name);
        kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

        /* Register some debug interfaces */
        xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
        .name = "kvm-xive-native",
        .create = kvmppc_xive_native_create,
        .init = kvmppc_xive_native_init,
        .release = kvmppc_xive_native_release,
        .set_attr = kvmppc_xive_native_set_attr,
        .get_attr = kvmppc_xive_native_get_attr,
        .has_attr = kvmppc_xive_native_has_attr,
        .mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
        ;
}

void kvmppc_xive_native_exit_module(void)
{
        ;
}