// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Free the queues & associated interrupts */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}

		/* Free the queue */
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = kvmppc_xive_vp(xive, server_num);
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

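	/*
	 * xc->vp_cam is the CAM line value the hardware matches against
	 * when presenting an interrupt to this vCPU. It is loaded into
	 * the OS CAM word of the thread interrupt management area when
	 * the vCPU context is pushed on guest entry.
	 */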
	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the numbering of the escalation interrupts
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

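	/*
	 * Read the ESB page address under the source block lock so that
	 * it is consistent with a concurrent passthrough (set/clr mapped)
	 * update of the source.
	 */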
	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
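		/*
		 * Syncing the source in the XIVE IC ensures that any
		 * interrupt in flight from this source has been forwarded
		 * to the interrupt controller.
		 */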
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When servicing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not be restored.
	 */
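	/*
	 * A queue freshly configured by the call above starts with
	 * qtoggle=1 and qindex=0, so the OPAL state only needs to be
	 * set when migration restores something different.
	 */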
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
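			/*
			 * In single escalation mode, priority 7 is reserved
			 * by the firmware for escalation events, so there is
			 * no queue 7 to clean up here.
			 */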
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore none of the device attribute set/get,
	 * mmap, or page fault functions can be executing concurrently,
	 * and similarly, the connect_vcpu and set/clr_mapped functions
	 * cannot be running either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference to the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. For now,
	 * it is freed when the VM is destroyed, until all the
	 * execution paths are fixed.
	 */

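	/*
	 * With a release() method, the kvm_device framework will not call
	 * destroy(), so the device structure must be freed here once the
	 * last file descriptor reference is gone.
	 */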
	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	if (ret)
		kfree(xive);

	return ret;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of the IPB register in the NVT structure
	 * and merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}