/*
 * Copyright 2012 Michael Ellerman, IBM Corporation.
 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/anon_inodes.h>

#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/debug.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xics.h"

#if 1
#define XICS_DBG(fmt...) do { } while (0)
#else
#define XICS_DBG(fmt...) trace_printk(fmt)
#endif

#define ENABLE_REALMODE	true
#define DEBUG_REALMODE	false

/*
 * LOCKING
 * =======
 *
 * Each ICS has a mutex protecting the information about the IRQ
 * sources and avoiding simultaneous deliveries of the same interrupt.
 *
 * ICP operations are done via a single compare & swap transaction
 * (most ICP state fits in the union kvmppc_icp_state)
 */

/*
 * TODO
 * ====
 *
 * - To speed up resends, keep a bitmap of "resend" set bits in the
 *   ICS
 *
 * - Speed up server# -> ICP lookup (array ? hash table ?)
 *
 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
 *   locks array to improve scalability
 */
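
/*
 * For reference, the per-vCPU ICP state manipulated below lives in
 * union kvmppc_icp_state; only the fields actually used in this file
 * are listed here, the full definition and exact bit layout are in
 * book3s_xics.h.  The whole union fits in icp->state.raw so that it
 * can be updated with a single cmpxchg64():
 *
 *	raw		- the whole state as one word, for cmpxchg64()
 *	cppr		- current processor priority
 *	mfrr		- most favoured request register (IPI priority)
 *	pending_pri	- priority of the interrupt held in xisr
 *	xisr		- pending interrupt source number (0 = none)
 *	need_resend	- a delivery failed, an ICS resend may be needed
 *	out_ee		- whether the external interrupt line is raised
 */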

/* -- ICS routines -- */

static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			    u32 new_irq);

static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
			   bool report_status)
{
	struct ics_irq_state *state;
	struct kvmppc_ics *ics;
	u16 src;

	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics) {
		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
		return -EINVAL;
	}
	state = &ics->irq_state[src];
	if (!state->exists)
		return -EINVAL;

	if (report_status)
		return state->asserted;

	/*
	 * We set state->asserted locklessly. This should be fine as
	 * we are the only setter, thus concurrent access is undefined
	 * to begin with.
	 */
	if (level == KVM_INTERRUPT_SET_LEVEL)
		state->asserted = 1;
	else if (level == KVM_INTERRUPT_UNSET) {
		state->asserted = 0;
		return 0;
	}

	/* Attempt delivery */
	icp_deliver_irq(xics, NULL, irq);

	return state->asserted;
}

static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
			     struct kvmppc_icp *icp)
{
	int i;

	mutex_lock(&ics->lock);

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct ics_irq_state *state = &ics->irq_state[i];

		if (!state->resend)
			continue;

		XICS_DBG("resend %#x prio %#x\n", state->number,
			 state->priority);

		mutex_unlock(&ics->lock);
		icp_deliver_irq(xics, icp, state->number);
		mutex_lock(&ics->lock);
	}

	mutex_unlock(&ics->lock);
}

static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
		       struct ics_irq_state *state,
		       u32 server, u32 priority, u32 saved_priority)
{
	bool deliver;

	mutex_lock(&ics->lock);

	state->server = server;
	state->priority = priority;
	state->saved_priority = saved_priority;
	deliver = false;
	if ((state->masked_pending || state->resend) && priority != MASKED) {
		state->masked_pending = 0;
		deliver = true;
	}

	mutex_unlock(&ics->lock);

	return deliver;
}

int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_icp *icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	icp = kvmppc_xics_find_server(kvm, server);
	if (!icp)
		return -EINVAL;

	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
		 irq, server, priority,
		 state->masked_pending, state->resend);

	if (write_xive(xics, ics, state, server, priority, priority))
		icp_deliver_irq(xics, icp, irq);

	return 0;
}

int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	mutex_lock(&ics->lock);
	*server = state->server;
	*priority = state->priority;
	mutex_unlock(&ics->lock);

	return 0;
}

int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_icp *icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	icp = kvmppc_xics_find_server(kvm, state->server);
	if (!icp)
		return -EINVAL;

	if (write_xive(xics, ics, state, state->server, state->saved_priority,
		       state->saved_priority))
		icp_deliver_irq(xics, icp, irq);

	return 0;
}

int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	write_xive(xics, ics, state, state->server, MASKED, state->priority);

	return 0;
}
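
/*
 * Note on masking: a source's "XIVE", as seen by the guest, is simply
 * its (server, priority) pair.  kvmppc_xics_int_off() masks a source
 * by writing priority == MASKED while preserving the previous value in
 * saved_priority; kvmppc_xics_int_on() restores saved_priority.
 * write_xive() reports whether a delivery attempt should follow,
 * i.e. whether a masked-pending or to-be-resent interrupt just became
 * deliverable.
 */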

/* -- ICP routines, including hcalls -- */

static inline bool icp_try_update(struct kvmppc_icp *icp,
				  union kvmppc_icp_state old,
				  union kvmppc_icp_state new,
				  bool change_self)
{
	bool success;

	/* Calculate new output value */
	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));

	/* Attempt atomic update */
	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
	if (!success)
		goto bail;

	XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
		 icp->server_num,
		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
		 old.need_resend, old.out_ee);
	XICS_DBG("UPD        - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
		 new.need_resend, new.out_ee);
	/*
	 * Check for output state update
	 *
	 * Note that this is racy since another processor could be updating
	 * the state already. This is why we never clear the interrupt output
	 * here, we only ever set it. The clear only happens prior to doing
	 * an update and only by the processor itself. Currently we do it
	 * in Accept (H_XIRR) and Up_Cppr (H_CPPR).
	 *
	 * We also do not try to figure out whether the EE state has changed,
	 * we unconditionally set it if the new state calls for it. The reason
	 * for that is that we opportunistically remove the pending interrupt
	 * flag when raising CPPR, so we need to set it back here if an
	 * interrupt is still pending.
	 */
	if (new.out_ee) {
		kvmppc_book3s_queue_irqprio(icp->vcpu,
					    BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
		if (!change_self)
			kvmppc_fast_vcpu_kick(icp->vcpu);
	}
 bail:
	return success;
}

static void icp_check_resend(struct kvmppc_xics *xics,
			     struct kvmppc_icp *icp)
{
	u32 icsid;

	/* Order this load with the test for need_resend in the caller */
	smp_rmb();
	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
		struct kvmppc_ics *ics = xics->ics[icsid];

		if (!test_and_clear_bit(icsid, icp->resend_map))
			continue;
		if (!ics)
			continue;
		ics_check_resend(xics, ics, icp);
	}
}
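
/*
 * An interrupt can only be presented to an ICP if its priority is
 * strictly more favoured (numerically lower) than all three of the
 * ICP's current CPPR, its MFRR and the priority of any interrupt
 * already pending in XISR.  icp_try_to_deliver() below applies exactly
 * that test as part of an atomic ICP state update.
 */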

static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
			       u32 *reject)
{
	union kvmppc_icp_state old_state, new_state;
	bool success;

	XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
		 icp->server_num);

	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		*reject = 0;

		/* See if we can deliver */
		success = new_state.cppr > priority &&
			new_state.mfrr > priority &&
			new_state.pending_pri > priority;

		/*
		 * If we can, check for a rejection and perform the
		 * delivery
		 */
		if (success) {
			*reject = new_state.xisr;
			new_state.xisr = irq;
			new_state.pending_pri = priority;
		} else {
			/*
			 * If we failed to deliver we set need_resend
			 * so a subsequent CPPR state change causes us
			 * to try a new delivery.
			 */
			new_state.need_resend = true;
		}

	} while (!icp_try_update(icp, old_state, new_state, false));

	return success;
}

static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			    u32 new_irq)
{
	struct ics_irq_state *state;
	struct kvmppc_ics *ics;
	u32 reject;
	u16 src;

	/*
	 * This is used both for initial delivery of an interrupt and
	 * for subsequent rejection.
	 *
	 * Rejection can be racy vs. resends. We have evaluated the
	 * rejection in an atomic ICP transaction which is now complete,
	 * so potentially the ICP can already accept the interrupt again.
	 *
	 * So we need to retry the delivery. Essentially the reject path
	 * boils down to a failed delivery. Always.
	 *
	 * Now the interrupt could also have moved to a different target,
	 * thus we may need to re-do the ICP lookup as well
	 */

 again:
	/* Get the ICS state and lock it */
	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
	if (!ics) {
		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
		return;
	}
	state = &ics->irq_state[src];

	/* Get a lock on the ICS */
	mutex_lock(&ics->lock);

	/* Get our server */
	if (!icp || state->server != icp->server_num) {
		icp = kvmppc_xics_find_server(xics->kvm, state->server);
		if (!icp) {
			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
				new_irq, state->server);
			goto out;
		}
	}

	/* Clear the resend bit of that interrupt */
	state->resend = 0;

	/*
	 * If masked, bail out
	 *
	 * Note: PAPR doesn't mention anything about masked pending
	 * when doing a resend, only when doing a delivery.
	 *
	 * However that would have the effect of losing a masked
	 * interrupt that was rejected and isn't consistent with
	 * the whole masked_pending business which is about not
	 * losing interrupts that occur while masked.
	 *
	 * I don't differentiate normal deliveries and resends, so this
	 * implementation will differ from PAPR and not lose such
	 * interrupts.
	 */
	if (state->priority == MASKED) {
		XICS_DBG("irq %#x masked pending\n", new_irq);
		state->masked_pending = 1;
		goto out;
	}

	/*
	 * Try the delivery, this will set the need_resend flag
	 * in the ICP as part of the atomic transaction if the
	 * delivery is not possible.
	 *
	 * Note that if successful, the new delivery might have itself
	 * rejected an interrupt that was "delivered" before we took the
	 * ics mutex.
	 *
	 * In this case we do the whole sequence all over again for the
	 * new guy. We cannot assume that the rejected interrupt is less
	 * favored than the new one, and thus doesn't need to be delivered,
	 * because by the time we exit icp_try_to_deliver() the target
	 * processor may well have already consumed & completed it, and thus
	 * the rejected interrupt might actually be already acceptable.
	 */
	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
		/*
		 * Delivery was successful, did we reject somebody else ?
		 */
		if (reject && reject != XICS_IPI) {
			mutex_unlock(&ics->lock);
			new_irq = reject;
			goto again;
		}
	} else {
		/*
		 * We failed to deliver the interrupt; we need to set the
		 * resend map bit and mark the ICS state as needing a resend
		 */
		set_bit(ics->icsid, icp->resend_map);
		state->resend = 1;

		/*
		 * If the need_resend flag got cleared in the ICP some time
		 * between icp_try_to_deliver() atomic update and now, then
		 * we know it might have missed the resend_map bit. So we
		 * retry
		 */
		smp_mb();
		if (!icp->state.need_resend) {
			mutex_unlock(&ics->lock);
			goto again;
		}
	}
 out:
	mutex_unlock(&ics->lock);
}

static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			  u8 new_cppr)
{
	union kvmppc_icp_state old_state, new_state;
	bool resend;

	/*
	 * This handles several related states in one operation:
	 *
	 * ICP State: Down_CPPR
	 *
	 * Load CPPR with new value and if the XISR is 0
	 * then check for resends:
	 *
	 * ICP State: Resend
	 *
	 * If MFRR is more favored than CPPR, check for IPIs
	 * and notify ICS of a potential resend. This is done
	 * asynchronously (when used in real mode, we will have
	 * to exit here).
	 *
	 * We do not handle the complete Check_IPI as documented
	 * here. In the PAPR, this state will be used for both
	 * Set_MFRR and Down_CPPR. However, we know that we aren't
	 * changing the MFRR state here so we don't need to handle
	 * the case of an MFRR causing a reject of a pending irq,
	 * this will have been handled when the MFRR was set in the
	 * first place.
	 *
	 * Thus we don't have to handle rejects, only resends.
	 *
	 * When implementing real mode for HV KVM, resend will lead to
	 * a H_TOO_HARD return and the whole transaction will be handled
	 * in virtual mode.
	 */
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		/* Down_CPPR */
		new_state.cppr = new_cppr;

		/*
		 * Cut down Resend / Check_IPI / IPI
		 *
		 * The logic is that we cannot have a pending interrupt
		 * trumped by an IPI at this point (see above), so we
		 * know that either the pending interrupt is already an
		 * IPI (in which case we don't care to override it) or
		 * it's either more favored than us or non-existent
		 */
		if (new_state.mfrr < new_cppr &&
		    new_state.mfrr <= new_state.pending_pri) {
			WARN_ON(new_state.xisr != XICS_IPI &&
				new_state.xisr != 0);
			new_state.pending_pri = new_state.mfrr;
			new_state.xisr = XICS_IPI;
		}

		/* Latch/clear resend bit */
		resend = new_state.need_resend;
		new_state.need_resend = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Now handle resend checks. Those are asynchronous to the ICP
	 * state update in HW (ie bus transactions) so we can handle them
	 * separately here too
	 */
	if (resend)
		icp_check_resend(xics, icp);
}
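
/*
 * The XIRR value exchanged with the guest packs the previous CPPR in
 * the top byte and the interrupt source number (XISR) in the low 24
 * bits, as constructed in kvmppc_h_xirr() below and decoded again in
 * kvmppc_h_eoi():
 *
 *	xirr = (cppr << 24) | xisr
 */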

static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 xirr;

	/* First, remove EE from the processor */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	/*
	 * ICP State: Accept_Interrupt
	 *
	 * Return the pending interrupt (if any) along with the
	 * current CPPR, then clear the XISR & set CPPR to the
	 * pending priority
	 */
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
		if (!old_state.xisr)
			break;
		new_state.cppr = new_state.pending_pri;
		new_state.pending_pri = 0xff;
		new_state.xisr = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);

	return xirr;
}

static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
				 unsigned long mfrr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp;
	u32 reject;
	bool resend;
	bool local;

	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
		 vcpu->vcpu_id, server, mfrr);

	icp = vcpu->arch.icp;
	local = icp->server_num == server;
	if (!local) {
		icp = kvmppc_xics_find_server(vcpu->kvm, server);
		if (!icp)
			return H_PARAMETER;
	}

	/*
	 * ICP state: Set_MFRR
	 *
	 * If the CPPR is more favored than the new MFRR, then
	 * nothing needs to be rejected as there can be no XISR to
	 * reject.  If the MFRR is being made less favored then
	 * there might be a previously-rejected interrupt needing
	 * to be resent.
	 *
	 * If the CPPR is less favored, then we might be replacing
	 * an interrupt, and thus need to possibly reject it as in
	 *
	 * ICP state: Check_IPI
	 */
	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		/* Set_MFRR */
		new_state.mfrr = mfrr;

		/* Check_IPI */
		reject = 0;
		resend = false;
		if (mfrr < new_state.cppr) {
			/* Reject a pending interrupt if not an IPI */
			if (mfrr <= new_state.pending_pri)
				reject = new_state.xisr;
			new_state.pending_pri = mfrr;
			new_state.xisr = XICS_IPI;
		}

		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
			resend = new_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, local));

	/* Handle reject */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject);

	/* Handle resend */
	if (resend)
		icp_check_resend(xics, icp);

	return H_SUCCESS;
}

static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 reject;

	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);

	/*
	 * ICP State: Set_CPPR
	 *
	 * We can safely compare the new value with the current
	 * value outside of the transaction as the CPPR is only
	 * ever changed by the processor on itself
	 */
	if (cppr > icp->state.cppr)
		icp_down_cppr(xics, icp, cppr);
	else if (cppr == icp->state.cppr)
		return;

	/*
	 * ICP State: Up_CPPR
	 *
	 * The processor is raising its priority, this can result
	 * in a rejection of a pending interrupt:
	 *
	 * ICP State: Reject_Current
	 *
	 * We can remove EE from the current processor, the update
	 * transaction will set it again if needed
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	do {
		old_state = new_state = ACCESS_ONCE(icp->state);

		reject = 0;
		new_state.cppr = cppr;

		if (cppr <= new_state.pending_pri) {
			reject = new_state.xisr;
			new_state.xisr = 0;
			new_state.pending_pri = 0xff;
		}

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Check for rejects. They are handled by doing a new delivery
	 * attempt (see comments in icp_deliver_irq).
	 */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject);
}

static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u32 irq = xirr & 0x00ffffff;
	u16 src;

	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);

	/*
	 * ICP State: EOI
	 *
	 * Note: If EOI is incorrectly used by SW to lower the CPPR
	 * value (i.e. more favored), we do not check for rejection of
	 * a pending interrupt, this is a SW error and PAPR specifies
	 * that we don't have to deal with it.
	 *
	 * The sending of an EOI to the ICS is handled after the
	 * CPPR update
	 *
	 * ICP State: Down_CPPR which we handle
	 * in a separate function as it's shared with H_CPPR.
	 */
	icp_down_cppr(xics, icp, xirr >> 24);

	/* IPIs have no EOI */
	if (irq == XICS_IPI)
		return H_SUCCESS;
	/*
	 * EOI handling: If the interrupt is still asserted, we need to
	 * resend it. We can take a lockless "peek" at the ICS state here.
	 *
	 * "Message" interrupts will never have "asserted" set
	 */
	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics) {
		XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
		return H_PARAMETER;
	}
	state = &ics->irq_state[src];

	/* Still asserted, resend it */
	if (state->asserted)
		icp_deliver_irq(xics, icp, irq);

	return H_SUCCESS;
}

static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;

	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);

	if (icp->rm_action & XICS_RM_KICK_VCPU)
		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
	if (icp->rm_action & XICS_RM_CHECK_RESEND)
		icp_check_resend(xics, icp);
	if (icp->rm_action & XICS_RM_REJECT)
		icp_deliver_irq(xics, icp, icp->rm_reject);

	icp->rm_action = 0;

	return H_SUCCESS;
}

int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	unsigned long res;
	int rc = H_SUCCESS;

	/* Check if we have an ICP */
	if (!xics || !vcpu->arch.icp)
		return H_HARDWARE;

	/* Check for real mode returning too hard */
	if (xics->real_mode)
		return kvmppc_xics_rm_complete(vcpu, req);

	switch (req) {
	case H_XIRR:
		res = kvmppc_h_xirr(vcpu);
		kvmppc_set_gpr(vcpu, 4, res);
		break;
	case H_CPPR:
		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
		break;
	case H_EOI:
		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
		break;
	case H_IPI:
		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
				  kvmppc_get_gpr(vcpu, 5));
		break;
	}

	return rc;
}

/* -- Initialisation code etc. -- */

static int xics_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xics *xics = m->private;
	struct kvm *kvm = xics->kvm;
	struct kvm_vcpu *vcpu;
	int icsid, i;

	if (!kvm)
		return 0;

	seq_printf(m, "=========\nICP state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_icp *icp = vcpu->arch.icp;
		union kvmppc_icp_state state;

		if (!icp)
			continue;

		state.raw = ACCESS_ONCE(icp->state.raw);
		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
			   icp->server_num, state.xisr,
			   state.pending_pri, state.cppr, state.mfrr,
			   state.out_ee, state.need_resend);
	}

	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
		struct kvmppc_ics *ics = xics->ics[icsid];

		if (!ics)
			continue;

		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
			   icsid);

		mutex_lock(&ics->lock);

		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
			struct ics_irq_state *irq = &ics->irq_state[i];

			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
				   irq->number, irq->server, irq->priority,
				   irq->saved_priority, irq->asserted,
				   irq->resend, irq->masked_pending);

		}
		mutex_unlock(&ics->lock);
	}
	return 0;
}

static int xics_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xics_debug_show, inode->i_private);
}

static const struct file_operations xics_debug_fops = {
	.open = xics_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xics_debugfs_init(struct kvmppc_xics *xics)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
					   xics, &xics_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}
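
/*
 * Global interrupt numbers are split into an ICS index and a source
 * index within that ICS: the ICS index is irq >> KVMPPC_XICS_ICS_SHIFT
 * and each ICS covers KVMPPC_XICS_IRQ_PER_ICS consecutive sources, as
 * can be seen from the way kvmppc_xics_create_ics() numbers its
 * irq_state[] entries below.
 */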

static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
					struct kvmppc_xics *xics, int irq)
{
	struct kvmppc_ics *ics;
	int i, icsid;

	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;

	mutex_lock(&kvm->lock);

	/* ICS already exists - somebody else got here first */
	if (xics->ics[icsid])
		goto out;

	/* Create the ICS */
	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
	if (!ics)
		goto out;

	mutex_init(&ics->lock);
	ics->icsid = icsid;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
		ics->irq_state[i].priority = MASKED;
		ics->irq_state[i].saved_priority = MASKED;
	}
	smp_wmb();
	xics->ics[icsid] = ics;

	if (icsid > xics->max_icsid)
		xics->max_icsid = icsid;

 out:
	mutex_unlock(&kvm->lock);
	return xics->ics[icsid];
}

int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
{
	struct kvmppc_icp *icp;

	if (!vcpu->kvm->arch.xics)
		return -ENODEV;

	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
		return -EEXIST;

	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
	if (!icp)
		return -ENOMEM;

	icp->vcpu = vcpu;
	icp->server_num = server_num;
	icp->state.mfrr = MASKED;
	icp->state.pending_pri = MASKED;
	vcpu->arch.icp = icp;

	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);

	return 0;
}

u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
{
	struct kvmppc_icp *icp = vcpu->arch.icp;
	union kvmppc_icp_state state;

	if (!icp)
		return 0;
	state = icp->state;
	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
}
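
/*
 * The 64-bit ICP state word exchanged with userspace through
 * kvmppc_xics_get_icp()/kvmppc_xics_set_icp() packs the CPPR, XISR,
 * MFRR and pending priority at the KVM_REG_PPC_ICP_*_SHIFT positions
 * (see the KVM one-reg definitions); the XISR field is additionally
 * masked with KVM_REG_PPC_ICP_XISR_MASK when it is read back in.
 */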

int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
{
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_ics *ics;
	u8 cppr, mfrr, pending_pri;
	u32 xisr;
	u16 src;
	bool resend;

	if (!icp || !xics)
		return -ENOENT;

	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
		KVM_REG_PPC_ICP_XISR_MASK;
	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;

	/* Require the new state to be internally consistent */
	if (xisr == 0) {
		if (pending_pri != 0xff)
			return -EINVAL;
	} else if (xisr == XICS_IPI) {
		if (pending_pri != mfrr || pending_pri >= cppr)
			return -EINVAL;
	} else {
		if (pending_pri >= mfrr || pending_pri >= cppr)
			return -EINVAL;
		ics = kvmppc_xics_find_ics(xics, xisr, &src);
		if (!ics)
			return -EINVAL;
	}

	new_state.raw = 0;
	new_state.cppr = cppr;
	new_state.xisr = xisr;
	new_state.mfrr = mfrr;
	new_state.pending_pri = pending_pri;

	/*
	 * Deassert the CPU interrupt request.
	 * icp_try_update will reassert it if necessary.
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu,
				      BOOK3S_INTERRUPT_EXTERNAL_LEVEL);

	/*
	 * Note that if we displace an interrupt from old_state.xisr,
	 * we don't mark it as rejected. We expect userspace to set
	 * the state of the interrupt sources to be consistent with
	 * the ICP states (either before or afterwards, which doesn't
	 * matter). We do handle resends due to CPPR becoming less
	 * favoured because that is necessary to end up with a
	 * consistent state in the situation where userspace restores
	 * the ICS states before the ICP states.
	 */
	do {
		old_state = ACCESS_ONCE(icp->state);

		if (new_state.mfrr <= old_state.mfrr) {
			resend = false;
			new_state.need_resend = old_state.need_resend;
		} else {
			resend = old_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, false));

	if (resend)
		icp_check_resend(xics, icp);

	return 0;
}

static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	int ret;
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val, prio;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return -ENOENT;

	irqp = &ics->irq_state[idx];
	mutex_lock(&ics->lock);
	ret = -ENOENT;
	if (irqp->exists) {
		val = irqp->server;
		prio = irqp->priority;
		if (prio == MASKED) {
			val |= KVM_XICS_MASKED;
			prio = irqp->saved_priority;
		}
		val |= prio << KVM_XICS_PRIORITY_SHIFT;
		if (irqp->asserted)
			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
		else if (irqp->masked_pending || irqp->resend)
			val |= KVM_XICS_PENDING;
		ret = 0;
	}
	mutex_unlock(&ics->lock);

	if (!ret && put_user(val, ubufp))
		ret = -EFAULT;

	return ret;
}

static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val;
	u8 prio;
	u32 server;

	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
		return -ENOENT;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics) {
		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
		if (!ics)
			return -ENOMEM;
	}
	irqp = &ics->irq_state[idx];
	if (get_user(val, ubufp))
		return -EFAULT;

	server = val & KVM_XICS_DESTINATION_MASK;
	prio = val >> KVM_XICS_PRIORITY_SHIFT;
	if (prio != MASKED &&
	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
		return -EINVAL;

	mutex_lock(&ics->lock);
	irqp->server = server;
	irqp->saved_priority = prio;
	if (val & KVM_XICS_MASKED)
		prio = MASKED;
	irqp->priority = prio;
	irqp->resend = 0;
	irqp->masked_pending = 0;
	irqp->asserted = 0;
	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
		irqp->asserted = 1;
	irqp->exists = 1;
	mutex_unlock(&ics->lock);

	if (val & KVM_XICS_PENDING)
		icp_deliver_irq(xics, NULL, irqp->number);

	return 0;
}

int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
		bool line_status)
{
	struct kvmppc_xics *xics = kvm->arch.xics;

	return ics_deliver_irq(xics, irq, level, line_status);
}
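
/*
 * Device attribute interface: for group KVM_DEV_XICS_GRP_SOURCES the
 * attribute number is the global interrupt number and the attribute
 * address points to a u64 in the layout produced by xics_get_source()
 * and consumed by xics_set_source() above (server number, priority at
 * KVM_XICS_PRIORITY_SHIFT, plus the KVM_XICS_* flag bits).
 */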

static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *xics = dev->private;

	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		return xics_set_source(xics, attr->attr, attr->addr);
	}
	return -ENXIO;
}

static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *xics = dev->private;

	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		return xics_get_source(xics, attr->attr, attr->addr);
	}
	return -ENXIO;
}

static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
		    attr->attr < KVMPPC_XICS_NR_IRQS)
			return 0;
		break;
	}
	return -ENXIO;
}

static void kvmppc_xics_free(struct kvm_device *dev)
{
	struct kvmppc_xics *xics = dev->private;
	int i;
	struct kvm *kvm = xics->kvm;

	debugfs_remove(xics->dentry);

	if (kvm)
		kvm->arch.xics = NULL;

	for (i = 0; i <= xics->max_icsid; i++)
		kfree(xics->ics[i]);
	kfree(xics);
	kfree(dev);
}

static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xics *xics;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
	if (!xics)
		return -ENOMEM;

	dev->private = xics;
	xics->dev = dev;
	xics->kvm = kvm;

	/* Already there ? */
	mutex_lock(&kvm->lock);
	if (kvm->arch.xics)
		ret = -EEXIST;
	else
		kvm->arch.xics = xics;
	mutex_unlock(&kvm->lock);

	if (ret) {
		/* Don't leak the xics struct when another one already exists */
		kfree(xics);
		return ret;
	}

	xics_debugfs_init(xics);

#ifdef CONFIG_KVM_BOOK3S_64_HV
	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
		/* Enable real mode support */
		xics->real_mode = ENABLE_REALMODE;
		xics->real_mode_dbg = DEBUG_REALMODE;
	}
#endif /* CONFIG_KVM_BOOK3S_64_HV */

	return 0;
}

struct kvm_device_ops kvm_xics_ops = {
	.name = "kvm-xics",
	.create = kvmppc_xics_create,
	.destroy = kvmppc_xics_free,
	.set_attr = xics_set_attr,
	.get_attr = xics_get_attr,
	.has_attr = xics_has_attr,
};

int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
			     u32 xcpu)
{
	struct kvmppc_xics *xics = dev->private;
	int r = -EBUSY;

	if (dev->ops != &kvm_xics_ops)
		return -EPERM;
	if (xics->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type)
		return -EBUSY;

	r = kvmppc_xics_create_icp(vcpu, xcpu);
	if (!r)
		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;

	return r;
}

void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.icp)
		return;
	kfree(vcpu->arch.icp);
	vcpu->arch.icp = NULL;
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
}