1 /* 2 * Copyright 2012 Michael Ellerman, IBM Corporation. 3 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License, version 2, as 7 * published by the Free Software Foundation. 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/kvm_host.h> 12 #include <linux/err.h> 13 #include <linux/gfp.h> 14 #include <linux/anon_inodes.h> 15 16 #include <asm/uaccess.h> 17 #include <asm/kvm_book3s.h> 18 #include <asm/kvm_ppc.h> 19 #include <asm/hvcall.h> 20 #include <asm/xics.h> 21 #include <asm/debug.h> 22 23 #include <linux/debugfs.h> 24 #include <linux/seq_file.h> 25 26 #include "book3s_xics.h" 27 28 #if 1 29 #define XICS_DBG(fmt...) do { } while (0) 30 #else 31 #define XICS_DBG(fmt...) trace_printk(fmt) 32 #endif 33 34 #define ENABLE_REALMODE true 35 #define DEBUG_REALMODE false 36 37 /* 38 * LOCKING 39 * ======= 40 * 41 * Each ICS has a mutex protecting the information about the IRQ 42 * sources and avoiding simultaneous deliveries if the same interrupt. 43 * 44 * ICP operations are done via a single compare & swap transaction 45 * (most ICP state fits in the union kvmppc_icp_state) 46 */ 47 48 /* 49 * TODO 50 * ==== 51 * 52 * - To speed up resends, keep a bitmap of "resend" set bits in the 53 * ICS 54 * 55 * - Speed up server# -> ICP lookup (array ? hash table ?) 56 * 57 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed 58 * locks array to improve scalability 59 */ 60 61 /* -- ICS routines -- */ 62 63 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 64 u32 new_irq); 65 66 static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level, 67 bool report_status) 68 { 69 struct ics_irq_state *state; 70 struct kvmppc_ics *ics; 71 u16 src; 72 73 XICS_DBG("ics deliver %#x (level: %d)\n", irq, level); 74 75 ics = kvmppc_xics_find_ics(xics, irq, &src); 76 if (!ics) { 77 XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq); 78 return -EINVAL; 79 } 80 state = &ics->irq_state[src]; 81 if (!state->exists) 82 return -EINVAL; 83 84 if (report_status) 85 return state->asserted; 86 87 /* 88 * We set state->asserted locklessly. This should be fine as 89 * we are the only setter, thus concurrent access is undefined 90 * to begin with. 91 */ 92 if (level == KVM_INTERRUPT_SET_LEVEL) 93 state->asserted = 1; 94 else if (level == KVM_INTERRUPT_UNSET) { 95 state->asserted = 0; 96 return 0; 97 } 98 99 /* Attempt delivery */ 100 icp_deliver_irq(xics, NULL, irq); 101 102 return state->asserted; 103 } 104 105 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, 106 struct kvmppc_icp *icp) 107 { 108 int i; 109 110 mutex_lock(&ics->lock); 111 112 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { 113 struct ics_irq_state *state = &ics->irq_state[i]; 114 115 if (!state->resend) 116 continue; 117 118 XICS_DBG("resend %#x prio %#x\n", state->number, 119 state->priority); 120 121 mutex_unlock(&ics->lock); 122 icp_deliver_irq(xics, icp, state->number); 123 mutex_lock(&ics->lock); 124 } 125 126 mutex_unlock(&ics->lock); 127 } 128 129 static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, 130 struct ics_irq_state *state, 131 u32 server, u32 priority, u32 saved_priority) 132 { 133 bool deliver; 134 135 mutex_lock(&ics->lock); 136 137 state->server = server; 138 state->priority = priority; 139 state->saved_priority = saved_priority; 140 deliver = false; 141 if ((state->masked_pending || state->resend) && priority != MASKED) { 142 state->masked_pending = 0; 143 deliver = true; 144 } 145 146 mutex_unlock(&ics->lock); 147 148 return deliver; 149 } 150 151 int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) 152 { 153 struct kvmppc_xics *xics = kvm->arch.xics; 154 struct kvmppc_icp *icp; 155 struct kvmppc_ics *ics; 156 struct ics_irq_state *state; 157 u16 src; 158 159 if (!xics) 160 return -ENODEV; 161 162 ics = kvmppc_xics_find_ics(xics, irq, &src); 163 if (!ics) 164 return -EINVAL; 165 state = &ics->irq_state[src]; 166 167 icp = kvmppc_xics_find_server(kvm, server); 168 if (!icp) 169 return -EINVAL; 170 171 XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n", 172 irq, server, priority, 173 state->masked_pending, state->resend); 174 175 if (write_xive(xics, ics, state, server, priority, priority)) 176 icp_deliver_irq(xics, icp, irq); 177 178 return 0; 179 } 180 181 int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority) 182 { 183 struct kvmppc_xics *xics = kvm->arch.xics; 184 struct kvmppc_ics *ics; 185 struct ics_irq_state *state; 186 u16 src; 187 188 if (!xics) 189 return -ENODEV; 190 191 ics = kvmppc_xics_find_ics(xics, irq, &src); 192 if (!ics) 193 return -EINVAL; 194 state = &ics->irq_state[src]; 195 196 mutex_lock(&ics->lock); 197 *server = state->server; 198 *priority = state->priority; 199 mutex_unlock(&ics->lock); 200 201 return 0; 202 } 203 204 int kvmppc_xics_int_on(struct kvm *kvm, u32 irq) 205 { 206 struct kvmppc_xics *xics = kvm->arch.xics; 207 struct kvmppc_icp *icp; 208 struct kvmppc_ics *ics; 209 struct ics_irq_state *state; 210 u16 src; 211 212 if (!xics) 213 return -ENODEV; 214 215 ics = kvmppc_xics_find_ics(xics, irq, &src); 216 if (!ics) 217 return -EINVAL; 218 state = &ics->irq_state[src]; 219 220 icp = kvmppc_xics_find_server(kvm, state->server); 221 if (!icp) 222 return -EINVAL; 223 224 if (write_xive(xics, ics, state, state->server, state->saved_priority, 225 state->saved_priority)) 226 icp_deliver_irq(xics, icp, irq); 227 228 return 0; 229 } 230 231 int kvmppc_xics_int_off(struct kvm *kvm, u32 irq) 232 { 233 struct kvmppc_xics *xics = kvm->arch.xics; 234 struct kvmppc_ics *ics; 235 struct ics_irq_state *state; 236 u16 src; 237 238 if (!xics) 239 return -ENODEV; 240 241 ics = kvmppc_xics_find_ics(xics, irq, &src); 242 if (!ics) 243 return -EINVAL; 244 state = &ics->irq_state[src]; 245 246 write_xive(xics, ics, state, state->server, MASKED, state->priority); 247 248 return 0; 249 } 250 251 /* -- ICP routines, including hcalls -- */ 252 253 static inline bool icp_try_update(struct kvmppc_icp *icp, 254 union kvmppc_icp_state old, 255 union kvmppc_icp_state new, 256 bool change_self) 257 { 258 bool success; 259 260 /* Calculate new output value */ 261 new.out_ee = (new.xisr && (new.pending_pri < new.cppr)); 262 263 /* Attempt atomic update */ 264 success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw; 265 if (!success) 266 goto bail; 267 268 XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", 269 icp->server_num, 270 old.cppr, old.mfrr, old.pending_pri, old.xisr, 271 old.need_resend, old.out_ee); 272 XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", 273 new.cppr, new.mfrr, new.pending_pri, new.xisr, 274 new.need_resend, new.out_ee); 275 /* 276 * Check for output state update 277 * 278 * Note that this is racy since another processor could be updating 279 * the state already. This is why we never clear the interrupt output 280 * here, we only ever set it. The clear only happens prior to doing 281 * an update and only by the processor itself. Currently we do it 282 * in Accept (H_XIRR) and Up_Cppr (H_XPPR). 283 * 284 * We also do not try to figure out whether the EE state has changed, 285 * we unconditionally set it if the new state calls for it. The reason 286 * for that is that we opportunistically remove the pending interrupt 287 * flag when raising CPPR, so we need to set it back here if an 288 * interrupt is still pending. 289 */ 290 if (new.out_ee) { 291 kvmppc_book3s_queue_irqprio(icp->vcpu, 292 BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 293 if (!change_self) 294 kvmppc_fast_vcpu_kick(icp->vcpu); 295 } 296 bail: 297 return success; 298 } 299 300 static void icp_check_resend(struct kvmppc_xics *xics, 301 struct kvmppc_icp *icp) 302 { 303 u32 icsid; 304 305 /* Order this load with the test for need_resend in the caller */ 306 smp_rmb(); 307 for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) { 308 struct kvmppc_ics *ics = xics->ics[icsid]; 309 310 if (!test_and_clear_bit(icsid, icp->resend_map)) 311 continue; 312 if (!ics) 313 continue; 314 ics_check_resend(xics, ics, icp); 315 } 316 } 317 318 static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority, 319 u32 *reject) 320 { 321 union kvmppc_icp_state old_state, new_state; 322 bool success; 323 324 XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority, 325 icp->server_num); 326 327 do { 328 old_state = new_state = ACCESS_ONCE(icp->state); 329 330 *reject = 0; 331 332 /* See if we can deliver */ 333 success = new_state.cppr > priority && 334 new_state.mfrr > priority && 335 new_state.pending_pri > priority; 336 337 /* 338 * If we can, check for a rejection and perform the 339 * delivery 340 */ 341 if (success) { 342 *reject = new_state.xisr; 343 new_state.xisr = irq; 344 new_state.pending_pri = priority; 345 } else { 346 /* 347 * If we failed to deliver we set need_resend 348 * so a subsequent CPPR state change causes us 349 * to try a new delivery. 350 */ 351 new_state.need_resend = true; 352 } 353 354 } while (!icp_try_update(icp, old_state, new_state, false)); 355 356 return success; 357 } 358 359 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 360 u32 new_irq) 361 { 362 struct ics_irq_state *state; 363 struct kvmppc_ics *ics; 364 u32 reject; 365 u16 src; 366 367 /* 368 * This is used both for initial delivery of an interrupt and 369 * for subsequent rejection. 370 * 371 * Rejection can be racy vs. resends. We have evaluated the 372 * rejection in an atomic ICP transaction which is now complete, 373 * so potentially the ICP can already accept the interrupt again. 374 * 375 * So we need to retry the delivery. Essentially the reject path 376 * boils down to a failed delivery. Always. 377 * 378 * Now the interrupt could also have moved to a different target, 379 * thus we may need to re-do the ICP lookup as well 380 */ 381 382 again: 383 /* Get the ICS state and lock it */ 384 ics = kvmppc_xics_find_ics(xics, new_irq, &src); 385 if (!ics) { 386 XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq); 387 return; 388 } 389 state = &ics->irq_state[src]; 390 391 /* Get a lock on the ICS */ 392 mutex_lock(&ics->lock); 393 394 /* Get our server */ 395 if (!icp || state->server != icp->server_num) { 396 icp = kvmppc_xics_find_server(xics->kvm, state->server); 397 if (!icp) { 398 pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n", 399 new_irq, state->server); 400 goto out; 401 } 402 } 403 404 /* Clear the resend bit of that interrupt */ 405 state->resend = 0; 406 407 /* 408 * If masked, bail out 409 * 410 * Note: PAPR doesn't mention anything about masked pending 411 * when doing a resend, only when doing a delivery. 412 * 413 * However that would have the effect of losing a masked 414 * interrupt that was rejected and isn't consistent with 415 * the whole masked_pending business which is about not 416 * losing interrupts that occur while masked. 417 * 418 * I don't differenciate normal deliveries and resends, this 419 * implementation will differ from PAPR and not lose such 420 * interrupts. 421 */ 422 if (state->priority == MASKED) { 423 XICS_DBG("irq %#x masked pending\n", new_irq); 424 state->masked_pending = 1; 425 goto out; 426 } 427 428 /* 429 * Try the delivery, this will set the need_resend flag 430 * in the ICP as part of the atomic transaction if the 431 * delivery is not possible. 432 * 433 * Note that if successful, the new delivery might have itself 434 * rejected an interrupt that was "delivered" before we took the 435 * icp mutex. 436 * 437 * In this case we do the whole sequence all over again for the 438 * new guy. We cannot assume that the rejected interrupt is less 439 * favored than the new one, and thus doesn't need to be delivered, 440 * because by the time we exit icp_try_to_deliver() the target 441 * processor may well have alrady consumed & completed it, and thus 442 * the rejected interrupt might actually be already acceptable. 443 */ 444 if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) { 445 /* 446 * Delivery was successful, did we reject somebody else ? 447 */ 448 if (reject && reject != XICS_IPI) { 449 mutex_unlock(&ics->lock); 450 new_irq = reject; 451 goto again; 452 } 453 } else { 454 /* 455 * We failed to deliver the interrupt we need to set the 456 * resend map bit and mark the ICS state as needing a resend 457 */ 458 set_bit(ics->icsid, icp->resend_map); 459 state->resend = 1; 460 461 /* 462 * If the need_resend flag got cleared in the ICP some time 463 * between icp_try_to_deliver() atomic update and now, then 464 * we know it might have missed the resend_map bit. So we 465 * retry 466 */ 467 smp_mb(); 468 if (!icp->state.need_resend) { 469 mutex_unlock(&ics->lock); 470 goto again; 471 } 472 } 473 out: 474 mutex_unlock(&ics->lock); 475 } 476 477 static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp, 478 u8 new_cppr) 479 { 480 union kvmppc_icp_state old_state, new_state; 481 bool resend; 482 483 /* 484 * This handles several related states in one operation: 485 * 486 * ICP State: Down_CPPR 487 * 488 * Load CPPR with new value and if the XISR is 0 489 * then check for resends: 490 * 491 * ICP State: Resend 492 * 493 * If MFRR is more favored than CPPR, check for IPIs 494 * and notify ICS of a potential resend. This is done 495 * asynchronously (when used in real mode, we will have 496 * to exit here). 497 * 498 * We do not handle the complete Check_IPI as documented 499 * here. In the PAPR, this state will be used for both 500 * Set_MFRR and Down_CPPR. However, we know that we aren't 501 * changing the MFRR state here so we don't need to handle 502 * the case of an MFRR causing a reject of a pending irq, 503 * this will have been handled when the MFRR was set in the 504 * first place. 505 * 506 * Thus we don't have to handle rejects, only resends. 507 * 508 * When implementing real mode for HV KVM, resend will lead to 509 * a H_TOO_HARD return and the whole transaction will be handled 510 * in virtual mode. 511 */ 512 do { 513 old_state = new_state = ACCESS_ONCE(icp->state); 514 515 /* Down_CPPR */ 516 new_state.cppr = new_cppr; 517 518 /* 519 * Cut down Resend / Check_IPI / IPI 520 * 521 * The logic is that we cannot have a pending interrupt 522 * trumped by an IPI at this point (see above), so we 523 * know that either the pending interrupt is already an 524 * IPI (in which case we don't care to override it) or 525 * it's either more favored than us or non existent 526 */ 527 if (new_state.mfrr < new_cppr && 528 new_state.mfrr <= new_state.pending_pri) { 529 WARN_ON(new_state.xisr != XICS_IPI && 530 new_state.xisr != 0); 531 new_state.pending_pri = new_state.mfrr; 532 new_state.xisr = XICS_IPI; 533 } 534 535 /* Latch/clear resend bit */ 536 resend = new_state.need_resend; 537 new_state.need_resend = 0; 538 539 } while (!icp_try_update(icp, old_state, new_state, true)); 540 541 /* 542 * Now handle resend checks. Those are asynchronous to the ICP 543 * state update in HW (ie bus transactions) so we can handle them 544 * separately here too 545 */ 546 if (resend) 547 icp_check_resend(xics, icp); 548 } 549 550 static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu) 551 { 552 union kvmppc_icp_state old_state, new_state; 553 struct kvmppc_icp *icp = vcpu->arch.icp; 554 u32 xirr; 555 556 /* First, remove EE from the processor */ 557 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 558 BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 559 560 /* 561 * ICP State: Accept_Interrupt 562 * 563 * Return the pending interrupt (if any) along with the 564 * current CPPR, then clear the XISR & set CPPR to the 565 * pending priority 566 */ 567 do { 568 old_state = new_state = ACCESS_ONCE(icp->state); 569 570 xirr = old_state.xisr | (((u32)old_state.cppr) << 24); 571 if (!old_state.xisr) 572 break; 573 new_state.cppr = new_state.pending_pri; 574 new_state.pending_pri = 0xff; 575 new_state.xisr = 0; 576 577 } while (!icp_try_update(icp, old_state, new_state, true)); 578 579 XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr); 580 581 return xirr; 582 } 583 584 static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, 585 unsigned long mfrr) 586 { 587 union kvmppc_icp_state old_state, new_state; 588 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 589 struct kvmppc_icp *icp; 590 u32 reject; 591 bool resend; 592 bool local; 593 594 XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n", 595 vcpu->vcpu_id, server, mfrr); 596 597 icp = vcpu->arch.icp; 598 local = icp->server_num == server; 599 if (!local) { 600 icp = kvmppc_xics_find_server(vcpu->kvm, server); 601 if (!icp) 602 return H_PARAMETER; 603 } 604 605 /* 606 * ICP state: Set_MFRR 607 * 608 * If the CPPR is more favored than the new MFRR, then 609 * nothing needs to be rejected as there can be no XISR to 610 * reject. If the MFRR is being made less favored then 611 * there might be a previously-rejected interrupt needing 612 * to be resent. 613 * 614 * If the CPPR is less favored, then we might be replacing 615 * an interrupt, and thus need to possibly reject it as in 616 * 617 * ICP state: Check_IPI 618 */ 619 do { 620 old_state = new_state = ACCESS_ONCE(icp->state); 621 622 /* Set_MFRR */ 623 new_state.mfrr = mfrr; 624 625 /* Check_IPI */ 626 reject = 0; 627 resend = false; 628 if (mfrr < new_state.cppr) { 629 /* Reject a pending interrupt if not an IPI */ 630 if (mfrr <= new_state.pending_pri) 631 reject = new_state.xisr; 632 new_state.pending_pri = mfrr; 633 new_state.xisr = XICS_IPI; 634 } 635 636 if (mfrr > old_state.mfrr && mfrr > new_state.cppr) { 637 resend = new_state.need_resend; 638 new_state.need_resend = 0; 639 } 640 } while (!icp_try_update(icp, old_state, new_state, local)); 641 642 /* Handle reject */ 643 if (reject && reject != XICS_IPI) 644 icp_deliver_irq(xics, icp, reject); 645 646 /* Handle resend */ 647 if (resend) 648 icp_check_resend(xics, icp); 649 650 return H_SUCCESS; 651 } 652 653 static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) 654 { 655 union kvmppc_icp_state state; 656 struct kvmppc_icp *icp; 657 658 icp = vcpu->arch.icp; 659 if (icp->server_num != server) { 660 icp = kvmppc_xics_find_server(vcpu->kvm, server); 661 if (!icp) 662 return H_PARAMETER; 663 } 664 state = ACCESS_ONCE(icp->state); 665 kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr); 666 kvmppc_set_gpr(vcpu, 5, state.mfrr); 667 return H_SUCCESS; 668 } 669 670 static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) 671 { 672 union kvmppc_icp_state old_state, new_state; 673 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 674 struct kvmppc_icp *icp = vcpu->arch.icp; 675 u32 reject; 676 677 XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr); 678 679 /* 680 * ICP State: Set_CPPR 681 * 682 * We can safely compare the new value with the current 683 * value outside of the transaction as the CPPR is only 684 * ever changed by the processor on itself 685 */ 686 if (cppr > icp->state.cppr) 687 icp_down_cppr(xics, icp, cppr); 688 else if (cppr == icp->state.cppr) 689 return; 690 691 /* 692 * ICP State: Up_CPPR 693 * 694 * The processor is raising its priority, this can result 695 * in a rejection of a pending interrupt: 696 * 697 * ICP State: Reject_Current 698 * 699 * We can remove EE from the current processor, the update 700 * transaction will set it again if needed 701 */ 702 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 703 BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 704 705 do { 706 old_state = new_state = ACCESS_ONCE(icp->state); 707 708 reject = 0; 709 new_state.cppr = cppr; 710 711 if (cppr <= new_state.pending_pri) { 712 reject = new_state.xisr; 713 new_state.xisr = 0; 714 new_state.pending_pri = 0xff; 715 } 716 717 } while (!icp_try_update(icp, old_state, new_state, true)); 718 719 /* 720 * Check for rejects. They are handled by doing a new delivery 721 * attempt (see comments in icp_deliver_irq). 722 */ 723 if (reject && reject != XICS_IPI) 724 icp_deliver_irq(xics, icp, reject); 725 } 726 727 static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) 728 { 729 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 730 struct kvmppc_icp *icp = vcpu->arch.icp; 731 struct kvmppc_ics *ics; 732 struct ics_irq_state *state; 733 u32 irq = xirr & 0x00ffffff; 734 u16 src; 735 736 XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr); 737 738 /* 739 * ICP State: EOI 740 * 741 * Note: If EOI is incorrectly used by SW to lower the CPPR 742 * value (ie more favored), we do not check for rejection of 743 * a pending interrupt, this is a SW error and PAPR sepcifies 744 * that we don't have to deal with it. 745 * 746 * The sending of an EOI to the ICS is handled after the 747 * CPPR update 748 * 749 * ICP State: Down_CPPR which we handle 750 * in a separate function as it's shared with H_CPPR. 751 */ 752 icp_down_cppr(xics, icp, xirr >> 24); 753 754 /* IPIs have no EOI */ 755 if (irq == XICS_IPI) 756 return H_SUCCESS; 757 /* 758 * EOI handling: If the interrupt is still asserted, we need to 759 * resend it. We can take a lockless "peek" at the ICS state here. 760 * 761 * "Message" interrupts will never have "asserted" set 762 */ 763 ics = kvmppc_xics_find_ics(xics, irq, &src); 764 if (!ics) { 765 XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq); 766 return H_PARAMETER; 767 } 768 state = &ics->irq_state[src]; 769 770 /* Still asserted, resend it */ 771 if (state->asserted) 772 icp_deliver_irq(xics, icp, irq); 773 774 return H_SUCCESS; 775 } 776 777 static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) 778 { 779 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 780 struct kvmppc_icp *icp = vcpu->arch.icp; 781 782 XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n", 783 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt); 784 785 if (icp->rm_action & XICS_RM_KICK_VCPU) 786 kvmppc_fast_vcpu_kick(icp->rm_kick_target); 787 if (icp->rm_action & XICS_RM_CHECK_RESEND) 788 icp_check_resend(xics, icp); 789 if (icp->rm_action & XICS_RM_REJECT) 790 icp_deliver_irq(xics, icp, icp->rm_reject); 791 792 icp->rm_action = 0; 793 794 return H_SUCCESS; 795 } 796 797 int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) 798 { 799 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 800 unsigned long res; 801 int rc = H_SUCCESS; 802 803 /* Check if we have an ICP */ 804 if (!xics || !vcpu->arch.icp) 805 return H_HARDWARE; 806 807 /* These requests don't have real-mode implementations at present */ 808 switch (req) { 809 case H_XIRR_X: 810 res = kvmppc_h_xirr(vcpu); 811 kvmppc_set_gpr(vcpu, 4, res); 812 kvmppc_set_gpr(vcpu, 5, get_tb()); 813 return rc; 814 case H_IPOLL: 815 rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); 816 return rc; 817 } 818 819 /* Check for real mode returning too hard */ 820 if (xics->real_mode) 821 return kvmppc_xics_rm_complete(vcpu, req); 822 823 switch (req) { 824 case H_XIRR: 825 res = kvmppc_h_xirr(vcpu); 826 kvmppc_set_gpr(vcpu, 4, res); 827 break; 828 case H_CPPR: 829 kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4)); 830 break; 831 case H_EOI: 832 rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4)); 833 break; 834 case H_IPI: 835 rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4), 836 kvmppc_get_gpr(vcpu, 5)); 837 break; 838 } 839 840 return rc; 841 } 842 843 844 /* -- Initialisation code etc. -- */ 845 846 static int xics_debug_show(struct seq_file *m, void *private) 847 { 848 struct kvmppc_xics *xics = m->private; 849 struct kvm *kvm = xics->kvm; 850 struct kvm_vcpu *vcpu; 851 int icsid, i; 852 853 if (!kvm) 854 return 0; 855 856 seq_printf(m, "=========\nICP state\n=========\n"); 857 858 kvm_for_each_vcpu(i, vcpu, kvm) { 859 struct kvmppc_icp *icp = vcpu->arch.icp; 860 union kvmppc_icp_state state; 861 862 if (!icp) 863 continue; 864 865 state.raw = ACCESS_ONCE(icp->state.raw); 866 seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n", 867 icp->server_num, state.xisr, 868 state.pending_pri, state.cppr, state.mfrr, 869 state.out_ee, state.need_resend); 870 } 871 872 for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) { 873 struct kvmppc_ics *ics = xics->ics[icsid]; 874 875 if (!ics) 876 continue; 877 878 seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n", 879 icsid); 880 881 mutex_lock(&ics->lock); 882 883 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { 884 struct ics_irq_state *irq = &ics->irq_state[i]; 885 886 seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n", 887 irq->number, irq->server, irq->priority, 888 irq->saved_priority, irq->asserted, 889 irq->resend, irq->masked_pending); 890 891 } 892 mutex_unlock(&ics->lock); 893 } 894 return 0; 895 } 896 897 static int xics_debug_open(struct inode *inode, struct file *file) 898 { 899 return single_open(file, xics_debug_show, inode->i_private); 900 } 901 902 static const struct file_operations xics_debug_fops = { 903 .open = xics_debug_open, 904 .read = seq_read, 905 .llseek = seq_lseek, 906 .release = single_release, 907 }; 908 909 static void xics_debugfs_init(struct kvmppc_xics *xics) 910 { 911 char *name; 912 913 name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics); 914 if (!name) { 915 pr_err("%s: no memory for name\n", __func__); 916 return; 917 } 918 919 xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root, 920 xics, &xics_debug_fops); 921 922 pr_debug("%s: created %s\n", __func__, name); 923 kfree(name); 924 } 925 926 static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm, 927 struct kvmppc_xics *xics, int irq) 928 { 929 struct kvmppc_ics *ics; 930 int i, icsid; 931 932 icsid = irq >> KVMPPC_XICS_ICS_SHIFT; 933 934 mutex_lock(&kvm->lock); 935 936 /* ICS already exists - somebody else got here first */ 937 if (xics->ics[icsid]) 938 goto out; 939 940 /* Create the ICS */ 941 ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL); 942 if (!ics) 943 goto out; 944 945 mutex_init(&ics->lock); 946 ics->icsid = icsid; 947 948 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { 949 ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i; 950 ics->irq_state[i].priority = MASKED; 951 ics->irq_state[i].saved_priority = MASKED; 952 } 953 smp_wmb(); 954 xics->ics[icsid] = ics; 955 956 if (icsid > xics->max_icsid) 957 xics->max_icsid = icsid; 958 959 out: 960 mutex_unlock(&kvm->lock); 961 return xics->ics[icsid]; 962 } 963 964 int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num) 965 { 966 struct kvmppc_icp *icp; 967 968 if (!vcpu->kvm->arch.xics) 969 return -ENODEV; 970 971 if (kvmppc_xics_find_server(vcpu->kvm, server_num)) 972 return -EEXIST; 973 974 icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL); 975 if (!icp) 976 return -ENOMEM; 977 978 icp->vcpu = vcpu; 979 icp->server_num = server_num; 980 icp->state.mfrr = MASKED; 981 icp->state.pending_pri = MASKED; 982 vcpu->arch.icp = icp; 983 984 XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id); 985 986 return 0; 987 } 988 989 u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu) 990 { 991 struct kvmppc_icp *icp = vcpu->arch.icp; 992 union kvmppc_icp_state state; 993 994 if (!icp) 995 return 0; 996 state = icp->state; 997 return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) | 998 ((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) | 999 ((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) | 1000 ((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT); 1001 } 1002 1003 int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval) 1004 { 1005 struct kvmppc_icp *icp = vcpu->arch.icp; 1006 struct kvmppc_xics *xics = vcpu->kvm->arch.xics; 1007 union kvmppc_icp_state old_state, new_state; 1008 struct kvmppc_ics *ics; 1009 u8 cppr, mfrr, pending_pri; 1010 u32 xisr; 1011 u16 src; 1012 bool resend; 1013 1014 if (!icp || !xics) 1015 return -ENOENT; 1016 1017 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; 1018 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & 1019 KVM_REG_PPC_ICP_XISR_MASK; 1020 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; 1021 pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT; 1022 1023 /* Require the new state to be internally consistent */ 1024 if (xisr == 0) { 1025 if (pending_pri != 0xff) 1026 return -EINVAL; 1027 } else if (xisr == XICS_IPI) { 1028 if (pending_pri != mfrr || pending_pri >= cppr) 1029 return -EINVAL; 1030 } else { 1031 if (pending_pri >= mfrr || pending_pri >= cppr) 1032 return -EINVAL; 1033 ics = kvmppc_xics_find_ics(xics, xisr, &src); 1034 if (!ics) 1035 return -EINVAL; 1036 } 1037 1038 new_state.raw = 0; 1039 new_state.cppr = cppr; 1040 new_state.xisr = xisr; 1041 new_state.mfrr = mfrr; 1042 new_state.pending_pri = pending_pri; 1043 1044 /* 1045 * Deassert the CPU interrupt request. 1046 * icp_try_update will reassert it if necessary. 1047 */ 1048 kvmppc_book3s_dequeue_irqprio(icp->vcpu, 1049 BOOK3S_INTERRUPT_EXTERNAL_LEVEL); 1050 1051 /* 1052 * Note that if we displace an interrupt from old_state.xisr, 1053 * we don't mark it as rejected. We expect userspace to set 1054 * the state of the interrupt sources to be consistent with 1055 * the ICP states (either before or afterwards, which doesn't 1056 * matter). We do handle resends due to CPPR becoming less 1057 * favoured because that is necessary to end up with a 1058 * consistent state in the situation where userspace restores 1059 * the ICS states before the ICP states. 1060 */ 1061 do { 1062 old_state = ACCESS_ONCE(icp->state); 1063 1064 if (new_state.mfrr <= old_state.mfrr) { 1065 resend = false; 1066 new_state.need_resend = old_state.need_resend; 1067 } else { 1068 resend = old_state.need_resend; 1069 new_state.need_resend = 0; 1070 } 1071 } while (!icp_try_update(icp, old_state, new_state, false)); 1072 1073 if (resend) 1074 icp_check_resend(xics, icp); 1075 1076 return 0; 1077 } 1078 1079 static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr) 1080 { 1081 int ret; 1082 struct kvmppc_ics *ics; 1083 struct ics_irq_state *irqp; 1084 u64 __user *ubufp = (u64 __user *) addr; 1085 u16 idx; 1086 u64 val, prio; 1087 1088 ics = kvmppc_xics_find_ics(xics, irq, &idx); 1089 if (!ics) 1090 return -ENOENT; 1091 1092 irqp = &ics->irq_state[idx]; 1093 mutex_lock(&ics->lock); 1094 ret = -ENOENT; 1095 if (irqp->exists) { 1096 val = irqp->server; 1097 prio = irqp->priority; 1098 if (prio == MASKED) { 1099 val |= KVM_XICS_MASKED; 1100 prio = irqp->saved_priority; 1101 } 1102 val |= prio << KVM_XICS_PRIORITY_SHIFT; 1103 if (irqp->asserted) 1104 val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; 1105 else if (irqp->masked_pending || irqp->resend) 1106 val |= KVM_XICS_PENDING; 1107 ret = 0; 1108 } 1109 mutex_unlock(&ics->lock); 1110 1111 if (!ret && put_user(val, ubufp)) 1112 ret = -EFAULT; 1113 1114 return ret; 1115 } 1116 1117 static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr) 1118 { 1119 struct kvmppc_ics *ics; 1120 struct ics_irq_state *irqp; 1121 u64 __user *ubufp = (u64 __user *) addr; 1122 u16 idx; 1123 u64 val; 1124 u8 prio; 1125 u32 server; 1126 1127 if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) 1128 return -ENOENT; 1129 1130 ics = kvmppc_xics_find_ics(xics, irq, &idx); 1131 if (!ics) { 1132 ics = kvmppc_xics_create_ics(xics->kvm, xics, irq); 1133 if (!ics) 1134 return -ENOMEM; 1135 } 1136 irqp = &ics->irq_state[idx]; 1137 if (get_user(val, ubufp)) 1138 return -EFAULT; 1139 1140 server = val & KVM_XICS_DESTINATION_MASK; 1141 prio = val >> KVM_XICS_PRIORITY_SHIFT; 1142 if (prio != MASKED && 1143 kvmppc_xics_find_server(xics->kvm, server) == NULL) 1144 return -EINVAL; 1145 1146 mutex_lock(&ics->lock); 1147 irqp->server = server; 1148 irqp->saved_priority = prio; 1149 if (val & KVM_XICS_MASKED) 1150 prio = MASKED; 1151 irqp->priority = prio; 1152 irqp->resend = 0; 1153 irqp->masked_pending = 0; 1154 irqp->asserted = 0; 1155 if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) 1156 irqp->asserted = 1; 1157 irqp->exists = 1; 1158 mutex_unlock(&ics->lock); 1159 1160 if (val & KVM_XICS_PENDING) 1161 icp_deliver_irq(xics, NULL, irqp->number); 1162 1163 return 0; 1164 } 1165 1166 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, 1167 bool line_status) 1168 { 1169 struct kvmppc_xics *xics = kvm->arch.xics; 1170 1171 return ics_deliver_irq(xics, irq, level, line_status); 1172 } 1173 1174 static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1175 { 1176 struct kvmppc_xics *xics = dev->private; 1177 1178 switch (attr->group) { 1179 case KVM_DEV_XICS_GRP_SOURCES: 1180 return xics_set_source(xics, attr->attr, attr->addr); 1181 } 1182 return -ENXIO; 1183 } 1184 1185 static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1186 { 1187 struct kvmppc_xics *xics = dev->private; 1188 1189 switch (attr->group) { 1190 case KVM_DEV_XICS_GRP_SOURCES: 1191 return xics_get_source(xics, attr->attr, attr->addr); 1192 } 1193 return -ENXIO; 1194 } 1195 1196 static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 1197 { 1198 switch (attr->group) { 1199 case KVM_DEV_XICS_GRP_SOURCES: 1200 if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && 1201 attr->attr < KVMPPC_XICS_NR_IRQS) 1202 return 0; 1203 break; 1204 } 1205 return -ENXIO; 1206 } 1207 1208 static void kvmppc_xics_free(struct kvm_device *dev) 1209 { 1210 struct kvmppc_xics *xics = dev->private; 1211 int i; 1212 struct kvm *kvm = xics->kvm; 1213 1214 debugfs_remove(xics->dentry); 1215 1216 if (kvm) 1217 kvm->arch.xics = NULL; 1218 1219 for (i = 0; i <= xics->max_icsid; i++) 1220 kfree(xics->ics[i]); 1221 kfree(xics); 1222 kfree(dev); 1223 } 1224 1225 static int kvmppc_xics_create(struct kvm_device *dev, u32 type) 1226 { 1227 struct kvmppc_xics *xics; 1228 struct kvm *kvm = dev->kvm; 1229 int ret = 0; 1230 1231 xics = kzalloc(sizeof(*xics), GFP_KERNEL); 1232 if (!xics) 1233 return -ENOMEM; 1234 1235 dev->private = xics; 1236 xics->dev = dev; 1237 xics->kvm = kvm; 1238 1239 /* Already there ? */ 1240 mutex_lock(&kvm->lock); 1241 if (kvm->arch.xics) 1242 ret = -EEXIST; 1243 else 1244 kvm->arch.xics = xics; 1245 mutex_unlock(&kvm->lock); 1246 1247 if (ret) 1248 return ret; 1249 1250 xics_debugfs_init(xics); 1251 1252 #ifdef CONFIG_KVM_BOOK3S_64_HV 1253 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 1254 /* Enable real mode support */ 1255 xics->real_mode = ENABLE_REALMODE; 1256 xics->real_mode_dbg = DEBUG_REALMODE; 1257 } 1258 #endif /* CONFIG_KVM_BOOK3S_64_HV */ 1259 1260 return 0; 1261 } 1262 1263 struct kvm_device_ops kvm_xics_ops = { 1264 .name = "kvm-xics", 1265 .create = kvmppc_xics_create, 1266 .destroy = kvmppc_xics_free, 1267 .set_attr = xics_set_attr, 1268 .get_attr = xics_get_attr, 1269 .has_attr = xics_has_attr, 1270 }; 1271 1272 int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, 1273 u32 xcpu) 1274 { 1275 struct kvmppc_xics *xics = dev->private; 1276 int r = -EBUSY; 1277 1278 if (dev->ops != &kvm_xics_ops) 1279 return -EPERM; 1280 if (xics->kvm != vcpu->kvm) 1281 return -EPERM; 1282 if (vcpu->arch.irq_type) 1283 return -EBUSY; 1284 1285 r = kvmppc_xics_create_icp(vcpu, xcpu); 1286 if (!r) 1287 vcpu->arch.irq_type = KVMPPC_IRQ_XICS; 1288 1289 return r; 1290 } 1291 1292 void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) 1293 { 1294 if (!vcpu->arch.icp) 1295 return; 1296 kfree(vcpu->arch.icp); 1297 vcpu->arch.icp = NULL; 1298 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; 1299 } 1300