1/* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 12 * 13 * Derived from book3s_rmhandlers.S and other files, which are: 14 * 15 * Copyright SUSE Linux Products GmbH 2009 16 * 17 * Authors: Alexander Graf <agraf@suse.de> 18 */ 19 20#include <asm/ppc_asm.h> 21#include <asm/kvm_asm.h> 22#include <asm/reg.h> 23#include <asm/mmu.h> 24#include <asm/page.h> 25#include <asm/ptrace.h> 26#include <asm/hvcall.h> 27#include <asm/asm-offsets.h> 28#include <asm/exception-64s.h> 29#include <asm/kvm_book3s_asm.h> 30#include <asm/book3s/64/mmu-hash.h> 31#include <asm/tm.h> 32#include <asm/opal.h> 33#include <asm/xive-regs.h> 34#include <asm/thread_info.h> 35 36/* Sign-extend HDEC if not on POWER9 */ 37#define EXTEND_HDEC(reg) \ 38BEGIN_FTR_SECTION; \ 39 extsw reg, reg; \ 40END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 41 42#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) 43 44/* Values in HSTATE_NAPPING(r13) */ 45#define NAPPING_CEDE 1 46#define NAPPING_NOVCPU 2 47 48/* Stack frame offsets for kvmppc_hv_entry */ 49#define SFS 160 50#define STACK_SLOT_TRAP (SFS-4) 51#define STACK_SLOT_TID (SFS-16) 52#define STACK_SLOT_PSSCR (SFS-24) 53#define STACK_SLOT_PID (SFS-32) 54#define STACK_SLOT_IAMR (SFS-40) 55#define STACK_SLOT_CIABR (SFS-48) 56#define STACK_SLOT_DAWR (SFS-56) 57#define STACK_SLOT_DAWRX (SFS-64) 58#define STACK_SLOT_HFSCR (SFS-72) 59 60/* 61 * Call kvmppc_hv_entry in real mode. 62 * Must be called with interrupts hard-disabled. 63 * 64 * Input Registers: 65 * 66 * LR = return address to continue at after eventually re-enabling MMU 67 */ 68_GLOBAL_TOC(kvmppc_hv_entry_trampoline) 69 mflr r0 70 std r0, PPC_LR_STKOFF(r1) 71 stdu r1, -112(r1) 72 mfmsr r10 73 std r10, HSTATE_HOST_MSR(r13) 74 LOAD_REG_ADDR(r5, kvmppc_call_hv_entry) 75 li r0,MSR_RI 76 andc r0,r10,r0 77 li r6,MSR_IR | MSR_DR 78 andc r6,r10,r6 79 mtmsrd r0,1 /* clear RI in MSR */ 80 mtsrr0 r5 81 mtsrr1 r6 82 RFI_TO_KERNEL 83 84kvmppc_call_hv_entry: 85BEGIN_FTR_SECTION 86 /* On P9, do LPCR setting, if necessary */ 87 ld r3, HSTATE_SPLIT_MODE(r13) 88 cmpdi r3, 0 89 beq 46f 90 lwz r4, KVM_SPLIT_DO_SET(r3) 91 cmpwi r4, 0 92 beq 46f 93 bl kvmhv_p9_set_lpcr 94 nop 9546: 96END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 97 98 ld r4, HSTATE_KVM_VCPU(r13) 99 bl kvmppc_hv_entry 100 101 /* Back from guest - restore host state and return to caller */ 102 103BEGIN_FTR_SECTION 104 /* Restore host DABR and DABRX */ 105 ld r5,HSTATE_DABR(r13) 106 li r6,7 107 mtspr SPRN_DABR,r5 108 mtspr SPRN_DABRX,r6 109END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 110 111 /* Restore SPRG3 */ 112 ld r3,PACA_SPRG_VDSO(r13) 113 mtspr SPRN_SPRG_VDSO_WRITE,r3 114 115 /* Reload the host's PMU registers */ 116 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ 117 cmpwi r4, 0 118 beq 23f /* skip if not */ 119BEGIN_FTR_SECTION 120 ld r3, HSTATE_MMCR0(r13) 121 andi. 
r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO 122 cmpwi r4, MMCR0_PMAO 123 beql kvmppc_fix_pmao 124END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) 125 lwz r3, HSTATE_PMC1(r13) 126 lwz r4, HSTATE_PMC2(r13) 127 lwz r5, HSTATE_PMC3(r13) 128 lwz r6, HSTATE_PMC4(r13) 129 lwz r8, HSTATE_PMC5(r13) 130 lwz r9, HSTATE_PMC6(r13) 131 mtspr SPRN_PMC1, r3 132 mtspr SPRN_PMC2, r4 133 mtspr SPRN_PMC3, r5 134 mtspr SPRN_PMC4, r6 135 mtspr SPRN_PMC5, r8 136 mtspr SPRN_PMC6, r9 137 ld r3, HSTATE_MMCR0(r13) 138 ld r4, HSTATE_MMCR1(r13) 139 ld r5, HSTATE_MMCRA(r13) 140 ld r6, HSTATE_SIAR(r13) 141 ld r7, HSTATE_SDAR(r13) 142 mtspr SPRN_MMCR1, r4 143 mtspr SPRN_MMCRA, r5 144 mtspr SPRN_SIAR, r6 145 mtspr SPRN_SDAR, r7 146BEGIN_FTR_SECTION 147 ld r8, HSTATE_MMCR2(r13) 148 ld r9, HSTATE_SIER(r13) 149 mtspr SPRN_MMCR2, r8 150 mtspr SPRN_SIER, r9 151END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 152 mtspr SPRN_MMCR0, r3 153 isync 15423: 155 156 /* 157 * Reload DEC. HDEC interrupts were disabled when 158 * we reloaded the host's LPCR value. 159 */ 160 ld r3, HSTATE_DECEXP(r13) 161 mftb r4 162 subf r4, r4, r3 163 mtspr SPRN_DEC, r4 164 165 /* hwthread_req may have got set by cede or no vcpu, so clear it */ 166 li r0, 0 167 stb r0, HSTATE_HWTHREAD_REQ(r13) 168 169 /* 170 * For external interrupts we need to call the Linux 171 * handler to process the interrupt. We do that by jumping 172 * to absolute address 0x500 for external interrupts. 173 * The [h]rfid at the end of the handler will return to 174 * the book3s_hv_interrupts.S code. For other interrupts 175 * we do the rfid to get back to the book3s_hv_interrupts.S 176 * code here. 177 */ 178 ld r8, 112+PPC_LR_STKOFF(r1) 179 addi r1, r1, 112 180 ld r7, HSTATE_HOST_MSR(r13) 181 182 /* Return the trap number on this thread as the return value */ 183 mr r3, r12 184 185 /* 186 * If we came back from the guest via a relocation-on interrupt, 187 * we will be in virtual mode at this point, which makes it a 188 * little easier to get back to the caller. 189 */ 190 mfmsr r0 191 andi. r0, r0, MSR_IR /* in real mode? */ 192 bne .Lvirt_return 193 194 /* RFI into the highmem handler */ 195 mfmsr r6 196 li r0, MSR_RI 197 andc r6, r6, r0 198 mtmsrd r6, 1 /* Clear RI in MSR */ 199 mtsrr0 r8 200 mtsrr1 r7 201 RFI_TO_KERNEL 202 203 /* Virtual-mode return */ 204.Lvirt_return: 205 mtlr r8 206 blr 207 208kvmppc_primary_no_guest: 209 /* We handle this much like a ceded vcpu */ 210 /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ 211 /* HDEC may be larger than DEC for arch >= v3.00, but since the */ 212 /* HDEC value came from DEC in the first place, it will fit */ 213 mfspr r3, SPRN_HDEC 214 mtspr SPRN_DEC, r3 215 /* 216 * Make sure the primary has finished the MMU switch. 217 * We should never get here on a secondary thread, but 218 * check it for robustness' sake. 219 */ 220 ld r5, HSTATE_KVM_VCORE(r13) 22165: lbz r0, VCORE_IN_GUEST(r5) 222 cmpwi r0, 0 223 beq 65b 224 /* Set LPCR. */ 225 ld r8,VCORE_LPCR(r5) 226 mtspr SPRN_LPCR,r8 227 isync 228 /* set our bit in napping_threads */ 229 ld r5, HSTATE_KVM_VCORE(r13) 230 lbz r7, HSTATE_PTID(r13) 231 li r0, 1 232 sld r0, r0, r7 233 addi r6, r5, VCORE_NAPPING_THREADS 2341: lwarx r3, 0, r6 235 or r3, r3, r0 236 stwcx. 
r3, 0, r6 237 bne 1b 238 /* order napping_threads update vs testing entry_exit_map */ 239 isync 240 li r12, 0 241 lwz r7, VCORE_ENTRY_EXIT(r5) 242 cmpwi r7, 0x100 243 bge kvm_novcpu_exit /* another thread already exiting */ 244 li r3, NAPPING_NOVCPU 245 stb r3, HSTATE_NAPPING(r13) 246 247 li r3, 0 /* Don't wake on privileged (OS) doorbell */ 248 b kvm_do_nap 249 250/* 251 * kvm_novcpu_wakeup 252 * Entered from kvm_start_guest if kvm_hstate.napping is set 253 * to NAPPING_NOVCPU 254 * r2 = kernel TOC 255 * r13 = paca 256 */ 257kvm_novcpu_wakeup: 258 ld r1, HSTATE_HOST_R1(r13) 259 ld r5, HSTATE_KVM_VCORE(r13) 260 li r0, 0 261 stb r0, HSTATE_NAPPING(r13) 262 263 /* check the wake reason */ 264 bl kvmppc_check_wake_reason 265 266 /* 267 * Restore volatile registers since we could have called 268 * a C routine in kvmppc_check_wake_reason. 269 * r5 = VCORE 270 */ 271 ld r5, HSTATE_KVM_VCORE(r13) 272 273 /* see if any other thread is already exiting */ 274 lwz r0, VCORE_ENTRY_EXIT(r5) 275 cmpwi r0, 0x100 276 bge kvm_novcpu_exit 277 278 /* clear our bit in napping_threads */ 279 lbz r7, HSTATE_PTID(r13) 280 li r0, 1 281 sld r0, r0, r7 282 addi r6, r5, VCORE_NAPPING_THREADS 2834: lwarx r7, 0, r6 284 andc r7, r7, r0 285 stwcx. r7, 0, r6 286 bne 4b 287 288 /* See if the wake reason means we need to exit */ 289 cmpdi r3, 0 290 bge kvm_novcpu_exit 291 292 /* See if our timeslice has expired (HDEC is negative) */ 293 mfspr r0, SPRN_HDEC 294 EXTEND_HDEC(r0) 295 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 296 cmpdi r0, 0 297 blt kvm_novcpu_exit 298 299 /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ 300 ld r4, HSTATE_KVM_VCPU(r13) 301 cmpdi r4, 0 302 beq kvmppc_primary_no_guest 303 304#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 305 addi r3, r4, VCPU_TB_RMENTRY 306 bl kvmhv_start_timing 307#endif 308 b kvmppc_got_guest 309 310kvm_novcpu_exit: 311#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 312 ld r4, HSTATE_KVM_VCPU(r13) 313 cmpdi r4, 0 314 beq 13f 315 addi r3, r4, VCPU_TB_RMEXIT 316 bl kvmhv_accumulate_time 317#endif 31813: mr r3, r12 319 stw r12, STACK_SLOT_TRAP(r1) 320 bl kvmhv_commence_exit 321 nop 322 b kvmhv_switch_to_host 323 324/* 325 * We come in here when wakened from nap mode. 326 * Relocation is off and most register values are lost. 327 * r13 points to the PACA. 328 * r3 contains the SRR1 wakeup value, SRR1 is trashed. 329 */ 330 .globl kvm_start_guest 331kvm_start_guest: 332 /* Set runlatch bit the minute you wake up from nap */ 333 mfspr r0, SPRN_CTRLF 334 ori r0, r0, 1 335 mtspr SPRN_CTRLT, r0 336 337 /* 338 * Could avoid this and pass it through in r3. For now, 339 * code expects it to be in SRR1. 340 */ 341 mtspr SPRN_SRR1,r3 342 343 ld r2,PACATOC(r13) 344 345 li r0,0 346 stb r0,PACA_FTRACE_ENABLED(r13) 347 348 li r0,KVM_HWTHREAD_IN_KVM 349 stb r0,HSTATE_HWTHREAD_STATE(r13) 350 351 /* NV GPR values from power7_idle() will no longer be valid */ 352 li r0,1 353 stb r0,PACA_NAPSTATELOST(r13) 354 355 /* were we napping due to cede? */ 356 lbz r0,HSTATE_NAPPING(r13) 357 cmpwi r0,NAPPING_CEDE 358 beq kvm_end_cede 359 cmpwi r0,NAPPING_NOVCPU 360 beq kvm_novcpu_wakeup 361 362 ld r1,PACAEMERGSP(r13) 363 subi r1,r1,STACK_FRAME_OVERHEAD 364 365 /* 366 * We weren't napping due to cede, so this must be a secondary 367 * thread being woken up to run a guest, or being woken up due 368 * to a stray IPI. (Or due to some machine check or hypervisor 369 * maintenance interrupt while the core is in KVM.) 
370 */ 371 372 /* Check the wake reason in SRR1 to see why we got here */ 373 bl kvmppc_check_wake_reason 374 /* 375 * kvmppc_check_wake_reason could invoke a C routine, but we 376 * have no volatile registers to restore when we return. 377 */ 378 379 cmpdi r3, 0 380 bge kvm_no_guest 381 382 /* get vcore pointer, NULL if we have nothing to run */ 383 ld r5,HSTATE_KVM_VCORE(r13) 384 cmpdi r5,0 385 /* if we have no vcore to run, go back to sleep */ 386 beq kvm_no_guest 387 388kvm_secondary_got_guest: 389 390 /* Set HSTATE_DSCR(r13) to something sensible */ 391 ld r6, PACA_DSCR_DEFAULT(r13) 392 std r6, HSTATE_DSCR(r13) 393 394 /* On thread 0 of a subcore, set HDEC to max */ 395 lbz r4, HSTATE_PTID(r13) 396 cmpwi r4, 0 397 bne 63f 398 LOAD_REG_ADDR(r6, decrementer_max) 399 ld r6, 0(r6) 400 mtspr SPRN_HDEC, r6 401 /* and set per-LPAR registers, if doing dynamic micro-threading */ 402 ld r6, HSTATE_SPLIT_MODE(r13) 403 cmpdi r6, 0 404 beq 63f 405BEGIN_FTR_SECTION 406 ld r0, KVM_SPLIT_RPR(r6) 407 mtspr SPRN_RPR, r0 408 ld r0, KVM_SPLIT_PMMAR(r6) 409 mtspr SPRN_PMMAR, r0 410 ld r0, KVM_SPLIT_LDBAR(r6) 411 mtspr SPRN_LDBAR, r0 412 isync 413FTR_SECTION_ELSE 414 /* On P9 we use the split_info for coordinating LPCR changes */ 415 lwz r4, KVM_SPLIT_DO_SET(r6) 416 cmpwi r4, 0 417 beq 1f 418 mr r3, r6 419 bl kvmhv_p9_set_lpcr 420 nop 4211: 422ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 42363: 424 /* Order load of vcpu after load of vcore */ 425 lwsync 426 ld r4, HSTATE_KVM_VCPU(r13) 427 bl kvmppc_hv_entry 428 429 /* Back from the guest, go back to nap */ 430 /* Clear our vcpu and vcore pointers so we don't come back in early */ 431 li r0, 0 432 std r0, HSTATE_KVM_VCPU(r13) 433 /* 434 * Once we clear HSTATE_KVM_VCORE(r13), the code in 435 * kvmppc_run_core() is going to assume that all our vcpu 436 * state is visible in memory. This lwsync makes sure 437 * that that is true. 438 */ 439 lwsync 440 std r0, HSTATE_KVM_VCORE(r13) 441 442 /* 443 * All secondaries exiting guest will fall through this path. 444 * Before proceeding, just check for HMI interrupt and 445 * invoke opal hmi handler. By now we are sure that the 446 * primary thread on this core/subcore has already made partition 447 * switch/TB resync and we are good to call opal hmi handler. 448 */ 449 cmpwi r12, BOOK3S_INTERRUPT_HMI 450 bne kvm_no_guest 451 452 li r3,0 /* NULL argument */ 453 bl hmi_exception_realmode 454/* 455 * At this point we have finished executing in the guest. 456 * We need to wait for hwthread_req to become zero, since 457 * we may not turn on the MMU while hwthread_req is non-zero. 458 * While waiting we also need to check if we get given a vcpu to run. 459 */ 460kvm_no_guest: 461 lbz r3, HSTATE_HWTHREAD_REQ(r13) 462 cmpwi r3, 0 463 bne 53f 464 HMT_MEDIUM 465 li r0, KVM_HWTHREAD_IN_KERNEL 466 stb r0, HSTATE_HWTHREAD_STATE(r13) 467 /* need to recheck hwthread_req after a barrier, to avoid race */ 468 sync 469 lbz r3, HSTATE_HWTHREAD_REQ(r13) 470 cmpwi r3, 0 471 bne 54f 472/* 473 * We jump to pnv_wakeup_loss, which will return to the caller 474 * of power7_nap in the powernv cpu offline loop. The value we 475 * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss 476 * requires SRR1 in r12. 
 */
	li	r3, LPCR_PECE0
	mfspr	r4, SPRN_LPCR
	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
	mtspr	SPRN_LPCR, r4
	li	r3, 0
	mfspr	r12, SPRN_SRR1
	b	pnv_wakeup_loss

53:	HMT_LOW
	ld	r5, HSTATE_KVM_VCORE(r13)
	cmpdi	r5, 0
	bne	60f
	ld	r3, HSTATE_SPLIT_MODE(r13)
	cmpdi	r3, 0
	beq	kvm_no_guest
	lwz	r0, KVM_SPLIT_DO_SET(r3)
	cmpwi	r0, 0
	bne	kvmhv_do_set
	lwz	r0, KVM_SPLIT_DO_RESTORE(r3)
	cmpwi	r0, 0
	bne	kvmhv_do_restore
	lbz	r0, KVM_SPLIT_DO_NAP(r3)
	cmpwi	r0, 0
	beq	kvm_no_guest
	HMT_MEDIUM
	b	kvm_unsplit_nap
60:	HMT_MEDIUM
	b	kvm_secondary_got_guest

54:	li	r0, KVM_HWTHREAD_IN_KVM
	stb	r0, HSTATE_HWTHREAD_STATE(r13)
	b	kvm_no_guest

kvmhv_do_set:
	/* Set LPCR, LPIDR etc. on P9 */
	HMT_MEDIUM
	bl	kvmhv_p9_set_lpcr
	nop
	b	kvm_no_guest

kvmhv_do_restore:
	HMT_MEDIUM
	bl	kvmhv_p9_restore_lpcr
	nop
	b	kvm_no_guest

/*
 * Here the primary thread is trying to return the core to
 * whole-core mode, so we need to nap.
 */
kvm_unsplit_nap:
	/*
	 * When secondaries are napping in kvm_unsplit_nap() with
	 * hwthread_req = 1, HMIs go ignored even though the subcores have
	 * already exited the guest. The HMI then keeps waking the
	 * secondaries up from nap in a loop, and the secondaries always go
	 * back to nap since no vcore is assigned to them. This makes it
	 * impossible for the primary thread to get hold of the secondary
	 * threads, resulting in a soft lockup in the KVM path.
	 *
	 * So check whether an HMI is pending and handle it before we go
	 * to nap.
	 */
	cmpwi	r12, BOOK3S_INTERRUPT_HMI
	bne	55f
	li	r3, 0			/* NULL argument */
	bl	hmi_exception_realmode
55:
	/*
	 * Ensure that secondary doesn't nap when it has
	 * its vcore pointer set.
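	 *
	 * In effect the sequence below is (C-ish sketch only; sip is the
	 * kvm_split_mode pointer from HSTATE_SPLIT_MODE, tid is this
	 * thread's id from the PACA):
	 *
	 *	if (local_paca->kvm_hstate.kvm_vcore)
	 *		goto kvm_no_guest;	- woken to run a guest instead
	 *	sip->napped[tid] = 1;
	 *	smp_mb();			- pairs with the do_nap store
	 *	if (sip->do_nap)
	 *		nap();			- fall into kvm_nap_sequence
	 *	else
	 *		sip->napped[tid] = 0;	- do_nap cleared; bail back
	 *					  to kvm_no_guest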
548 */ 549 sync /* matches smp_mb() before setting split_info.do_nap */ 550 ld r0, HSTATE_KVM_VCORE(r13) 551 cmpdi r0, 0 552 bne kvm_no_guest 553 /* clear any pending message */ 554BEGIN_FTR_SECTION 555 lis r6, (PPC_DBELL_SERVER << (63-36))@h 556 PPC_MSGCLR(6) 557END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 558 /* Set kvm_split_mode.napped[tid] = 1 */ 559 ld r3, HSTATE_SPLIT_MODE(r13) 560 li r0, 1 561 lbz r4, HSTATE_TID(r13) 562 addi r4, r4, KVM_SPLIT_NAPPED 563 stbx r0, r3, r4 564 /* Check the do_nap flag again after setting napped[] */ 565 sync 566 lbz r0, KVM_SPLIT_DO_NAP(r3) 567 cmpwi r0, 0 568 beq 57f 569 li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 570 mfspr r5, SPRN_LPCR 571 rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) 572 b kvm_nap_sequence 573 57457: li r0, 0 575 stbx r0, r3, r4 576 b kvm_no_guest 577 578/****************************************************************************** 579 * * 580 * Entry code * 581 * * 582 *****************************************************************************/ 583 584.global kvmppc_hv_entry 585kvmppc_hv_entry: 586 587 /* Required state: 588 * 589 * R4 = vcpu pointer (or NULL) 590 * MSR = ~IR|DR 591 * R13 = PACA 592 * R1 = host R1 593 * R2 = TOC 594 * all other volatile GPRS = free 595 * Does not preserve non-volatile GPRs or CR fields 596 */ 597 mflr r0 598 std r0, PPC_LR_STKOFF(r1) 599 stdu r1, -SFS(r1) 600 601 /* Save R1 in the PACA */ 602 std r1, HSTATE_HOST_R1(r13) 603 604 li r6, KVM_GUEST_MODE_HOST_HV 605 stb r6, HSTATE_IN_GUEST(r13) 606 607#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 608 /* Store initial timestamp */ 609 cmpdi r4, 0 610 beq 1f 611 addi r3, r4, VCPU_TB_RMENTRY 612 bl kvmhv_start_timing 6131: 614#endif 615 616 /* Use cr7 as an indication of radix mode */ 617 ld r5, HSTATE_KVM_VCORE(r13) 618 ld r9, VCORE_KVM(r5) /* pointer to struct kvm */ 619 lbz r0, KVM_RADIX(r9) 620 cmpwi cr7, r0, 0 621 622 /* 623 * POWER7/POWER8 host -> guest partition switch code. 624 * We don't have to lock against concurrent tlbies, 625 * but we do have to coordinate across hardware threads. 626 */ 627 /* Set bit in entry map iff exit map is zero. */ 628 li r7, 1 629 lbz r6, HSTATE_PTID(r13) 630 sld r7, r7, r6 631 addi r8, r5, VCORE_ENTRY_EXIT 63221: lwarx r3, 0, r8 633 cmpwi r3, 0x100 /* any threads starting to exit? */ 634 bge secondary_too_late /* if so we're too late to the party */ 635 or r3, r3, r7 636 stwcx. r3, 0, r8 637 bne 21b 638 639 /* Primary thread switches to guest partition. */ 640 cmpwi r6,0 641 bne 10f 642 lwz r7,KVM_LPID(r9) 643BEGIN_FTR_SECTION 644 ld r6,KVM_SDR1(r9) 645 li r0,LPID_RSVD /* switch to reserved LPID */ 646 mtspr SPRN_LPID,r0 647 ptesync 648 mtspr SPRN_SDR1,r6 /* switch to partition page table */ 649END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 650 mtspr SPRN_LPID,r7 651 isync 652 653 /* See if we need to flush the TLB */ 654 lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ 655BEGIN_FTR_SECTION 656 /* 657 * On POWER9, individual threads can come in here, but the 658 * TLB is shared between the 4 threads in a core, hence 659 * invalidating on one thread invalidates for all. 660 * Thus we make all 4 threads use the same bit here. 661 */ 662 clrrdi r6,r6,2 663END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 664 clrldi r7,r6,64-6 /* extract bit number (6 bits) */ 665 srdi r6,r6,6 /* doubleword number */ 666 sldi r6,r6,3 /* address offset */ 667 add r6,r6,r9 668 addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ 669 li r8,1 670 sld r8,r8,r7 671 ld r7,0(r6) 672 and. 
r7,r7,r8 673 beq 22f 674 /* Flush the TLB of any entries for this LPID */ 675 lwz r0,KVM_TLB_SETS(r9) 676 mtctr r0 677 li r7,0x800 /* IS field = 0b10 */ 678 ptesync 679 li r0,0 /* RS for P9 version of tlbiel */ 680 bne cr7, 29f 68128: tlbiel r7 /* On P9, rs=0, RIC=0, PRS=0, R=0 */ 682 addi r7,r7,0x1000 683 bdnz 28b 684 b 30f 68529: PPC_TLBIEL(7,0,2,1,1) /* for radix, RIC=2, PRS=1, R=1 */ 686 addi r7,r7,0x1000 687 bdnz 29b 68830: ptesync 68923: ldarx r7,0,r6 /* clear the bit after TLB flushed */ 690 andc r7,r7,r8 691 stdcx. r7,0,r6 692 bne 23b 693 694 /* Add timebase offset onto timebase */ 69522: ld r8,VCORE_TB_OFFSET(r5) 696 cmpdi r8,0 697 beq 37f 698 std r8, VCORE_TB_OFFSET_APPL(r5) 699 mftb r6 /* current host timebase */ 700 add r8,r8,r6 701 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 702 mftb r7 /* check if lower 24 bits overflowed */ 703 clrldi r6,r6,40 704 clrldi r7,r7,40 705 cmpld r7,r6 706 bge 37f 707 addis r8,r8,0x100 /* if so, increment upper 40 bits */ 708 mtspr SPRN_TBU40,r8 709 710 /* Load guest PCR value to select appropriate compat mode */ 71137: ld r7, VCORE_PCR(r5) 712 cmpdi r7, 0 713 beq 38f 714 mtspr SPRN_PCR, r7 71538: 716 717BEGIN_FTR_SECTION 718 /* DPDES and VTB are shared between threads */ 719 ld r8, VCORE_DPDES(r5) 720 ld r7, VCORE_VTB(r5) 721 mtspr SPRN_DPDES, r8 722 mtspr SPRN_VTB, r7 723END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 724 725 /* Mark the subcore state as inside guest */ 726 bl kvmppc_subcore_enter_guest 727 nop 728 ld r5, HSTATE_KVM_VCORE(r13) 729 ld r4, HSTATE_KVM_VCPU(r13) 730 li r0,1 731 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ 732 733 /* Do we have a guest vcpu to run? */ 73410: cmpdi r4, 0 735 beq kvmppc_primary_no_guest 736kvmppc_got_guest: 737 /* Increment yield count if they have a VPA */ 738 ld r3, VCPU_VPA(r4) 739 cmpdi r3, 0 740 beq 25f 741 li r6, LPPACA_YIELDCOUNT 742 LWZX_BE r5, r3, r6 743 addi r5, r5, 1 744 STWX_BE r5, r3, r6 745 li r6, 1 746 stb r6, VCPU_VPA_DIRTY(r4) 74725: 748 749 /* Save purr/spurr */ 750 mfspr r5,SPRN_PURR 751 mfspr r6,SPRN_SPURR 752 std r5,HSTATE_PURR(r13) 753 std r6,HSTATE_SPURR(r13) 754 ld r7,VCPU_PURR(r4) 755 ld r8,VCPU_SPURR(r4) 756 mtspr SPRN_PURR,r7 757 mtspr SPRN_SPURR,r8 758 759 /* Save host values of some registers */ 760BEGIN_FTR_SECTION 761 mfspr r5, SPRN_TIDR 762 mfspr r6, SPRN_PSSCR 763 mfspr r7, SPRN_PID 764 mfspr r8, SPRN_IAMR 765 std r5, STACK_SLOT_TID(r1) 766 std r6, STACK_SLOT_PSSCR(r1) 767 std r7, STACK_SLOT_PID(r1) 768 std r8, STACK_SLOT_IAMR(r1) 769 mfspr r5, SPRN_HFSCR 770 std r5, STACK_SLOT_HFSCR(r1) 771END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 772BEGIN_FTR_SECTION 773 mfspr r5, SPRN_CIABR 774 mfspr r6, SPRN_DAWR 775 mfspr r7, SPRN_DAWRX 776 std r5, STACK_SLOT_CIABR(r1) 777 std r6, STACK_SLOT_DAWR(r1) 778 std r7, STACK_SLOT_DAWRX(r1) 779END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 780 781BEGIN_FTR_SECTION 782 /* Set partition DABR */ 783 /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ 784 lwz r5,VCPU_DABRX(r4) 785 ld r6,VCPU_DABR(r4) 786 mtspr SPRN_DABRX,r5 787 mtspr SPRN_DABR,r6 788 isync 789END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 790 791#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 792/* 793 * Branch around the call if both CPU_FTR_TM and 794 * CPU_FTR_P9_TM_HV_ASSIST are off. 
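 *
 * In C terms the feature sections amount to (sketch, not literal code):
 *
 *	if (cpu_has_feature(CPU_FTR_TM) ||
 *	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
 *		kvmppc_restore_tm(vcpu);	- clobbers all non-volatiles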
795 */ 796BEGIN_FTR_SECTION 797 b 91f 798END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 799 /* 800 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 801 */ 802 bl kvmppc_restore_tm 80391: 804#endif 805 806 /* Load guest PMU registers */ 807 /* R4 is live here (vcpu pointer) */ 808 li r3, 1 809 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 810 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 811 isync 812BEGIN_FTR_SECTION 813 ld r3, VCPU_MMCR(r4) 814 andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO 815 cmpwi r5, MMCR0_PMAO 816 beql kvmppc_fix_pmao 817END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) 818 lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */ 819 lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */ 820 lwz r6, VCPU_PMC + 8(r4) 821 lwz r7, VCPU_PMC + 12(r4) 822 lwz r8, VCPU_PMC + 16(r4) 823 lwz r9, VCPU_PMC + 20(r4) 824 mtspr SPRN_PMC1, r3 825 mtspr SPRN_PMC2, r5 826 mtspr SPRN_PMC3, r6 827 mtspr SPRN_PMC4, r7 828 mtspr SPRN_PMC5, r8 829 mtspr SPRN_PMC6, r9 830 ld r3, VCPU_MMCR(r4) 831 ld r5, VCPU_MMCR + 8(r4) 832 ld r6, VCPU_MMCR + 16(r4) 833 ld r7, VCPU_SIAR(r4) 834 ld r8, VCPU_SDAR(r4) 835 mtspr SPRN_MMCR1, r5 836 mtspr SPRN_MMCRA, r6 837 mtspr SPRN_SIAR, r7 838 mtspr SPRN_SDAR, r8 839BEGIN_FTR_SECTION 840 ld r5, VCPU_MMCR + 24(r4) 841 ld r6, VCPU_SIER(r4) 842 mtspr SPRN_MMCR2, r5 843 mtspr SPRN_SIER, r6 844BEGIN_FTR_SECTION_NESTED(96) 845 lwz r7, VCPU_PMC + 24(r4) 846 lwz r8, VCPU_PMC + 28(r4) 847 ld r9, VCPU_MMCR + 32(r4) 848 mtspr SPRN_SPMC1, r7 849 mtspr SPRN_SPMC2, r8 850 mtspr SPRN_MMCRS, r9 851END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) 852END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 853 mtspr SPRN_MMCR0, r3 854 isync 855 856 /* Load up FP, VMX and VSX registers */ 857 bl kvmppc_load_fp 858 859 ld r14, VCPU_GPR(R14)(r4) 860 ld r15, VCPU_GPR(R15)(r4) 861 ld r16, VCPU_GPR(R16)(r4) 862 ld r17, VCPU_GPR(R17)(r4) 863 ld r18, VCPU_GPR(R18)(r4) 864 ld r19, VCPU_GPR(R19)(r4) 865 ld r20, VCPU_GPR(R20)(r4) 866 ld r21, VCPU_GPR(R21)(r4) 867 ld r22, VCPU_GPR(R22)(r4) 868 ld r23, VCPU_GPR(R23)(r4) 869 ld r24, VCPU_GPR(R24)(r4) 870 ld r25, VCPU_GPR(R25)(r4) 871 ld r26, VCPU_GPR(R26)(r4) 872 ld r27, VCPU_GPR(R27)(r4) 873 ld r28, VCPU_GPR(R28)(r4) 874 ld r29, VCPU_GPR(R29)(r4) 875 ld r30, VCPU_GPR(R30)(r4) 876 ld r31, VCPU_GPR(R31)(r4) 877 878 /* Switch DSCR to guest value */ 879 ld r5, VCPU_DSCR(r4) 880 mtspr SPRN_DSCR, r5 881 882BEGIN_FTR_SECTION 883 /* Skip next section on POWER7 */ 884 b 8f 885END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 886 /* Load up POWER8-specific registers */ 887 ld r5, VCPU_IAMR(r4) 888 lwz r6, VCPU_PSPB(r4) 889 ld r7, VCPU_FSCR(r4) 890 mtspr SPRN_IAMR, r5 891 mtspr SPRN_PSPB, r6 892 mtspr SPRN_FSCR, r7 893 ld r5, VCPU_DAWR(r4) 894 ld r6, VCPU_DAWRX(r4) 895 ld r7, VCPU_CIABR(r4) 896 ld r8, VCPU_TAR(r4) 897 /* 898 * Handle broken DAWR case by not writing it. This means we 899 * can still store the DAWR register for migration. 
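	 *
	 * Roughly, in C (sketch; mtspr()/cpu_has_feature() as used elsewhere
	 * in the kernel, vcpu fields per struct kvm_vcpu_arch):
	 *
	 *	if (cpu_has_feature(CPU_FTR_DAWR)) {
	 *		mtspr(SPRN_DAWR, vcpu->arch.dawr);
	 *		mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
	 *	}
	 *	- CIABR and TAR are still written unconditionally below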
900 */ 901BEGIN_FTR_SECTION 902 mtspr SPRN_DAWR, r5 903 mtspr SPRN_DAWRX, r6 904END_FTR_SECTION_IFSET(CPU_FTR_DAWR) 905 mtspr SPRN_CIABR, r7 906 mtspr SPRN_TAR, r8 907 ld r5, VCPU_IC(r4) 908 ld r8, VCPU_EBBHR(r4) 909 mtspr SPRN_IC, r5 910 mtspr SPRN_EBBHR, r8 911 ld r5, VCPU_EBBRR(r4) 912 ld r6, VCPU_BESCR(r4) 913 lwz r7, VCPU_GUEST_PID(r4) 914 ld r8, VCPU_WORT(r4) 915 mtspr SPRN_EBBRR, r5 916 mtspr SPRN_BESCR, r6 917 mtspr SPRN_PID, r7 918 mtspr SPRN_WORT, r8 919BEGIN_FTR_SECTION 920 PPC_INVALIDATE_ERAT 921END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 922BEGIN_FTR_SECTION 923 /* POWER8-only registers */ 924 ld r5, VCPU_TCSCR(r4) 925 ld r6, VCPU_ACOP(r4) 926 ld r7, VCPU_CSIGR(r4) 927 ld r8, VCPU_TACR(r4) 928 mtspr SPRN_TCSCR, r5 929 mtspr SPRN_ACOP, r6 930 mtspr SPRN_CSIGR, r7 931 mtspr SPRN_TACR, r8 932 nop 933FTR_SECTION_ELSE 934 /* POWER9-only registers */ 935 ld r5, VCPU_TID(r4) 936 ld r6, VCPU_PSSCR(r4) 937 lbz r8, HSTATE_FAKE_SUSPEND(r13) 938 oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ 939 rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG 940 ld r7, VCPU_HFSCR(r4) 941 mtspr SPRN_TIDR, r5 942 mtspr SPRN_PSSCR, r6 943 mtspr SPRN_HFSCR, r7 944ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 9458: 946 947 ld r5, VCPU_SPRG0(r4) 948 ld r6, VCPU_SPRG1(r4) 949 ld r7, VCPU_SPRG2(r4) 950 ld r8, VCPU_SPRG3(r4) 951 mtspr SPRN_SPRG0, r5 952 mtspr SPRN_SPRG1, r6 953 mtspr SPRN_SPRG2, r7 954 mtspr SPRN_SPRG3, r8 955 956 /* Load up DAR and DSISR */ 957 ld r5, VCPU_DAR(r4) 958 lwz r6, VCPU_DSISR(r4) 959 mtspr SPRN_DAR, r5 960 mtspr SPRN_DSISR, r6 961 962 /* Restore AMR and UAMOR, set AMOR to all 1s */ 963 ld r5,VCPU_AMR(r4) 964 ld r6,VCPU_UAMOR(r4) 965 li r7,-1 966 mtspr SPRN_AMR,r5 967 mtspr SPRN_UAMOR,r6 968 mtspr SPRN_AMOR,r7 969 970 /* Restore state of CTRL run bit; assume 1 on entry */ 971 lwz r5,VCPU_CTRL(r4) 972 andi. r5,r5,1 973 bne 4f 974 mfspr r6,SPRN_CTRLF 975 clrrdi r6,r6,1 976 mtspr SPRN_CTRLT,r6 9774: 978 /* Secondary threads wait for primary to have done partition switch */ 979 ld r5, HSTATE_KVM_VCORE(r13) 980 lbz r6, HSTATE_PTID(r13) 981 cmpwi r6, 0 982 beq 21f 983 lbz r0, VCORE_IN_GUEST(r5) 984 cmpwi r0, 0 985 bne 21f 986 HMT_LOW 98720: lwz r3, VCORE_ENTRY_EXIT(r5) 988 cmpwi r3, 0x100 989 bge no_switch_exit 990 lbz r0, VCORE_IN_GUEST(r5) 991 cmpwi r0, 0 992 beq 20b 993 HMT_MEDIUM 99421: 995 /* Set LPCR. */ 996 ld r8,VCORE_LPCR(r5) 997 mtspr SPRN_LPCR,r8 998 isync 999 1000 /* 1001 * Set the decrementer to the guest decrementer. 1002 */ 1003 ld r8,VCPU_DEC_EXPIRES(r4) 1004 /* r8 is a host timebase value here, convert to guest TB */ 1005 ld r5,HSTATE_KVM_VCORE(r13) 1006 ld r6,VCORE_TB_OFFSET_APPL(r5) 1007 add r8,r8,r6 1008 mftb r7 1009 subf r3,r7,r8 1010 mtspr SPRN_DEC,r3 1011 1012 /* Check if HDEC expires soon */ 1013 mfspr r3, SPRN_HDEC 1014 EXTEND_HDEC(r3) 1015 cmpdi r3, 512 /* 1 microsecond */ 1016 blt hdec_soon 1017 1018 /* For hash guest, clear out and reload the SLB */ 1019 ld r6, VCPU_KVM(r4) 1020 lbz r0, KVM_RADIX(r6) 1021 cmpwi r0, 0 1022 bne 9f 1023 li r6, 0 1024 slbmte r6, r6 1025 slbia 1026 ptesync 1027 1028 /* Load up guest SLB entries (N.B. 
slb_max will be 0 for radix) */
	lwz	r5,VCPU_SLB_MAX(r4)
	cmpwi	r5,0
	beq	9f
	mtctr	r5
	addi	r6,r4,VCPU_SLB
1:	ld	r8,VCPU_SLB_E(r6)
	ld	r9,VCPU_SLB_V(r6)
	slbmte	r9,r8
	addi	r6,r6,VCPU_SLB_SIZE
	bdnz	1b
9:

#ifdef CONFIG_KVM_XICS
	/* We are entering the guest on that thread, push VCPU to XIVE */
	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
	cmpldi	cr0, r10, 0
	beq	no_xive
	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
	li	r9, TM_QW1_OS
	eieio
	stdcix	r11,r9,r10
	lwz	r11, VCPU_XIVE_CAM_WORD(r4)
	li	r9, TM_QW1_OS + TM_WORD2
	stwcix	r11,r9,r10
	li	r9, 1
	stb	r9, VCPU_XIVE_PUSHED(r4)
	eieio

	/*
	 * We clear the irq_pending flag. There is a small chance of a
	 * race vs. the escalation interrupt happening on another
	 * processor setting it again, but the only consequence is to
	 * cause a spurious wakeup on the next H_CEDE, which is not an
	 * issue.
	 */
	li	r0,0
	stb	r0, VCPU_IRQ_PENDING(r4)

	/*
	 * In single escalation mode, if the escalation interrupt is
	 * on, we mask it.
	 */
	lbz	r0, VCPU_XIVE_ESC_ON(r4)
	cmpwi	r0,0
	beq	1f
	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
	li	r9, XIVE_ESB_SET_PQ_01
	ldcix	r0, r10, r9
	sync

	/* We have a possible subtle race here: The escalation interrupt might
	 * have fired and be on its way to the host queue while we mask it,
	 * and if we unmask it early enough (re-cede right away), there is
	 * a theoretical possibility that it fires again, thus landing in the
	 * target queue more than once, which is a big no-no.
	 *
	 * Fortunately, solving this is rather easy. If the above load setting
	 * PQ to 01 returns a previous value where P is set, then we know the
	 * escalation interrupt is somewhere on its way to the host. In that
	 * case we simply don't clear the xive_esc_on flag below. It will be
	 * eventually cleared by the handler for the escalation interrupt.
	 *
	 * Then, when doing a cede, we check that flag again before re-enabling
	 * the escalation interrupt, and if set, we abort the cede.
	 */
	andi.	r0, r0, XIVE_ESB_VAL_P
	bne-	1f

	/* Now P is 0, we can clear the flag */
	li	r0, 0
	stb	r0, VCPU_XIVE_ESC_ON(r4)
1:
no_xive:
#endif /* CONFIG_KVM_XICS */

deliver_guest_interrupt:
	ld	r6, VCPU_CTR(r4)
	ld	r7, VCPU_XER(r4)

	mtctr	r6
	mtxer	r7

kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
	ld	r10, VCPU_PC(r4)
	ld	r11, VCPU_MSR(r4)
	ld	r6, VCPU_SRR0(r4)
	ld	r7, VCPU_SRR1(r4)
	mtspr	SPRN_SRR0, r6
	mtspr	SPRN_SRR1, r7

	/* r11 = vcpu->arch.msr & ~MSR_HV */
	rldicl	r11, r11, 63 - MSR_HV_LG, 1
	rotldi	r11, r11, 1 + MSR_HV_LG
	ori	r11, r11, MSR_ME

	/* Check if we can deliver an external or decrementer interrupt now */
	ld	r0, VCPU_PENDING_EXC(r4)
	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
	cmpdi	cr1, r0, 0
	andi.	r8, r11, MSR_EE
	mfspr	r8, SPRN_LPCR
	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
	mtspr	SPRN_LPCR, r8
	isync
	beq	5f
	li	r0, BOOK3S_INTERRUPT_EXTERNAL
	bne	cr1, 12f
	mfspr	r0, SPRN_DEC
BEGIN_FTR_SECTION
	/* On POWER9 check whether the guest has large decrementer enabled */
	andis.
r8, r8, LPCR_LD@h 1141 bne 15f 1142END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1143 extsw r0, r0 114415: cmpdi r0, 0 1145 li r0, BOOK3S_INTERRUPT_DECREMENTER 1146 bge 5f 1147 114812: mtspr SPRN_SRR0, r10 1149 mr r10,r0 1150 mtspr SPRN_SRR1, r11 1151 mr r9, r4 1152 bl kvmppc_msr_interrupt 11535: 1154BEGIN_FTR_SECTION 1155 b fast_guest_return 1156END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 1157 /* On POWER9, check for pending doorbell requests */ 1158 lbz r0, VCPU_DBELL_REQ(r4) 1159 cmpwi r0, 0 1160 beq fast_guest_return 1161 ld r5, HSTATE_KVM_VCORE(r13) 1162 /* Set DPDES register so the CPU will take a doorbell interrupt */ 1163 li r0, 1 1164 mtspr SPRN_DPDES, r0 1165 std r0, VCORE_DPDES(r5) 1166 /* Make sure other cpus see vcore->dpdes set before dbell req clear */ 1167 lwsync 1168 /* Clear the pending doorbell request */ 1169 li r0, 0 1170 stb r0, VCPU_DBELL_REQ(r4) 1171 1172/* 1173 * Required state: 1174 * R4 = vcpu 1175 * R10: value for HSRR0 1176 * R11: value for HSRR1 1177 * R13 = PACA 1178 */ 1179fast_guest_return: 1180 li r0,0 1181 stb r0,VCPU_CEDED(r4) /* cancel cede */ 1182 mtspr SPRN_HSRR0,r10 1183 mtspr SPRN_HSRR1,r11 1184 1185 /* Activate guest mode, so faults get handled by KVM */ 1186 li r9, KVM_GUEST_MODE_GUEST_HV 1187 stb r9, HSTATE_IN_GUEST(r13) 1188 1189#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1190 /* Accumulate timing */ 1191 addi r3, r4, VCPU_TB_GUEST 1192 bl kvmhv_accumulate_time 1193#endif 1194 1195 /* Enter guest */ 1196 1197BEGIN_FTR_SECTION 1198 ld r5, VCPU_CFAR(r4) 1199 mtspr SPRN_CFAR, r5 1200END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 1201BEGIN_FTR_SECTION 1202 ld r0, VCPU_PPR(r4) 1203END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1204 1205 ld r5, VCPU_LR(r4) 1206 lwz r6, VCPU_CR(r4) 1207 mtlr r5 1208 mtcr r6 1209 1210 ld r1, VCPU_GPR(R1)(r4) 1211 ld r2, VCPU_GPR(R2)(r4) 1212 ld r3, VCPU_GPR(R3)(r4) 1213 ld r5, VCPU_GPR(R5)(r4) 1214 ld r6, VCPU_GPR(R6)(r4) 1215 ld r7, VCPU_GPR(R7)(r4) 1216 ld r8, VCPU_GPR(R8)(r4) 1217 ld r9, VCPU_GPR(R9)(r4) 1218 ld r10, VCPU_GPR(R10)(r4) 1219 ld r11, VCPU_GPR(R11)(r4) 1220 ld r12, VCPU_GPR(R12)(r4) 1221 ld r13, VCPU_GPR(R13)(r4) 1222 1223BEGIN_FTR_SECTION 1224 mtspr SPRN_PPR, r0 1225END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1226 1227/* Move canary into DSISR to check for later */ 1228BEGIN_FTR_SECTION 1229 li r0, 0x7fff 1230 mtspr SPRN_HDSISR, r0 1231END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1232 1233 ld r0, VCPU_GPR(R0)(r4) 1234 ld r4, VCPU_GPR(R4)(r4) 1235 HRFI_TO_GUEST 1236 b . 1237 1238secondary_too_late: 1239 li r12, 0 1240 stw r12, STACK_SLOT_TRAP(r1) 1241 cmpdi r4, 0 1242 beq 11f 1243 stw r12, VCPU_TRAP(r4) 1244#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1245 addi r3, r4, VCPU_TB_RMEXIT 1246 bl kvmhv_accumulate_time 1247#endif 124811: b kvmhv_switch_to_host 1249 1250no_switch_exit: 1251 HMT_MEDIUM 1252 li r12, 0 1253 b 12f 1254hdec_soon: 1255 li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 125612: stw r12, VCPU_TRAP(r4) 1257 mr r9, r4 1258#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1259 addi r3, r4, VCPU_TB_RMEXIT 1260 bl kvmhv_accumulate_time 1261#endif 1262 b guest_bypass 1263 1264/****************************************************************************** 1265 * * 1266 * Exit code * 1267 * * 1268 *****************************************************************************/ 1269 1270/* 1271 * We come here from the first-level interrupt handlers. 
1272 */ 1273 .globl kvmppc_interrupt_hv 1274kvmppc_interrupt_hv: 1275 /* 1276 * Register contents: 1277 * R12 = (guest CR << 32) | interrupt vector 1278 * R13 = PACA 1279 * guest R12 saved in shadow VCPU SCRATCH0 1280 * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE 1281 * guest R13 saved in SPRN_SCRATCH0 1282 */ 1283 std r9, HSTATE_SCRATCH2(r13) 1284 lbz r9, HSTATE_IN_GUEST(r13) 1285 cmpwi r9, KVM_GUEST_MODE_HOST_HV 1286 beq kvmppc_bad_host_intr 1287#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 1288 cmpwi r9, KVM_GUEST_MODE_GUEST 1289 ld r9, HSTATE_SCRATCH2(r13) 1290 beq kvmppc_interrupt_pr 1291#endif 1292 /* We're now back in the host but in guest MMU context */ 1293 li r9, KVM_GUEST_MODE_HOST_HV 1294 stb r9, HSTATE_IN_GUEST(r13) 1295 1296 ld r9, HSTATE_KVM_VCPU(r13) 1297 1298 /* Save registers */ 1299 1300 std r0, VCPU_GPR(R0)(r9) 1301 std r1, VCPU_GPR(R1)(r9) 1302 std r2, VCPU_GPR(R2)(r9) 1303 std r3, VCPU_GPR(R3)(r9) 1304 std r4, VCPU_GPR(R4)(r9) 1305 std r5, VCPU_GPR(R5)(r9) 1306 std r6, VCPU_GPR(R6)(r9) 1307 std r7, VCPU_GPR(R7)(r9) 1308 std r8, VCPU_GPR(R8)(r9) 1309 ld r0, HSTATE_SCRATCH2(r13) 1310 std r0, VCPU_GPR(R9)(r9) 1311 std r10, VCPU_GPR(R10)(r9) 1312 std r11, VCPU_GPR(R11)(r9) 1313 ld r3, HSTATE_SCRATCH0(r13) 1314 std r3, VCPU_GPR(R12)(r9) 1315 /* CR is in the high half of r12 */ 1316 srdi r4, r12, 32 1317 stw r4, VCPU_CR(r9) 1318BEGIN_FTR_SECTION 1319 ld r3, HSTATE_CFAR(r13) 1320 std r3, VCPU_CFAR(r9) 1321END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 1322BEGIN_FTR_SECTION 1323 ld r4, HSTATE_PPR(r13) 1324 std r4, VCPU_PPR(r9) 1325END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1326 1327 /* Restore R1/R2 so we can handle faults */ 1328 ld r1, HSTATE_HOST_R1(r13) 1329 ld r2, PACATOC(r13) 1330 1331 mfspr r10, SPRN_SRR0 1332 mfspr r11, SPRN_SRR1 1333 std r10, VCPU_SRR0(r9) 1334 std r11, VCPU_SRR1(r9) 1335 /* trap is in the low half of r12, clear CR from the high half */ 1336 clrldi r12, r12, 32 1337 andi. r0, r12, 2 /* need to read HSRR0/1? */ 1338 beq 1f 1339 mfspr r10, SPRN_HSRR0 1340 mfspr r11, SPRN_HSRR1 1341 clrrdi r12, r12, 2 13421: std r10, VCPU_PC(r9) 1343 std r11, VCPU_MSR(r9) 1344 1345 GET_SCRATCH0(r3) 1346 mflr r4 1347 std r3, VCPU_GPR(R13)(r9) 1348 std r4, VCPU_LR(r9) 1349 1350 stw r12,VCPU_TRAP(r9) 1351 1352 /* 1353 * Now that we have saved away SRR0/1 and HSRR0/1, 1354 * interrupts are recoverable in principle, so set MSR_RI. 1355 * This becomes important for relocation-on interrupts from 1356 * the guest, which we can get in radix mode on POWER9. 
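	 *
	 * In C this step is roughly (sketch):
	 *
	 *	__mtmsrd(MSR_RI, 1);	- L=1: only EE/RI are altered, so the
	 *				  IR/DR (MMU) state is left untouched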
1357 */ 1358 li r0, MSR_RI 1359 mtmsrd r0, 1 1360 1361#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1362 addi r3, r9, VCPU_TB_RMINTR 1363 mr r4, r9 1364 bl kvmhv_accumulate_time 1365 ld r5, VCPU_GPR(R5)(r9) 1366 ld r6, VCPU_GPR(R6)(r9) 1367 ld r7, VCPU_GPR(R7)(r9) 1368 ld r8, VCPU_GPR(R8)(r9) 1369#endif 1370 1371 /* Save HEIR (HV emulation assist reg) in emul_inst 1372 if this is an HEI (HV emulation interrupt, e40) */ 1373 li r3,KVM_INST_FETCH_FAILED 1374 stw r3,VCPU_LAST_INST(r9) 1375 cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST 1376 bne 11f 1377 mfspr r3,SPRN_HEIR 137811: stw r3,VCPU_HEIR(r9) 1379 1380 /* these are volatile across C function calls */ 1381#ifdef CONFIG_RELOCATABLE 1382 ld r3, HSTATE_SCRATCH1(r13) 1383 mtctr r3 1384#else 1385 mfctr r3 1386#endif 1387 mfxer r4 1388 std r3, VCPU_CTR(r9) 1389 std r4, VCPU_XER(r9) 1390 1391#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1392 /* For softpatch interrupt, go off and do TM instruction emulation */ 1393 cmpwi r12, BOOK3S_INTERRUPT_HV_SOFTPATCH 1394 beq kvmppc_tm_emul 1395#endif 1396 1397 /* If this is a page table miss then see if it's theirs or ours */ 1398 cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 1399 beq kvmppc_hdsi 1400 cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE 1401 beq kvmppc_hisi 1402 1403 /* See if this is a leftover HDEC interrupt */ 1404 cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER 1405 bne 2f 1406 mfspr r3,SPRN_HDEC 1407 EXTEND_HDEC(r3) 1408 cmpdi r3,0 1409 mr r4,r9 1410 bge fast_guest_return 14112: 1412 /* See if this is an hcall we can handle in real mode */ 1413 cmpwi r12,BOOK3S_INTERRUPT_SYSCALL 1414 beq hcall_try_real_mode 1415 1416 /* Hypervisor doorbell - exit only if host IPI flag set */ 1417 cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL 1418 bne 3f 1419BEGIN_FTR_SECTION 1420 PPC_MSGSYNC 1421 lwsync 1422END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1423 lbz r0, HSTATE_HOST_IPI(r13) 1424 cmpwi r0, 0 1425 beq 4f 1426 b guest_exit_cont 14273: 1428 /* If it's a hypervisor facility unavailable interrupt, save HFSCR */ 1429 cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL 1430 bne 14f 1431 mfspr r3, SPRN_HFSCR 1432 std r3, VCPU_HFSCR(r9) 1433 b guest_exit_cont 143414: 1435 /* External interrupt ? */ 1436 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL 1437 bne+ guest_exit_cont 1438 1439 /* External interrupt, first check for host_ipi. If this is 1440 * set, we know the host wants us out so let's do it now 1441 */ 1442 bl kvmppc_read_intr 1443 1444 /* 1445 * Restore the active volatile registers after returning from 1446 * a C function. 1447 */ 1448 ld r9, HSTATE_KVM_VCPU(r13) 1449 li r12, BOOK3S_INTERRUPT_EXTERNAL 1450 1451 /* 1452 * kvmppc_read_intr return codes: 1453 * 1454 * Exit to host (r3 > 0) 1455 * 1 An interrupt is pending that needs to be handled by the host 1456 * Exit guest and return to host by branching to guest_exit_cont 1457 * 1458 * 2 Passthrough that needs completion in the host 1459 * Exit guest and return to host by branching to guest_exit_cont 1460 * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD 1461 * to indicate to the host to complete handling the interrupt 1462 * 1463 * Before returning to guest, we check if any CPU is heading out 1464 * to the host and if so, we head out also. If no CPUs are heading 1465 * check return values <= 0. 1466 * 1467 * Return to guest (r3 <= 0) 1468 * 0 No external interrupt is pending 1469 * -1 A guest wakeup IPI (which has now been cleared) 1470 * In either case, we return to guest to deliver any pending 1471 * guest interrupts. 
1472 * 1473 * -2 A PCI passthrough external interrupt was handled 1474 * (interrupt was delivered directly to guest) 1475 * Return to guest to deliver any pending guest interrupts. 1476 */ 1477 1478 cmpdi r3, 1 1479 ble 1f 1480 1481 /* Return code = 2 */ 1482 li r12, BOOK3S_INTERRUPT_HV_RM_HARD 1483 stw r12, VCPU_TRAP(r9) 1484 b guest_exit_cont 1485 14861: /* Return code <= 1 */ 1487 cmpdi r3, 0 1488 bgt guest_exit_cont 1489 1490 /* Return code <= 0 */ 14914: ld r5, HSTATE_KVM_VCORE(r13) 1492 lwz r0, VCORE_ENTRY_EXIT(r5) 1493 cmpwi r0, 0x100 1494 mr r4, r9 1495 blt deliver_guest_interrupt 1496 1497guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 1498 /* Save more register state */ 1499 mfdar r6 1500 mfdsisr r7 1501 std r6, VCPU_DAR(r9) 1502 stw r7, VCPU_DSISR(r9) 1503 /* don't overwrite fault_dar/fault_dsisr if HDSI */ 1504 cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE 1505 beq mc_cont 1506 std r6, VCPU_FAULT_DAR(r9) 1507 stw r7, VCPU_FAULT_DSISR(r9) 1508 1509 /* See if it is a machine check */ 1510 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1511 beq machine_check_realmode 1512mc_cont: 1513#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1514 addi r3, r9, VCPU_TB_RMEXIT 1515 mr r4, r9 1516 bl kvmhv_accumulate_time 1517#endif 1518#ifdef CONFIG_KVM_XICS 1519 /* We are exiting, pull the VP from the XIVE */ 1520 lbz r0, VCPU_XIVE_PUSHED(r9) 1521 cmpwi cr0, r0, 0 1522 beq 1f 1523 li r7, TM_SPC_PULL_OS_CTX 1524 li r6, TM_QW1_OS 1525 mfmsr r0 1526 andi. r0, r0, MSR_DR /* in real mode? */ 1527 beq 2f 1528 ld r10, HSTATE_XIVE_TIMA_VIRT(r13) 1529 cmpldi cr0, r10, 0 1530 beq 1f 1531 /* First load to pull the context, we ignore the value */ 1532 eieio 1533 lwzx r11, r7, r10 1534 /* Second load to recover the context state (Words 0 and 1) */ 1535 ldx r11, r6, r10 1536 b 3f 15372: ld r10, HSTATE_XIVE_TIMA_PHYS(r13) 1538 cmpldi cr0, r10, 0 1539 beq 1f 1540 /* First load to pull the context, we ignore the value */ 1541 eieio 1542 lwzcix r11, r7, r10 1543 /* Second load to recover the context state (Words 0 and 1) */ 1544 ldcix r11, r6, r10 15453: std r11, VCPU_XIVE_SAVED_STATE(r9) 1546 /* Fixup some of the state for the next load */ 1547 li r10, 0 1548 li r0, 0xff 1549 stb r10, VCPU_XIVE_PUSHED(r9) 1550 stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9) 1551 stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9) 1552 eieio 15531: 1554#endif /* CONFIG_KVM_XICS */ 1555 1556 /* For hash guest, read the guest SLB and save it away */ 1557 ld r5, VCPU_KVM(r9) 1558 lbz r0, KVM_RADIX(r5) 1559 li r5, 0 1560 cmpwi r0, 0 1561 bne 3f /* for radix, save 0 entries */ 1562 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ 1563 mtctr r0 1564 li r6,0 1565 addi r7,r9,VCPU_SLB 15661: slbmfee r8,r6 1567 andis. r0,r8,SLB_ESID_V@h 1568 beq 2f 1569 add r8,r8,r6 /* put index in */ 1570 slbmfev r3,r6 1571 std r8,VCPU_SLB_E(r7) 1572 std r3,VCPU_SLB_V(r7) 1573 addi r7,r7,VCPU_SLB_SIZE 1574 addi r5,r5,1 15752: addi r6,r6,1 1576 bdnz 1b 1577 /* Finally clear out the SLB */ 1578 li r0,0 1579 slbmte r0,r0 1580 slbia 1581 ptesync 15823: stw r5,VCPU_SLB_MAX(r9) 1583 1584 /* load host SLB entries */ 1585BEGIN_MMU_FTR_SECTION 1586 b 0f 1587END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) 1588 ld r8,PACA_SLBSHADOWPTR(r13) 1589 1590 .rept SLB_NUM_BOLTED 1591 li r3, SLBSHADOW_SAVEAREA 1592 LDX_BE r5, r8, r3 1593 addi r3, r3, 8 1594 LDX_BE r6, r8, r3 1595 andis. 
r7,r5,SLB_ESID_V@h 1596 beq 1f 1597 slbmte r6,r5 15981: addi r8,r8,16 1599 .endr 16000: 1601 1602guest_bypass: 1603 stw r12, STACK_SLOT_TRAP(r1) 1604 1605 /* Save DEC */ 1606 /* Do this before kvmhv_commence_exit so we know TB is guest TB */ 1607 ld r3, HSTATE_KVM_VCORE(r13) 1608 mfspr r5,SPRN_DEC 1609 mftb r6 1610 /* On P9, if the guest has large decr enabled, don't sign extend */ 1611BEGIN_FTR_SECTION 1612 ld r4, VCORE_LPCR(r3) 1613 andis. r4, r4, LPCR_LD@h 1614 bne 16f 1615END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1616 extsw r5,r5 161716: add r5,r5,r6 1618 /* r5 is a guest timebase value here, convert to host TB */ 1619 ld r4,VCORE_TB_OFFSET_APPL(r3) 1620 subf r5,r4,r5 1621 std r5,VCPU_DEC_EXPIRES(r9) 1622 1623 /* Increment exit count, poke other threads to exit */ 1624 mr r3, r12 1625 bl kvmhv_commence_exit 1626 nop 1627 ld r9, HSTATE_KVM_VCPU(r13) 1628 1629 /* Stop others sending VCPU interrupts to this physical CPU */ 1630 li r0, -1 1631 stw r0, VCPU_CPU(r9) 1632 stw r0, VCPU_THREAD_CPU(r9) 1633 1634 /* Save guest CTRL register, set runlatch to 1 */ 1635 mfspr r6,SPRN_CTRLF 1636 stw r6,VCPU_CTRL(r9) 1637 andi. r0,r6,1 1638 bne 4f 1639 ori r6,r6,1 1640 mtspr SPRN_CTRLT,r6 16414: 1642 /* 1643 * Save the guest PURR/SPURR 1644 */ 1645 mfspr r5,SPRN_PURR 1646 mfspr r6,SPRN_SPURR 1647 ld r7,VCPU_PURR(r9) 1648 ld r8,VCPU_SPURR(r9) 1649 std r5,VCPU_PURR(r9) 1650 std r6,VCPU_SPURR(r9) 1651 subf r5,r7,r5 1652 subf r6,r8,r6 1653 1654 /* 1655 * Restore host PURR/SPURR and add guest times 1656 * so that the time in the guest gets accounted. 1657 */ 1658 ld r3,HSTATE_PURR(r13) 1659 ld r4,HSTATE_SPURR(r13) 1660 add r3,r3,r5 1661 add r4,r4,r6 1662 mtspr SPRN_PURR,r3 1663 mtspr SPRN_SPURR,r4 1664 1665BEGIN_FTR_SECTION 1666 b 8f 1667END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 1668 /* Save POWER8-specific registers */ 1669 mfspr r5, SPRN_IAMR 1670 mfspr r6, SPRN_PSPB 1671 mfspr r7, SPRN_FSCR 1672 std r5, VCPU_IAMR(r9) 1673 stw r6, VCPU_PSPB(r9) 1674 std r7, VCPU_FSCR(r9) 1675 mfspr r5, SPRN_IC 1676 mfspr r7, SPRN_TAR 1677 std r5, VCPU_IC(r9) 1678 std r7, VCPU_TAR(r9) 1679 mfspr r8, SPRN_EBBHR 1680 std r8, VCPU_EBBHR(r9) 1681 mfspr r5, SPRN_EBBRR 1682 mfspr r6, SPRN_BESCR 1683 mfspr r7, SPRN_PID 1684 mfspr r8, SPRN_WORT 1685 std r5, VCPU_EBBRR(r9) 1686 std r6, VCPU_BESCR(r9) 1687 stw r7, VCPU_GUEST_PID(r9) 1688 std r8, VCPU_WORT(r9) 1689BEGIN_FTR_SECTION 1690 mfspr r5, SPRN_TCSCR 1691 mfspr r6, SPRN_ACOP 1692 mfspr r7, SPRN_CSIGR 1693 mfspr r8, SPRN_TACR 1694 std r5, VCPU_TCSCR(r9) 1695 std r6, VCPU_ACOP(r9) 1696 std r7, VCPU_CSIGR(r9) 1697 std r8, VCPU_TACR(r9) 1698FTR_SECTION_ELSE 1699 mfspr r5, SPRN_TIDR 1700 mfspr r6, SPRN_PSSCR 1701 std r5, VCPU_TID(r9) 1702 rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ 1703 rotldi r6, r6, 60 1704 std r6, VCPU_PSSCR(r9) 1705 /* Restore host HFSCR value */ 1706 ld r7, STACK_SLOT_HFSCR(r1) 1707 mtspr SPRN_HFSCR, r7 1708ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 1709 /* 1710 * Restore various registers to 0, where non-zero values 1711 * set by the guest could disrupt the host. 
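	 *
	 * i.e. roughly (sketch of the code that follows):
	 *
	 *	mtspr(SPRN_PSPB, 0);
	 *	mtspr(SPRN_WORT, 0);
	 *	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {	- POWER8 only
	 *		mtspr(SPRN_IAMR, 0);
	 *		mtspr(SPRN_TCSCR, 0);
	 *		mtspr(SPRN_MMCRS, 1ull << 31);	- freeze/disable SPMCs
	 *	}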
1712 */ 1713 li r0, 0 1714 mtspr SPRN_PSPB, r0 1715 mtspr SPRN_WORT, r0 1716BEGIN_FTR_SECTION 1717 mtspr SPRN_IAMR, r0 1718 mtspr SPRN_TCSCR, r0 1719 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ 1720 li r0, 1 1721 sldi r0, r0, 31 1722 mtspr SPRN_MMCRS, r0 1723END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 17248: 1725 1726 /* Save and reset AMR and UAMOR before turning on the MMU */ 1727 mfspr r5,SPRN_AMR 1728 mfspr r6,SPRN_UAMOR 1729 std r5,VCPU_AMR(r9) 1730 std r6,VCPU_UAMOR(r9) 1731 li r6,0 1732 mtspr SPRN_AMR,r6 1733 mtspr SPRN_UAMOR, r6 1734 1735 /* Switch DSCR back to host value */ 1736 mfspr r8, SPRN_DSCR 1737 ld r7, HSTATE_DSCR(r13) 1738 std r8, VCPU_DSCR(r9) 1739 mtspr SPRN_DSCR, r7 1740 1741 /* Save non-volatile GPRs */ 1742 std r14, VCPU_GPR(R14)(r9) 1743 std r15, VCPU_GPR(R15)(r9) 1744 std r16, VCPU_GPR(R16)(r9) 1745 std r17, VCPU_GPR(R17)(r9) 1746 std r18, VCPU_GPR(R18)(r9) 1747 std r19, VCPU_GPR(R19)(r9) 1748 std r20, VCPU_GPR(R20)(r9) 1749 std r21, VCPU_GPR(R21)(r9) 1750 std r22, VCPU_GPR(R22)(r9) 1751 std r23, VCPU_GPR(R23)(r9) 1752 std r24, VCPU_GPR(R24)(r9) 1753 std r25, VCPU_GPR(R25)(r9) 1754 std r26, VCPU_GPR(R26)(r9) 1755 std r27, VCPU_GPR(R27)(r9) 1756 std r28, VCPU_GPR(R28)(r9) 1757 std r29, VCPU_GPR(R29)(r9) 1758 std r30, VCPU_GPR(R30)(r9) 1759 std r31, VCPU_GPR(R31)(r9) 1760 1761 /* Save SPRGs */ 1762 mfspr r3, SPRN_SPRG0 1763 mfspr r4, SPRN_SPRG1 1764 mfspr r5, SPRN_SPRG2 1765 mfspr r6, SPRN_SPRG3 1766 std r3, VCPU_SPRG0(r9) 1767 std r4, VCPU_SPRG1(r9) 1768 std r5, VCPU_SPRG2(r9) 1769 std r6, VCPU_SPRG3(r9) 1770 1771 /* save FP state */ 1772 mr r3, r9 1773 bl kvmppc_save_fp 1774 1775#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1776/* 1777 * Branch around the call if both CPU_FTR_TM and 1778 * CPU_FTR_P9_TM_HV_ASSIST are off. 1779 */ 1780BEGIN_FTR_SECTION 1781 b 91f 1782END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 1783 /* 1784 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 1785 */ 1786 bl kvmppc_save_tm 178791: 1788#endif 1789 1790 /* Increment yield count if they have a VPA */ 1791 ld r8, VCPU_VPA(r9) /* do they have a VPA? */ 1792 cmpdi r8, 0 1793 beq 25f 1794 li r4, LPPACA_YIELDCOUNT 1795 LWZX_BE r3, r8, r4 1796 addi r3, r3, 1 1797 STWX_BE r3, r8, r4 1798 li r3, 1 1799 stb r3, VCPU_VPA_DIRTY(r9) 180025: 1801 /* Save PMU registers if requested */ 1802 /* r8 and cr0.eq are live here */ 1803BEGIN_FTR_SECTION 1804 /* 1805 * POWER8 seems to have a hardware bug where setting 1806 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE] 1807 * when some counters are already negative doesn't seem 1808 * to cause a performance monitor alert (and hence interrupt). 1809 * The effect of this is that when saving the PMU state, 1810 * if there is no PMU alert pending when we read MMCR0 1811 * before freezing the counters, but one becomes pending 1812 * before we read the counters, we lose it. 1813 * To work around this, we need a way to freeze the counters 1814 * before reading MMCR0. Normally, freezing the counters 1815 * is done by writing MMCR0 (to set MMCR0[FC]) which 1816 * unavoidably writes MMCR0[PMA0] as well. On POWER8, 1817 * we can also freeze the counters using MMCR2, by writing 1818 * 1s to all the counter freeze condition bits (there are 1819 * 9 bits each for 6 counters). 
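	 *
	 * Expressed in C, the workaround sequence below is roughly (sketch):
	 *
	 *	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
	 *		host_mmcr2 = mfspr(SPRN_MMCR2);
	 *		mtspr(SPRN_MMCR2, ~0ULL << 10);	- set all 54 freeze bits
	 *		isync();
	 *	}
	 *	host_mmcr0 = mfspr(SPRN_MMCR0);
	 *	mtspr(SPRN_MMCR0, MMCR0_FC);		- then freeze via MMCR0
	 *	mtspr(SPRN_MMCRA, 0);			- stop SDAR updates
	 *	isync();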
1820 */ 1821 li r3, -1 /* set all freeze bits */ 1822 clrrdi r3, r3, 10 1823 mfspr r10, SPRN_MMCR2 1824 mtspr SPRN_MMCR2, r3 1825 isync 1826END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1827 li r3, 1 1828 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 1829 mfspr r4, SPRN_MMCR0 /* save MMCR0 */ 1830 mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ 1831 mfspr r6, SPRN_MMCRA 1832 /* Clear MMCRA in order to disable SDAR updates */ 1833 li r7, 0 1834 mtspr SPRN_MMCRA, r7 1835 isync 1836 beq 21f /* if no VPA, save PMU stuff anyway */ 1837 lbz r7, LPPACA_PMCINUSE(r8) 1838 cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */ 1839 bne 21f 1840 std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ 1841 b 22f 184221: mfspr r5, SPRN_MMCR1 1843 mfspr r7, SPRN_SIAR 1844 mfspr r8, SPRN_SDAR 1845 std r4, VCPU_MMCR(r9) 1846 std r5, VCPU_MMCR + 8(r9) 1847 std r6, VCPU_MMCR + 16(r9) 1848BEGIN_FTR_SECTION 1849 std r10, VCPU_MMCR + 24(r9) 1850END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1851 std r7, VCPU_SIAR(r9) 1852 std r8, VCPU_SDAR(r9) 1853 mfspr r3, SPRN_PMC1 1854 mfspr r4, SPRN_PMC2 1855 mfspr r5, SPRN_PMC3 1856 mfspr r6, SPRN_PMC4 1857 mfspr r7, SPRN_PMC5 1858 mfspr r8, SPRN_PMC6 1859 stw r3, VCPU_PMC(r9) 1860 stw r4, VCPU_PMC + 4(r9) 1861 stw r5, VCPU_PMC + 8(r9) 1862 stw r6, VCPU_PMC + 12(r9) 1863 stw r7, VCPU_PMC + 16(r9) 1864 stw r8, VCPU_PMC + 20(r9) 1865BEGIN_FTR_SECTION 1866 mfspr r5, SPRN_SIER 1867 std r5, VCPU_SIER(r9) 1868BEGIN_FTR_SECTION_NESTED(96) 1869 mfspr r6, SPRN_SPMC1 1870 mfspr r7, SPRN_SPMC2 1871 mfspr r8, SPRN_MMCRS 1872 stw r6, VCPU_PMC + 24(r9) 1873 stw r7, VCPU_PMC + 28(r9) 1874 std r8, VCPU_MMCR + 32(r9) 1875 lis r4, 0x8000 1876 mtspr SPRN_MMCRS, r4 1877END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) 1878END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 187922: 1880 1881 /* Restore host values of some registers */ 1882BEGIN_FTR_SECTION 1883 ld r5, STACK_SLOT_CIABR(r1) 1884 ld r6, STACK_SLOT_DAWR(r1) 1885 ld r7, STACK_SLOT_DAWRX(r1) 1886 mtspr SPRN_CIABR, r5 1887 /* 1888 * If the DAWR doesn't work, it's ok to write these here as 1889 * this value should always be zero 1890 */ 1891 mtspr SPRN_DAWR, r6 1892 mtspr SPRN_DAWRX, r7 1893END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1894BEGIN_FTR_SECTION 1895 ld r5, STACK_SLOT_TID(r1) 1896 ld r6, STACK_SLOT_PSSCR(r1) 1897 ld r7, STACK_SLOT_PID(r1) 1898 ld r8, STACK_SLOT_IAMR(r1) 1899 mtspr SPRN_TIDR, r5 1900 mtspr SPRN_PSSCR, r6 1901 mtspr SPRN_PID, r7 1902 mtspr SPRN_IAMR, r8 1903END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1904 1905#ifdef CONFIG_PPC_RADIX_MMU 1906 /* 1907 * Are we running hash or radix ? 1908 */ 1909 ld r5, VCPU_KVM(r9) 1910 lbz r0, KVM_RADIX(r5) 1911 cmpwi cr2, r0, 0 1912 beq cr2, 4f 1913 1914 /* 1915 * Radix: do eieio; tlbsync; ptesync sequence in case we 1916 * interrupted the guest between a tlbie and a ptesync. 1917 */ 1918 eieio 1919 tlbsync 1920 ptesync 1921 1922 /* Radix: Handle the case where the guest used an illegal PID */ 1923 LOAD_REG_ADDR(r4, mmu_base_pid) 1924 lwz r3, VCPU_GUEST_PID(r9) 1925 lwz r5, 0(r4) 1926 cmpw cr0,r3,r5 1927 blt 2f 1928 1929 /* 1930 * Illegal PID, the HW might have prefetched and cached in the TLB 1931 * some translations for the LPID 0 / guest PID combination which 1932 * Linux doesn't know about, so we need to flush that PID out of 1933 * the TLB. First we need to set LPIDR to 0 so tlbiel applies to 1934 * the right context. 
1935 */ 1936 li r0,0 1937 mtspr SPRN_LPID,r0 1938 isync 1939 1940 /* Then do a congruence class local flush */ 1941 ld r6,VCPU_KVM(r9) 1942 lwz r0,KVM_TLB_SETS(r6) 1943 mtctr r0 1944 li r7,0x400 /* IS field = 0b01 */ 1945 ptesync 1946 sldi r0,r3,32 /* RS has PID */ 19471: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */ 1948 addi r7,r7,0x1000 1949 bdnz 1b 1950 ptesync 1951 19522: /* Flush the ERAT on radix P9 DD1 guest exit */ 1953BEGIN_FTR_SECTION 1954 PPC_INVALIDATE_ERAT 1955END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 19564: 1957#endif /* CONFIG_PPC_RADIX_MMU */ 1958 1959 /* 1960 * POWER7/POWER8 guest -> host partition switch code. 1961 * We don't have to lock against tlbies but we do 1962 * have to coordinate the hardware threads. 1963 * Here STACK_SLOT_TRAP(r1) contains the trap number. 1964 */ 1965kvmhv_switch_to_host: 1966 /* Secondary threads wait for primary to do partition switch */ 1967 ld r5,HSTATE_KVM_VCORE(r13) 1968 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ 1969 lbz r3,HSTATE_PTID(r13) 1970 cmpwi r3,0 1971 beq 15f 1972 HMT_LOW 197313: lbz r3,VCORE_IN_GUEST(r5) 1974 cmpwi r3,0 1975 bne 13b 1976 HMT_MEDIUM 1977 b 16f 1978 1979 /* Primary thread waits for all the secondaries to exit guest */ 198015: lwz r3,VCORE_ENTRY_EXIT(r5) 1981 rlwinm r0,r3,32-8,0xff 1982 clrldi r3,r3,56 1983 cmpw r3,r0 1984 bne 15b 1985 isync 1986 1987 /* Did we actually switch to the guest at all? */ 1988 lbz r6, VCORE_IN_GUEST(r5) 1989 cmpwi r6, 0 1990 beq 19f 1991 1992 /* Primary thread switches back to host partition */ 1993 lwz r7,KVM_HOST_LPID(r4) 1994BEGIN_FTR_SECTION 1995 ld r6,KVM_HOST_SDR1(r4) 1996 li r8,LPID_RSVD /* switch to reserved LPID */ 1997 mtspr SPRN_LPID,r8 1998 ptesync 1999 mtspr SPRN_SDR1,r6 /* switch to host page table */ 2000END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 2001 mtspr SPRN_LPID,r7 2002 isync 2003 2004BEGIN_FTR_SECTION 2005 /* DPDES and VTB are shared between threads */ 2006 mfspr r7, SPRN_DPDES 2007 mfspr r8, SPRN_VTB 2008 std r7, VCORE_DPDES(r5) 2009 std r8, VCORE_VTB(r5) 2010 /* clear DPDES so we don't get guest doorbells in the host */ 2011 li r8, 0 2012 mtspr SPRN_DPDES, r8 2013END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2014 2015 /* If HMI, call kvmppc_realmode_hmi_handler() */ 2016 lwz r12, STACK_SLOT_TRAP(r1) 2017 cmpwi r12, BOOK3S_INTERRUPT_HMI 2018 bne 27f 2019 bl kvmppc_realmode_hmi_handler 2020 nop 2021 cmpdi r3, 0 2022 /* 2023 * At this point kvmppc_realmode_hmi_handler may have resync-ed 2024 * the TB, and if it has, we must not subtract the guest timebase 2025 * offset from the timebase. So, skip it. 2026 * 2027 * Also, do not call kvmppc_subcore_exit_guest() because it has 2028 * been invoked as part of kvmppc_realmode_hmi_handler(). 
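	 *
	 * The control flow around this test is, in effect (sketch;
	 * subtract_tb_offset() is just a stand-in for the TBU40 sequence
	 * below):
	 *
	 *	if (trap == BOOK3S_INTERRUPT_HMI) {
	 *		ret = kvmppc_realmode_hmi_handler();
	 *		if (ret == 0)		- handler resynced the TB
	 *			goto skip;
	 *	}
	 *	subtract_tb_offset();
	 *	kvmppc_subcore_exit_guest();
	 * skip: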
2029 */ 2030 beq 30f 2031 203227: 2033 /* Subtract timebase offset from timebase */ 2034 ld r8, VCORE_TB_OFFSET_APPL(r5) 2035 cmpdi r8,0 2036 beq 17f 2037 li r0, 0 2038 std r0, VCORE_TB_OFFSET_APPL(r5) 2039 mftb r6 /* current guest timebase */ 2040 subf r8,r8,r6 2041 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 2042 mftb r7 /* check if lower 24 bits overflowed */ 2043 clrldi r6,r6,40 2044 clrldi r7,r7,40 2045 cmpld r7,r6 2046 bge 17f 2047 addis r8,r8,0x100 /* if so, increment upper 40 bits */ 2048 mtspr SPRN_TBU40,r8 2049 205017: bl kvmppc_subcore_exit_guest 2051 nop 205230: ld r5,HSTATE_KVM_VCORE(r13) 2053 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ 2054 2055 /* Reset PCR */ 2056 ld r0, VCORE_PCR(r5) 2057 cmpdi r0, 0 2058 beq 18f 2059 li r0, 0 2060 mtspr SPRN_PCR, r0 206118: 2062 /* Signal secondary CPUs to continue */ 2063 stb r0,VCORE_IN_GUEST(r5) 206419: lis r8,0x7fff /* MAX_INT@h */ 2065 mtspr SPRN_HDEC,r8 2066 206716: 2068BEGIN_FTR_SECTION 2069 /* On POWER9 with HPT-on-radix we need to wait for all other threads */ 2070 ld r3, HSTATE_SPLIT_MODE(r13) 2071 cmpdi r3, 0 2072 beq 47f 2073 lwz r8, KVM_SPLIT_DO_RESTORE(r3) 2074 cmpwi r8, 0 2075 beq 47f 2076 bl kvmhv_p9_restore_lpcr 2077 nop 2078 b 48f 207947: 2080END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 2081 ld r8,KVM_HOST_LPCR(r4) 2082 mtspr SPRN_LPCR,r8 2083 isync 208448: 2085#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 2086 /* Finish timing, if we have a vcpu */ 2087 ld r4, HSTATE_KVM_VCPU(r13) 2088 cmpdi r4, 0 2089 li r3, 0 2090 beq 2f 2091 bl kvmhv_accumulate_time 20922: 2093#endif 2094 /* Unset guest mode */ 2095 li r0, KVM_GUEST_MODE_NONE 2096 stb r0, HSTATE_IN_GUEST(r13) 2097 2098 lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */ 2099 ld r0, SFS+PPC_LR_STKOFF(r1) 2100 addi r1, r1, SFS 2101 mtlr r0 2102 blr 2103 2104#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2105/* 2106 * Softpatch interrupt for transactional memory emulation cases 2107 * on POWER9 DD2.2. This is early in the guest exit path - we 2108 * haven't saved registers or done a treclaim yet. 2109 */ 2110kvmppc_tm_emul: 2111 /* Save instruction image in HEIR */ 2112 mfspr r3, SPRN_HEIR 2113 stw r3, VCPU_HEIR(r9) 2114 2115 /* 2116 * The cases we want to handle here are those where the guest 2117 * is in real suspend mode and is trying to transition to 2118 * transactional mode. 2119 */ 2120 lbz r0, HSTATE_FAKE_SUSPEND(r13) 2121 cmpwi r0, 0 /* keep exiting guest if in fake suspend */ 2122 bne guest_exit_cont 2123 rldicl r3, r11, 64 - MSR_TS_S_LG, 62 2124 cmpwi r3, 1 /* or if not in suspend state */ 2125 bne guest_exit_cont 2126 2127 /* Call C code to do the emulation */ 2128 mr r3, r9 2129 bl kvmhv_p9_tm_emulation_early 2130 nop 2131 ld r9, HSTATE_KVM_VCPU(r13) 2132 li r12, BOOK3S_INTERRUPT_HV_SOFTPATCH 2133 cmpwi r3, 0 2134 beq guest_exit_cont /* continue exiting if not handled */ 2135 ld r10, VCPU_PC(r9) 2136 ld r11, VCPU_MSR(r9) 2137 b fast_interrupt_c_return /* go back to guest if handled */ 2138#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 2139 2140/* 2141 * Check whether an HDSI is an HPTE not found fault or something else. 2142 * If it is an HPTE not found fault that is due to the guest accessing 2143 * a page that they have mapped but which we have paged out, then 2144 * we continue on with the guest exit path. In all other cases, 2145 * reflect the HDSI to the guest as a DSI. 2146 */ 2147kvmppc_hdsi: 2148 ld r3, VCPU_KVM(r9) 2149 lbz r0, KVM_RADIX(r3) 2150 mfspr r4, SPRN_HDAR 2151 mfspr r6, SPRN_HDSISR 2152BEGIN_FTR_SECTION 2153 /* Look for DSISR canary. 
If we find it, retry instruction */ 2154 cmpdi r6, 0x7fff 2155 beq 6f 2156END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 2157 cmpwi r0, 0 2158 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ 2159 /* HPTE not found fault or protection fault? */ 2160 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h 2161 beq 1f /* if not, send it to the guest */ 2162 andi. r0, r11, MSR_DR /* data relocation enabled? */ 2163 beq 3f 2164BEGIN_FTR_SECTION 2165 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 2166 b 4f 2167END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 2168 clrrdi r0, r4, 28 2169 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ 2170 li r0, BOOK3S_INTERRUPT_DATA_SEGMENT 2171 bne 7f /* if no SLB entry found */ 21724: std r4, VCPU_FAULT_DAR(r9) 2173 stw r6, VCPU_FAULT_DSISR(r9) 2174 2175 /* Search the hash table. */ 2176 mr r3, r9 /* vcpu pointer */ 2177 li r7, 1 /* data fault */ 2178 bl kvmppc_hpte_hv_fault 2179 ld r9, HSTATE_KVM_VCPU(r13) 2180 ld r10, VCPU_PC(r9) 2181 ld r11, VCPU_MSR(r9) 2182 li r12, BOOK3S_INTERRUPT_H_DATA_STORAGE 2183 cmpdi r3, 0 /* retry the instruction */ 2184 beq 6f 2185 cmpdi r3, -1 /* handle in kernel mode */ 2186 beq guest_exit_cont 2187 cmpdi r3, -2 /* MMIO emulation; need instr word */ 2188 beq 2f 2189 2190 /* Synthesize a DSI (or DSegI) for the guest */ 2191 ld r4, VCPU_FAULT_DAR(r9) 2192 mr r6, r3 21931: li r0, BOOK3S_INTERRUPT_DATA_STORAGE 2194 mtspr SPRN_DSISR, r6 21957: mtspr SPRN_DAR, r4 2196 mtspr SPRN_SRR0, r10 2197 mtspr SPRN_SRR1, r11 2198 mr r10, r0 2199 bl kvmppc_msr_interrupt 2200fast_interrupt_c_return: 22016: ld r7, VCPU_CTR(r9) 2202 ld r8, VCPU_XER(r9) 2203 mtctr r7 2204 mtxer r8 2205 mr r4, r9 2206 b fast_guest_return 2207 22083: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */ 2209 ld r5, KVM_VRMA_SLB_V(r5) 2210 b 4b 2211 2212 /* If this is for emulated MMIO, load the instruction word */ 22132: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */ 2214 2215 /* Set guest mode to 'jump over instruction' so if lwz faults 2216 * we'll just continue at the next IP. */ 2217 li r0, KVM_GUEST_MODE_SKIP 2218 stb r0, HSTATE_IN_GUEST(r13) 2219 2220 /* Do the access with MSR:DR enabled */ 2221 mfmsr r3 2222 ori r4, r3, MSR_DR /* Enable paging for data */ 2223 mtmsrd r4 2224 lwz r8, 0(r10) 2225 mtmsrd r3 2226 2227 /* Store the result */ 2228 stw r8, VCPU_LAST_INST(r9) 2229 2230 /* Unset guest mode. */ 2231 li r0, KVM_GUEST_MODE_HOST_HV 2232 stb r0, HSTATE_IN_GUEST(r13) 2233 b guest_exit_cont 2234 2235.Lradix_hdsi: 2236 std r4, VCPU_FAULT_DAR(r9) 2237 stw r6, VCPU_FAULT_DSISR(r9) 2238.Lradix_hisi: 2239 mfspr r5, SPRN_ASDR 2240 std r5, VCPU_FAULT_GPA(r9) 2241 b guest_exit_cont 2242 2243/* 2244 * Similarly for an HISI, reflect it to the guest as an ISI unless 2245 * it is an HPTE not found fault for a page that we have paged out. 2246 */ 2247kvmppc_hisi: 2248 ld r3, VCPU_KVM(r9) 2249 lbz r0, KVM_RADIX(r3) 2250 cmpwi r0, 0 2251 bne .Lradix_hisi /* for radix, just save ASDR */ 2252 andis. r0, r11, SRR1_ISI_NOPT@h 2253 beq 1f 2254 andi. r0, r11, MSR_IR /* instruction relocation enabled? */ 2255 beq 3f 2256BEGIN_FTR_SECTION 2257 mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ 2258 b 4f 2259END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 2260 clrrdi r0, r10, 28 2261 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ 2262 li r0, BOOK3S_INTERRUPT_INST_SEGMENT 2263 bne 7f /* if no SLB entry found */ 22644: 2265 /* Search the hash table. 
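	 * As in the HDSI case above, a return value of 0 from
	 * kvmppc_hpte_hv_fault means retry the instruction and -1 means
	 * handle the fault in kernel mode (see the cmpdi checks below).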
*/ 2266 mr r3, r9 /* vcpu pointer */ 2267 mr r4, r10 2268 mr r6, r11 2269 li r7, 0 /* instruction fault */ 2270 bl kvmppc_hpte_hv_fault 2271 ld r9, HSTATE_KVM_VCPU(r13) 2272 ld r10, VCPU_PC(r9) 2273 ld r11, VCPU_MSR(r9) 2274 li r12, BOOK3S_INTERRUPT_H_INST_STORAGE 2275 cmpdi r3, 0 /* retry the instruction */ 2276 beq fast_interrupt_c_return 2277 cmpdi r3, -1 /* handle in kernel mode */ 2278 beq guest_exit_cont 2279 2280 /* Synthesize an ISI (or ISegI) for the guest */ 2281 mr r11, r3 22821: li r0, BOOK3S_INTERRUPT_INST_STORAGE 22837: mtspr SPRN_SRR0, r10 2284 mtspr SPRN_SRR1, r11 2285 mr r10, r0 2286 bl kvmppc_msr_interrupt 2287 b fast_interrupt_c_return 2288 22893: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ 2290 ld r5, KVM_VRMA_SLB_V(r6) 2291 b 4b 2292 2293/* 2294 * Try to handle an hcall in real mode. 2295 * Returns to the guest if we handle it, or continues on up to 2296 * the kernel if we can't (i.e. if we don't have a handler for 2297 * it, or if the handler returns H_TOO_HARD). 2298 * 2299 * r5 - r8 contain hcall args, 2300 * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca 2301 */ 2302hcall_try_real_mode: 2303 ld r3,VCPU_GPR(R3)(r9) 2304 andi. r0,r11,MSR_PR 2305 /* sc 1 from userspace - reflect to guest syscall */ 2306 bne sc_1_fast_return 2307 clrrdi r3,r3,2 2308 cmpldi r3,hcall_real_table_end - hcall_real_table 2309 bge guest_exit_cont 2310 /* See if this hcall is enabled for in-kernel handling */ 2311 ld r4, VCPU_KVM(r9) 2312 srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ 2313 sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ 2314 add r4, r4, r0 2315 ld r0, KVM_ENABLED_HCALLS(r4) 2316 rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ 2317 srd r0, r0, r4 2318 andi. r0, r0, 1 2319 beq guest_exit_cont 2320 /* Get pointer to handler, if any, and call it */ 2321 LOAD_REG_ADDR(r4, hcall_real_table) 2322 lwax r3,r3,r4 2323 cmpwi r3,0 2324 beq guest_exit_cont 2325 add r12,r3,r4 2326 mtctr r12 2327 mr r3,r9 /* get vcpu pointer */ 2328 ld r4,VCPU_GPR(R4)(r9) 2329 bctrl 2330 cmpdi r3,H_TOO_HARD 2331 beq hcall_real_fallback 2332 ld r4,HSTATE_KVM_VCPU(r13) 2333 std r3,VCPU_GPR(R3)(r4) 2334 ld r10,VCPU_PC(r4) 2335 ld r11,VCPU_MSR(r4) 2336 b fast_guest_return 2337 2338sc_1_fast_return: 2339 mtspr SPRN_SRR0,r10 2340 mtspr SPRN_SRR1,r11 2341 li r10, BOOK3S_INTERRUPT_SYSCALL 2342 bl kvmppc_msr_interrupt 2343 mr r4,r9 2344 b fast_guest_return 2345 2346 /* We've attempted a real mode hcall, but it's punted it back 2347 * to userspace. 
We need to restore some clobbered volatiles 2348 * before resuming the pass-it-to-qemu path */ 2349hcall_real_fallback: 2350 li r12,BOOK3S_INTERRUPT_SYSCALL 2351 ld r9, HSTATE_KVM_VCPU(r13) 2352 2353 b guest_exit_cont 2354 2355 .globl hcall_real_table 2356hcall_real_table: 2357 .long 0 /* 0 - unused */ 2358 .long DOTSYM(kvmppc_h_remove) - hcall_real_table 2359 .long DOTSYM(kvmppc_h_enter) - hcall_real_table 2360 .long DOTSYM(kvmppc_h_read) - hcall_real_table 2361 .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table 2362 .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table 2363 .long DOTSYM(kvmppc_h_protect) - hcall_real_table 2364 .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table 2365 .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table 2366 .long 0 /* 0x24 - H_SET_SPRG0 */ 2367 .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table 2368 .long 0 /* 0x2c */ 2369 .long 0 /* 0x30 */ 2370 .long 0 /* 0x34 */ 2371 .long 0 /* 0x38 */ 2372 .long 0 /* 0x3c */ 2373 .long 0 /* 0x40 */ 2374 .long 0 /* 0x44 */ 2375 .long 0 /* 0x48 */ 2376 .long 0 /* 0x4c */ 2377 .long 0 /* 0x50 */ 2378 .long 0 /* 0x54 */ 2379 .long 0 /* 0x58 */ 2380 .long 0 /* 0x5c */ 2381 .long 0 /* 0x60 */ 2382#ifdef CONFIG_KVM_XICS 2383 .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table 2384 .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table 2385 .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table 2386 .long DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table 2387 .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table 2388#else 2389 .long 0 /* 0x64 - H_EOI */ 2390 .long 0 /* 0x68 - H_CPPR */ 2391 .long 0 /* 0x6c - H_IPI */ 2392 .long 0 /* 0x70 - H_IPOLL */ 2393 .long 0 /* 0x74 - H_XIRR */ 2394#endif 2395 .long 0 /* 0x78 */ 2396 .long 0 /* 0x7c */ 2397 .long 0 /* 0x80 */ 2398 .long 0 /* 0x84 */ 2399 .long 0 /* 0x88 */ 2400 .long 0 /* 0x8c */ 2401 .long 0 /* 0x90 */ 2402 .long 0 /* 0x94 */ 2403 .long 0 /* 0x98 */ 2404 .long 0 /* 0x9c */ 2405 .long 0 /* 0xa0 */ 2406 .long 0 /* 0xa4 */ 2407 .long 0 /* 0xa8 */ 2408 .long 0 /* 0xac */ 2409 .long 0 /* 0xb0 */ 2410 .long 0 /* 0xb4 */ 2411 .long 0 /* 0xb8 */ 2412 .long 0 /* 0xbc */ 2413 .long 0 /* 0xc0 */ 2414 .long 0 /* 0xc4 */ 2415 .long 0 /* 0xc8 */ 2416 .long 0 /* 0xcc */ 2417 .long 0 /* 0xd0 */ 2418 .long 0 /* 0xd4 */ 2419 .long 0 /* 0xd8 */ 2420 .long 0 /* 0xdc */ 2421 .long DOTSYM(kvmppc_h_cede) - hcall_real_table 2422 .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table 2423 .long 0 /* 0xe8 */ 2424 .long 0 /* 0xec */ 2425 .long 0 /* 0xf0 */ 2426 .long 0 /* 0xf4 */ 2427 .long 0 /* 0xf8 */ 2428 .long 0 /* 0xfc */ 2429 .long 0 /* 0x100 */ 2430 .long 0 /* 0x104 */ 2431 .long 0 /* 0x108 */ 2432 .long 0 /* 0x10c */ 2433 .long 0 /* 0x110 */ 2434 .long 0 /* 0x114 */ 2435 .long 0 /* 0x118 */ 2436 .long 0 /* 0x11c */ 2437 .long 0 /* 0x120 */ 2438 .long DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table 2439 .long 0 /* 0x128 */ 2440 .long 0 /* 0x12c */ 2441 .long 0 /* 0x130 */ 2442 .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table 2443 .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table 2444 .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table 2445 .long 0 /* 0x140 */ 2446 .long 0 /* 0x144 */ 2447 .long 0 /* 0x148 */ 2448 .long 0 /* 0x14c */ 2449 .long 0 /* 0x150 */ 2450 .long 0 /* 0x154 */ 2451 .long 0 /* 0x158 */ 2452 .long 0 /* 0x15c */ 2453 .long 0 /* 0x160 */ 2454 .long 0 /* 0x164 */ 2455 .long 0 /* 0x168 */ 2456 .long 0 /* 0x16c */ 2457 .long 0 /* 0x170 */ 2458 .long 0 /* 0x174 */ 2459 .long 0 /* 0x178 */ 2460 .long 0 /* 0x17c */ 2461 .long 0 /* 0x180 */ 2462 .long 0 /* 0x184 */ 2463 .long 0 /* 0x188 
*/ 2464 .long 0 /* 0x18c */ 2465 .long 0 /* 0x190 */ 2466 .long 0 /* 0x194 */ 2467 .long 0 /* 0x198 */ 2468 .long 0 /* 0x19c */ 2469 .long 0 /* 0x1a0 */ 2470 .long 0 /* 0x1a4 */ 2471 .long 0 /* 0x1a8 */ 2472 .long 0 /* 0x1ac */ 2473 .long 0 /* 0x1b0 */ 2474 .long 0 /* 0x1b4 */ 2475 .long 0 /* 0x1b8 */ 2476 .long 0 /* 0x1bc */ 2477 .long 0 /* 0x1c0 */ 2478 .long 0 /* 0x1c4 */ 2479 .long 0 /* 0x1c8 */ 2480 .long 0 /* 0x1cc */ 2481 .long 0 /* 0x1d0 */ 2482 .long 0 /* 0x1d4 */ 2483 .long 0 /* 0x1d8 */ 2484 .long 0 /* 0x1dc */ 2485 .long 0 /* 0x1e0 */ 2486 .long 0 /* 0x1e4 */ 2487 .long 0 /* 0x1e8 */ 2488 .long 0 /* 0x1ec */ 2489 .long 0 /* 0x1f0 */ 2490 .long 0 /* 0x1f4 */ 2491 .long 0 /* 0x1f8 */ 2492 .long 0 /* 0x1fc */ 2493 .long 0 /* 0x200 */ 2494 .long 0 /* 0x204 */ 2495 .long 0 /* 0x208 */ 2496 .long 0 /* 0x20c */ 2497 .long 0 /* 0x210 */ 2498 .long 0 /* 0x214 */ 2499 .long 0 /* 0x218 */ 2500 .long 0 /* 0x21c */ 2501 .long 0 /* 0x220 */ 2502 .long 0 /* 0x224 */ 2503 .long 0 /* 0x228 */ 2504 .long 0 /* 0x22c */ 2505 .long 0 /* 0x230 */ 2506 .long 0 /* 0x234 */ 2507 .long 0 /* 0x238 */ 2508 .long 0 /* 0x23c */ 2509 .long 0 /* 0x240 */ 2510 .long 0 /* 0x244 */ 2511 .long 0 /* 0x248 */ 2512 .long 0 /* 0x24c */ 2513 .long 0 /* 0x250 */ 2514 .long 0 /* 0x254 */ 2515 .long 0 /* 0x258 */ 2516 .long 0 /* 0x25c */ 2517 .long 0 /* 0x260 */ 2518 .long 0 /* 0x264 */ 2519 .long 0 /* 0x268 */ 2520 .long 0 /* 0x26c */ 2521 .long 0 /* 0x270 */ 2522 .long 0 /* 0x274 */ 2523 .long 0 /* 0x278 */ 2524 .long 0 /* 0x27c */ 2525 .long 0 /* 0x280 */ 2526 .long 0 /* 0x284 */ 2527 .long 0 /* 0x288 */ 2528 .long 0 /* 0x28c */ 2529 .long 0 /* 0x290 */ 2530 .long 0 /* 0x294 */ 2531 .long 0 /* 0x298 */ 2532 .long 0 /* 0x29c */ 2533 .long 0 /* 0x2a0 */ 2534 .long 0 /* 0x2a4 */ 2535 .long 0 /* 0x2a8 */ 2536 .long 0 /* 0x2ac */ 2537 .long 0 /* 0x2b0 */ 2538 .long 0 /* 0x2b4 */ 2539 .long 0 /* 0x2b8 */ 2540 .long 0 /* 0x2bc */ 2541 .long 0 /* 0x2c0 */ 2542 .long 0 /* 0x2c4 */ 2543 .long 0 /* 0x2c8 */ 2544 .long 0 /* 0x2cc */ 2545 .long 0 /* 0x2d0 */ 2546 .long 0 /* 0x2d4 */ 2547 .long 0 /* 0x2d8 */ 2548 .long 0 /* 0x2dc */ 2549 .long 0 /* 0x2e0 */ 2550 .long 0 /* 0x2e4 */ 2551 .long 0 /* 0x2e8 */ 2552 .long 0 /* 0x2ec */ 2553 .long 0 /* 0x2f0 */ 2554 .long 0 /* 0x2f4 */ 2555 .long 0 /* 0x2f8 */ 2556#ifdef CONFIG_KVM_XICS 2557 .long DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table 2558#else 2559 .long 0 /* 0x2fc - H_XIRR_X*/ 2560#endif 2561 .long DOTSYM(kvmppc_h_random) - hcall_real_table 2562 .globl hcall_real_table_end 2563hcall_real_table_end: 2564 2565_GLOBAL(kvmppc_h_set_xdabr) 2566 andi. r0, r5, DABRX_USER | DABRX_KERNEL 2567 beq 6f 2568 li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI 2569 andc. 
r0, r5, r0 2570 beq 3f 25716: li r3, H_PARAMETER 2572 blr 2573 2574_GLOBAL(kvmppc_h_set_dabr) 2575 li r5, DABRX_USER | DABRX_KERNEL 25763: 2577BEGIN_FTR_SECTION 2578 b 2f 2579END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 2580 std r4,VCPU_DABR(r3) 2581 stw r5, VCPU_DABRX(r3) 2582 mtspr SPRN_DABRX, r5 2583 /* Work around P7 bug where DABR can get corrupted on mtspr */ 25841: mtspr SPRN_DABR,r4 2585 mfspr r5, SPRN_DABR 2586 cmpd r4, r5 2587 bne 1b 2588 isync 2589 li r3,0 2590 blr 2591 25922: 2593BEGIN_FTR_SECTION 2594 /* POWER9 with disabled DAWR */ 2595 li r3, H_HARDWARE 2596 blr 2597END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) 2598 /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ 2599 rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW 2600 rlwimi r5, r4, 2, DAWRX_WT 2601 clrrdi r4, r4, 3 2602 std r4, VCPU_DAWR(r3) 2603 std r5, VCPU_DAWRX(r3) 2604 mtspr SPRN_DAWR, r4 2605 mtspr SPRN_DAWRX, r5 2606 li r3, 0 2607 blr 2608 2609_GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */ 2610 ori r11,r11,MSR_EE 2611 std r11,VCPU_MSR(r3) 2612 li r0,1 2613 stb r0,VCPU_CEDED(r3) 2614 sync /* order setting ceded vs. testing prodded */ 2615 lbz r5,VCPU_PRODDED(r3) 2616 cmpwi r5,0 2617 bne kvm_cede_prodded 2618 li r12,0 /* set trap to 0 to say hcall is handled */ 2619 stw r12,VCPU_TRAP(r3) 2620 li r0,H_SUCCESS 2621 std r0,VCPU_GPR(R3)(r3) 2622 2623 /* 2624 * Set our bit in the bitmask of napping threads unless all the 2625 * other threads are already napping, in which case we send this 2626 * up to the host. 2627 */ 2628 ld r5,HSTATE_KVM_VCORE(r13) 2629 lbz r6,HSTATE_PTID(r13) 2630 lwz r8,VCORE_ENTRY_EXIT(r5) 2631 clrldi r8,r8,56 2632 li r0,1 2633 sld r0,r0,r6 2634 addi r6,r5,VCORE_NAPPING_THREADS 263531: lwarx r4,0,r6 2636 or r4,r4,r0 2637 cmpw r4,r8 2638 beq kvm_cede_exit 2639 stwcx. r4,0,r6 2640 bne 31b 2641 /* order napping_threads update vs testing entry_exit_map */ 2642 isync 2643 li r0,NAPPING_CEDE 2644 stb r0,HSTATE_NAPPING(r13) 2645 lwz r7,VCORE_ENTRY_EXIT(r5) 2646 cmpwi r7,0x100 2647 bge 33f /* another thread already exiting */ 2648 2649/* 2650 * Although not specifically required by the architecture, POWER7 2651 * preserves the following registers in nap mode, even if an SMT mode 2652 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3, 2653 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR. 2654 */ 2655 /* Save non-volatile GPRs */ 2656 std r14, VCPU_GPR(R14)(r3) 2657 std r15, VCPU_GPR(R15)(r3) 2658 std r16, VCPU_GPR(R16)(r3) 2659 std r17, VCPU_GPR(R17)(r3) 2660 std r18, VCPU_GPR(R18)(r3) 2661 std r19, VCPU_GPR(R19)(r3) 2662 std r20, VCPU_GPR(R20)(r3) 2663 std r21, VCPU_GPR(R21)(r3) 2664 std r22, VCPU_GPR(R22)(r3) 2665 std r23, VCPU_GPR(R23)(r3) 2666 std r24, VCPU_GPR(R24)(r3) 2667 std r25, VCPU_GPR(R25)(r3) 2668 std r26, VCPU_GPR(R26)(r3) 2669 std r27, VCPU_GPR(R27)(r3) 2670 std r28, VCPU_GPR(R28)(r3) 2671 std r29, VCPU_GPR(R29)(r3) 2672 std r30, VCPU_GPR(R30)(r3) 2673 std r31, VCPU_GPR(R31)(r3) 2674 2675 /* save FP state */ 2676 bl kvmppc_save_fp 2677 2678#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2679/* 2680 * Branch around the call if both CPU_FTR_TM and 2681 * CPU_FTR_P9_TM_HV_ASSIST are off. 
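 * (With neither feature present there is no transactional state for
 * kvmppc_save_tm to save.)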
 */
BEGIN_FTR_SECTION
	b	91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
	/*
	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
	 */
	ld	r9, HSTATE_KVM_VCPU(r13)
	bl	kvmppc_save_tm
91:
#endif

	/*
	 * Set DEC to the smaller of DEC and HDEC, so that we wake
	 * no later than the end of our timeslice (HDEC interrupts
	 * don't wake us from nap).
	 */
	mfspr	r3, SPRN_DEC
	mfspr	r4, SPRN_HDEC
	mftb	r5
BEGIN_FTR_SECTION
	/* On P9 check whether the guest has large decrementer mode enabled */
	ld	r6, HSTATE_KVM_VCORE(r13)
	ld	r6, VCORE_LPCR(r6)
	andis.	r6, r6, LPCR_LD@h
	bne	68f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	extsw	r3, r3
68:	EXTEND_HDEC(r4)
	cmpd	r3, r4
	ble	67f
	mtspr	SPRN_DEC, r4
67:
	/* save expiry time of guest decrementer */
	add	r3, r3, r5
	ld	r4, HSTATE_KVM_VCPU(r13)
	ld	r5, HSTATE_KVM_VCORE(r13)
	ld	r6, VCORE_TB_OFFSET_APPL(r5)
	subf	r3, r6, r3	/* convert to host TB value */
	std	r3, VCPU_DEC_EXPIRES(r4)

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
	ld	r4, HSTATE_KVM_VCPU(r13)
	addi	r3, r4, VCPU_TB_CEDE
	bl	kvmhv_accumulate_time
#endif

	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */

	/*
	 * Take a nap until a decrementer or external or doorbell interrupt
	 * occurs, with PECE1 and PECE0 set in LPCR.
	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
	 * Also clear the runlatch bit before napping.
	 */
kvm_do_nap:
	mfspr	r0, SPRN_CTRLF
	clrrdi	r0, r0, 1
	mtspr	SPRN_CTRLT, r0

	li	r0,1
	stb	r0,HSTATE_HWTHREAD_REQ(r13)
	mfspr	r5,SPRN_LPCR
	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
	ori	r5, r5, LPCR_PECEDH
	rlwimi	r5, r3, 0, LPCR_PECEDP
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

kvm_nap_sequence:		/* desired LPCR value in r5 */
BEGIN_FTR_SECTION
	/*
	 * PSSCR bits:	exit criterion = 1 (wakeup based on LPCR at sreset)
	 *		enable state loss = 1 (allow SMT mode switch)
	 *		requested level = 0 (just stop dispatching)
	 */
	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
	mtspr	SPRN_PSSCR, r3
	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
	li	r4, LPCR_PECE_HVEE@higher
	sldi	r4, r4, 32
	or	r5, r5, r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	mtspr	SPRN_LPCR,r5
	isync
	li	r0, 0
	std	r0, HSTATE_SCRATCH0(r13)
	ptesync
	ld	r0, HSTATE_SCRATCH0(r13)
1:	cmpd	r0, r0
	bne	1b
BEGIN_FTR_SECTION
	nap
FTR_SECTION_ELSE
	PPC_STOP
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
	b	.

33:	mr	r4, r3
	li	r3, 0
	li	r12, 0
	b	34f

kvm_end_cede:
	/* get vcpu pointer */
	ld	r4, HSTATE_KVM_VCPU(r13)

	/* Woken by external or decrementer interrupt */
	ld	r1, HSTATE_HOST_R1(r13)

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
	addi	r3, r4, VCPU_TB_RMINTR
	bl	kvmhv_accumulate_time
#endif

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Branch around the call if both CPU_FTR_TM and
 * CPU_FTR_P9_TM_HV_ASSIST are off.
2801 */ 2802BEGIN_FTR_SECTION 2803 b 91f 2804END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) 2805 /* 2806 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR 2807 */ 2808 bl kvmppc_restore_tm 280991: 2810#endif 2811 2812 /* load up FP state */ 2813 bl kvmppc_load_fp 2814 2815 /* Restore guest decrementer */ 2816 ld r3, VCPU_DEC_EXPIRES(r4) 2817 ld r5, HSTATE_KVM_VCORE(r13) 2818 ld r6, VCORE_TB_OFFSET_APPL(r5) 2819 add r3, r3, r6 /* convert host TB to guest TB value */ 2820 mftb r7 2821 subf r3, r7, r3 2822 mtspr SPRN_DEC, r3 2823 2824 /* Load NV GPRS */ 2825 ld r14, VCPU_GPR(R14)(r4) 2826 ld r15, VCPU_GPR(R15)(r4) 2827 ld r16, VCPU_GPR(R16)(r4) 2828 ld r17, VCPU_GPR(R17)(r4) 2829 ld r18, VCPU_GPR(R18)(r4) 2830 ld r19, VCPU_GPR(R19)(r4) 2831 ld r20, VCPU_GPR(R20)(r4) 2832 ld r21, VCPU_GPR(R21)(r4) 2833 ld r22, VCPU_GPR(R22)(r4) 2834 ld r23, VCPU_GPR(R23)(r4) 2835 ld r24, VCPU_GPR(R24)(r4) 2836 ld r25, VCPU_GPR(R25)(r4) 2837 ld r26, VCPU_GPR(R26)(r4) 2838 ld r27, VCPU_GPR(R27)(r4) 2839 ld r28, VCPU_GPR(R28)(r4) 2840 ld r29, VCPU_GPR(R29)(r4) 2841 ld r30, VCPU_GPR(R30)(r4) 2842 ld r31, VCPU_GPR(R31)(r4) 2843 2844 /* Check the wake reason in SRR1 to see why we got here */ 2845 bl kvmppc_check_wake_reason 2846 2847 /* 2848 * Restore volatile registers since we could have called a 2849 * C routine in kvmppc_check_wake_reason 2850 * r4 = VCPU 2851 * r3 tells us whether we need to return to host or not 2852 * WARNING: it gets checked further down: 2853 * should not modify r3 until this check is done. 2854 */ 2855 ld r4, HSTATE_KVM_VCPU(r13) 2856 2857 /* clear our bit in vcore->napping_threads */ 285834: ld r5,HSTATE_KVM_VCORE(r13) 2859 lbz r7,HSTATE_PTID(r13) 2860 li r0,1 2861 sld r0,r0,r7 2862 addi r6,r5,VCORE_NAPPING_THREADS 286332: lwarx r7,0,r6 2864 andc r7,r7,r0 2865 stwcx. r7,0,r6 2866 bne 32b 2867 li r0,0 2868 stb r0,HSTATE_NAPPING(r13) 2869 2870 /* See if the wake reason saved in r3 means we need to exit */ 2871 stw r12, VCPU_TRAP(r4) 2872 mr r9, r4 2873 cmpdi r3, 0 2874 bgt guest_exit_cont 2875 2876 /* see if any other thread is already exiting */ 2877 lwz r0,VCORE_ENTRY_EXIT(r5) 2878 cmpwi r0,0x100 2879 bge guest_exit_cont 2880 2881 b kvmppc_cede_reentry /* if not go back to guest */ 2882 2883 /* cede when already previously prodded case */ 2884kvm_cede_prodded: 2885 li r0,0 2886 stb r0,VCPU_PRODDED(r3) 2887 sync /* order testing prodded vs. clearing ceded */ 2888 stb r0,VCPU_CEDED(r3) 2889 li r3,H_SUCCESS 2890 blr 2891 2892 /* we've ceded but we want to give control to the host */ 2893kvm_cede_exit: 2894 ld r9, HSTATE_KVM_VCPU(r13) 2895#ifdef CONFIG_KVM_XICS 2896 /* Abort if we still have a pending escalation */ 2897 lbz r5, VCPU_XIVE_ESC_ON(r9) 2898 cmpwi r5, 0 2899 beq 1f 2900 li r0, 0 2901 stb r0, VCPU_CEDED(r9) 29021: /* Enable XIVE escalation */ 2903 li r5, XIVE_ESB_SET_PQ_00 2904 mfmsr r0 2905 andi. r0, r0, MSR_DR /* in real mode? 
 */
	beq	1f
	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
	cmpdi	r10, 0
	beq	3f
	ldx	r0, r10, r5
	b	2f
1:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
	cmpdi	r10, 0
	beq	3f
	ldcix	r0, r10, r5
2:	sync
	li	r0, 1
	stb	r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3:	b	guest_exit_cont

	/* Try to handle a machine check in real mode */
machine_check_realmode:
	mr	r3, r9		/* get vcpu pointer */
	bl	kvmppc_realmode_machine_check
	nop
	ld	r9, HSTATE_KVM_VCPU(r13)
	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	/*
	 * For a guest that is FWNMI capable, deliver all the MCE errors
	 * (handled/unhandled) by exiting the guest with a KVM_EXIT_NMI exit
	 * reason.  This approach injects machine check errors into the guest
	 * address space, with additional information in the form of an RTAS
	 * event, enabling the guest kernel to handle such errors suitably.
	 *
	 * For a guest that is not FWNMI capable (old QEMU), fall back to the
	 * old behaviour for backward compatibility:
	 * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest through
	 * a machine check interrupt (set HSRR0 to 0x200).
	 * For handled errors (non-fatal), just go back to guest execution
	 * with the current HSRR0.
	 * If we receive a machine check with MSR(RI=0), deliver it to the
	 * guest as a machine check, causing the guest to crash.
	 */
	ld	r11, VCPU_MSR(r9)
	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
	bne	mc_cont			/* if so, exit to host */
	/* Check if guest is capable of handling NMI exit */
	ld	r10, VCPU_KVM(r9)
	lbz	r10, KVM_FWNMI(r10)
	cmpdi	r10, 1			/* FWNMI capable? */
	beq	mc_cont			/* if so, exit with KVM_EXIT_NMI. */

	/* if not, fall through for backward compatibility. */
	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
	beq	1f			/* Deliver a machine check to guest */
	ld	r10, VCPU_PC(r9)
	cmpdi	r3, 0		/* Did we handle MCE ? */
	bne	2f		/* Continue guest execution. */
	/* If not, deliver a machine check.  SRR0/1 are already set */
1:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
	bl	kvmppc_msr_interrupt
2:	b	fast_interrupt_c_return

/*
 * Check the reason we woke from nap, and take appropriate action.
 * Returns (in r3):
 *	0 if nothing needs to be done
 *	1 if something happened that needs to be handled by the host
 *	-1 if there was a guest wakeup (IPI or msgsnd)
 *	-2 if we handled a PCI passthrough interrupt (returned by
 *		kvmppc_read_intr only)
 *
 * Also sets r12 to the interrupt vector for any interrupt that needs
 * to be handled now by the host (0x500 for external interrupt), or zero.
 * Modifies all volatile registers (since it may call a C function).
 * This routine calls kvmppc_read_intr, a C function, if an external
 * interrupt is pending.
 */
kvmppc_check_wake_reason:
	mfspr	r6, SPRN_SRR1
BEGIN_FTR_SECTION
	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
FTR_SECTION_ELSE
	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
	cmpwi	r6, 8			/* was it an external interrupt? */
	beq	7f			/* if so, see what it was */
	li	r3, 0
	li	r12, 0
	cmpwi	r6, 6			/* was it the decrementer? */
	beq	0f
BEGIN_FTR_SECTION
	cmpwi	r6, 5			/* privileged doorbell? */
	beq	0f
	cmpwi	r6, 3			/* hypervisor doorbell?
*/ 2998 beq 3f 2999END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 3000 cmpwi r6, 0xa /* Hypervisor maintenance ? */ 3001 beq 4f 3002 li r3, 1 /* anything else, return 1 */ 30030: blr 3004 3005 /* hypervisor doorbell */ 30063: li r12, BOOK3S_INTERRUPT_H_DOORBELL 3007 3008 /* 3009 * Clear the doorbell as we will invoke the handler 3010 * explicitly in the guest exit path. 3011 */ 3012 lis r6, (PPC_DBELL_SERVER << (63-36))@h 3013 PPC_MSGCLR(6) 3014 /* see if it's a host IPI */ 3015 li r3, 1 3016BEGIN_FTR_SECTION 3017 PPC_MSGSYNC 3018 lwsync 3019END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 3020 lbz r0, HSTATE_HOST_IPI(r13) 3021 cmpwi r0, 0 3022 bnelr 3023 /* if not, return -1 */ 3024 li r3, -1 3025 blr 3026 3027 /* Woken up due to Hypervisor maintenance interrupt */ 30284: li r12, BOOK3S_INTERRUPT_HMI 3029 li r3, 1 3030 blr 3031 3032 /* external interrupt - create a stack frame so we can call C */ 30337: mflr r0 3034 std r0, PPC_LR_STKOFF(r1) 3035 stdu r1, -PPC_MIN_STKFRM(r1) 3036 bl kvmppc_read_intr 3037 nop 3038 li r12, BOOK3S_INTERRUPT_EXTERNAL 3039 cmpdi r3, 1 3040 ble 1f 3041 3042 /* 3043 * Return code of 2 means PCI passthrough interrupt, but 3044 * we need to return back to host to complete handling the 3045 * interrupt. Trap reason is expected in r12 by guest 3046 * exit code. 3047 */ 3048 li r12, BOOK3S_INTERRUPT_HV_RM_HARD 30491: 3050 ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1) 3051 addi r1, r1, PPC_MIN_STKFRM 3052 mtlr r0 3053 blr 3054 3055/* 3056 * Save away FP, VMX and VSX registers. 3057 * r3 = vcpu pointer 3058 * N.B. r30 and r31 are volatile across this function, 3059 * thus it is not callable from C. 3060 */ 3061kvmppc_save_fp: 3062 mflr r30 3063 mr r31,r3 3064 mfmsr r5 3065 ori r8,r5,MSR_FP 3066#ifdef CONFIG_ALTIVEC 3067BEGIN_FTR_SECTION 3068 oris r8,r8,MSR_VEC@h 3069END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 3070#endif 3071#ifdef CONFIG_VSX 3072BEGIN_FTR_SECTION 3073 oris r8,r8,MSR_VSX@h 3074END_FTR_SECTION_IFSET(CPU_FTR_VSX) 3075#endif 3076 mtmsrd r8 3077 addi r3,r3,VCPU_FPRS 3078 bl store_fp_state 3079#ifdef CONFIG_ALTIVEC 3080BEGIN_FTR_SECTION 3081 addi r3,r31,VCPU_VRS 3082 bl store_vr_state 3083END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 3084#endif 3085 mfspr r6,SPRN_VRSAVE 3086 stw r6,VCPU_VRSAVE(r31) 3087 mtlr r30 3088 blr 3089 3090/* 3091 * Load up FP, VMX and VSX registers 3092 * r4 = vcpu pointer 3093 * N.B. r30 and r31 are volatile across this function, 3094 * thus it is not callable from C. 3095 */ 3096kvmppc_load_fp: 3097 mflr r30 3098 mr r31,r4 3099 mfmsr r9 3100 ori r8,r9,MSR_FP 3101#ifdef CONFIG_ALTIVEC 3102BEGIN_FTR_SECTION 3103 oris r8,r8,MSR_VEC@h 3104END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 3105#endif 3106#ifdef CONFIG_VSX 3107BEGIN_FTR_SECTION 3108 oris r8,r8,MSR_VSX@h 3109END_FTR_SECTION_IFSET(CPU_FTR_VSX) 3110#endif 3111 mtmsrd r8 3112 addi r3,r4,VCPU_FPRS 3113 bl load_fp_state 3114#ifdef CONFIG_ALTIVEC 3115BEGIN_FTR_SECTION 3116 addi r3,r31,VCPU_VRS 3117 bl load_vr_state 3118END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) 3119#endif 3120 lwz r7,VCPU_VRSAVE(r31) 3121 mtspr SPRN_VRSAVE,r7 3122 mtlr r30 3123 mr r4,r31 3124 blr 3125 3126#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 3127/* 3128 * Save transactional state and TM-related registers. 3129 * Called with r9 pointing to the vcpu struct. 3130 * This can modify all checkpointed registers, but 3131 * restores r1, r2 and r9 (vcpu pointer) before exit. 3132 */ 3133kvmppc_save_tm: 3134 mflr r0 3135 std r0, PPC_LR_STKOFF(r1) 3136 stdu r1, -PPC_MIN_STKFRM(r1) 3137 3138 /* Turn on TM. 
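	 * MSR[TM] must be set before we execute treclaim or touch the
	 * TM SPRs (TFHAR, TFIAR, TEXASR) below.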
	 */
	mfmsr	r8
	li	r0, 1
	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
	mtmsrd	r8

	ld	r5, VCPU_MSR(r9)
	rldicl.	r5, r5, 64 - MSR_TS_S_LG, 62
	beq	1f	/* TM not active in guest. */

	std	r1, HSTATE_HOST_R1(r13)
	li	r3, TM_CAUSE_KVM_RESCHED

BEGIN_FTR_SECTION
	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
	cmpwi	r0, 0
	beq	3f
	rldicl.	r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
	beq	4f
BEGIN_FTR_SECTION_NESTED(96)
	bl	pnv_power9_force_smt4_catch
END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
	nop
	b	6f
3:
	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
	mfspr	r6, SPRN_TEXASR
	std	r6, VCPU_ORIG_TEXASR(r9)
6:
END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)

	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
	li	r5, 0
	mtmsrd	r5, 1

	/* All GPRs are volatile at this point. */
	TRECLAIM(R3)

	/* Temporarily store r13 and r9 so we have some regs to play with */
	SET_SCRATCH0(r13)
	GET_PACA(r13)
	std	r9, PACATMSCRATCH(r13)

	/* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
BEGIN_FTR_SECTION
	lbz	r9, HSTATE_FAKE_SUSPEND(r13)
	cmpwi	r9, 0
	beq	2f
	/*
	 * We were in fake suspend, so we are not going to save the
	 * register state as the guest checkpointed state (since
	 * we already have it), therefore we can now use any volatile GPR.
	 */
	/* Reload stack pointer and TOC. */
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r2, PACATOC(r13)
	/* Set MSR RI now we have r1 and r13 back. */
	li	r5, MSR_RI
	mtmsrd	r5, 1
	HMT_MEDIUM
	ld	r6, HSTATE_DSCR(r13)
	mtspr	SPRN_DSCR, r6
BEGIN_FTR_SECTION_NESTED(96)
	bl	pnv_power9_force_smt4_release
END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
	nop

4:
	mfspr	r3, SPRN_PSSCR
	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
	li	r0, PSSCR_FAKE_SUSPEND
	andc	r3, r3, r0
	mtspr	SPRN_PSSCR, r3
	ld	r9, HSTATE_KVM_VCPU(r13)
	/* Don't save TEXASR, use value from last exit in real suspend state */
	b	11f
2:
END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)

	ld	r9, HSTATE_KVM_VCPU(r13)

	/* Get a few more GPRs free. */
	std	r29, VCPU_GPRS_TM(29)(r9)
	std	r30, VCPU_GPRS_TM(30)(r9)
	std	r31, VCPU_GPRS_TM(31)(r9)

	/* Save away PPR and DSCR soon so we don't run with user values. */
	mfspr	r31, SPRN_PPR
	HMT_MEDIUM
	mfspr	r30, SPRN_DSCR
	ld	r29, HSTATE_DSCR(r13)
	mtspr	SPRN_DSCR, r29

	/* Save all but r9, r13 & r29-r31 */
	reg = 0
	.rept	29
	.if (reg != 9) && (reg != 13)
	std	reg, VCPU_GPRS_TM(reg)(r9)
	.endif
	reg = reg + 1
	.endr
	/* ... now save r13 */
	GET_SCRATCH0(r4)
	std	r4, VCPU_GPRS_TM(13)(r9)
	/* ... and save r9 */
	ld	r4, PACATMSCRATCH(r13)
	std	r4, VCPU_GPRS_TM(9)(r9)

	/* Reload stack pointer and TOC. */
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r2, PACATOC(r13)

	/* Set MSR RI now we have r1 and r13 back. */
	li	r5, MSR_RI
	mtmsrd	r5, 1

	/* Save away checkpointed SPRs.
	 */
	std	r31, VCPU_PPR_TM(r9)
	std	r30, VCPU_DSCR_TM(r9)
	mflr	r5
	mfcr	r6
	mfctr	r7
	mfspr	r8, SPRN_AMR
	mfspr	r10, SPRN_TAR
	mfxer	r11
	std	r5, VCPU_LR_TM(r9)
	stw	r6, VCPU_CR_TM(r9)
	std	r7, VCPU_CTR_TM(r9)
	std	r8, VCPU_AMR_TM(r9)
	std	r10, VCPU_TAR_TM(r9)
	std	r11, VCPU_XER_TM(r9)

	/* Restore r12 as trap number. */
	lwz	r12, VCPU_TRAP(r9)

	/* Save FP/VSX. */
	addi	r3, r9, VCPU_FPRS_TM
	bl	store_fp_state
	addi	r3, r9, VCPU_VRS_TM
	bl	store_vr_state
	mfspr	r6, SPRN_VRSAVE
	stw	r6, VCPU_VRSAVE_TM(r9)
1:
	/*
	 * We need to save these SPRs after the treclaim so that the software
	 * error code is recorded correctly in the TEXASR.  Also the user may
	 * change these outside of a transaction, so they must always be
	 * context switched.
	 */
	mfspr	r7, SPRN_TEXASR
	std	r7, VCPU_TEXASR(r9)
11:
	mfspr	r5, SPRN_TFHAR
	mfspr	r6, SPRN_TFIAR
	std	r5, VCPU_TFHAR(r9)
	std	r6, VCPU_TFIAR(r9)

	addi	r1, r1, PPC_MIN_STKFRM
	ld	r0, PPC_LR_STKOFF(r1)
	mtlr	r0
	blr

/*
 * Restore transactional state and TM-related registers.
 * Called with r4 pointing to the vcpu struct.
 * This potentially modifies all checkpointed registers.
 * It restores r1, r2, r4 from the PACA.
 */
kvmppc_restore_tm:
	mflr	r0
	std	r0, PPC_LR_STKOFF(r1)

	/* Turn on TM/FP/VSX/VMX so we can restore them. */
	mfmsr	r5
	li	r6, MSR_TM >> 32
	sldi	r6, r6, 32
	or	r5, r5, r6
	ori	r5, r5, MSR_FP
	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
	mtmsrd	r5

	/*
	 * The user may change these outside of a transaction, so they must
	 * always be context switched.
	 */
	ld	r5, VCPU_TFHAR(r4)
	ld	r6, VCPU_TFIAR(r4)
	ld	r7, VCPU_TEXASR(r4)
	mtspr	SPRN_TFHAR, r5
	mtspr	SPRN_TFIAR, r6
	mtspr	SPRN_TEXASR, r7

	li	r0, 0
	stb	r0, HSTATE_FAKE_SUSPEND(r13)
	ld	r5, VCPU_MSR(r4)
	rldicl.	r5, r5, 64 - MSR_TS_S_LG, 62
	beqlr		/* TM not active in guest */
	std	r1, HSTATE_HOST_R1(r13)

	/* Make sure the failure summary is set, otherwise we'll program check
	 * when we trechkpt.  It's possible that this might not have been set
	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
	 * host.
	 */
	oris	r7, r7, (TEXASR_FS)@h
	mtspr	SPRN_TEXASR, r7

	/*
	 * If we are doing TM emulation for the guest on a POWER9 DD2,
	 * then we don't actually do a trechkpt -- we either set up
	 * fake-suspend mode, or emulate a TM rollback.
	 */
BEGIN_FTR_SECTION
	b	.Ldo_tm_fake_load
END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)

	/*
	 * We need to load up the checkpointed state for the guest.
	 * We need to do this early as it will blow away any GPRs, VSRs and
	 * some SPRs.
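	 * The vcpu pointer and the guest PPR/DSCR values are therefore kept
	 * in r29-r31 and are only consumed just before the trechkpt below.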
3358 */ 3359 3360 mr r31, r4 3361 addi r3, r31, VCPU_FPRS_TM 3362 bl load_fp_state 3363 addi r3, r31, VCPU_VRS_TM 3364 bl load_vr_state 3365 mr r4, r31 3366 lwz r7, VCPU_VRSAVE_TM(r4) 3367 mtspr SPRN_VRSAVE, r7 3368 3369 ld r5, VCPU_LR_TM(r4) 3370 lwz r6, VCPU_CR_TM(r4) 3371 ld r7, VCPU_CTR_TM(r4) 3372 ld r8, VCPU_AMR_TM(r4) 3373 ld r9, VCPU_TAR_TM(r4) 3374 ld r10, VCPU_XER_TM(r4) 3375 mtlr r5 3376 mtcr r6 3377 mtctr r7 3378 mtspr SPRN_AMR, r8 3379 mtspr SPRN_TAR, r9 3380 mtxer r10 3381 3382 /* 3383 * Load up PPR and DSCR values but don't put them in the actual SPRs 3384 * till the last moment to avoid running with userspace PPR and DSCR for 3385 * too long. 3386 */ 3387 ld r29, VCPU_DSCR_TM(r4) 3388 ld r30, VCPU_PPR_TM(r4) 3389 3390 std r2, PACATMSCRATCH(r13) /* Save TOC */ 3391 3392 /* Clear the MSR RI since r1, r13 are all going to be foobar. */ 3393 li r5, 0 3394 mtmsrd r5, 1 3395 3396 /* Load GPRs r0-r28 */ 3397 reg = 0 3398 .rept 29 3399 ld reg, VCPU_GPRS_TM(reg)(r31) 3400 reg = reg + 1 3401 .endr 3402 3403 mtspr SPRN_DSCR, r29 3404 mtspr SPRN_PPR, r30 3405 3406 /* Load final GPRs */ 3407 ld 29, VCPU_GPRS_TM(29)(r31) 3408 ld 30, VCPU_GPRS_TM(30)(r31) 3409 ld 31, VCPU_GPRS_TM(31)(r31) 3410 3411 /* TM checkpointed state is now setup. All GPRs are now volatile. */ 3412 TRECHKPT 3413 3414 /* Now let's get back the state we need. */ 3415 HMT_MEDIUM 3416 GET_PACA(r13) 3417 ld r29, HSTATE_DSCR(r13) 3418 mtspr SPRN_DSCR, r29 3419 ld r4, HSTATE_KVM_VCPU(r13) 3420 ld r1, HSTATE_HOST_R1(r13) 3421 ld r2, PACATMSCRATCH(r13) 3422 3423 /* Set the MSR RI since we have our registers back. */ 3424 li r5, MSR_RI 3425 mtmsrd r5, 1 34269: 3427 ld r0, PPC_LR_STKOFF(r1) 3428 mtlr r0 3429 blr 3430 3431.Ldo_tm_fake_load: 3432 cmpwi r5, 1 /* check for suspended state */ 3433 bgt 10f 3434 stb r5, HSTATE_FAKE_SUSPEND(r13) 3435 b 9b /* and return */ 343610: stdu r1, -PPC_MIN_STKFRM(r1) 3437 /* guest is in transactional state, so simulate rollback */ 3438 mr r3, r4 3439 bl kvmhv_emulate_tm_rollback 3440 nop 3441 ld r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */ 3442 addi r1, r1, PPC_MIN_STKFRM 3443 b 9b 3444#endif 3445 3446/* 3447 * We come here if we get any exception or interrupt while we are 3448 * executing host real mode code while in guest MMU context. 3449 * r12 is (CR << 32) | vector 3450 * r13 points to our PACA 3451 * r12 is saved in HSTATE_SCRATCH0(r13) 3452 * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE 3453 * r9 is saved in HSTATE_SCRATCH2(r13) 3454 * r13 is saved in HSPRG1 3455 * cfar is saved in HSTATE_CFAR(r13) 3456 * ppr is saved in HSTATE_PPR(r13) 3457 */ 3458kvmppc_bad_host_intr: 3459 /* 3460 * Switch to the emergency stack, but start half-way down in 3461 * case we were already on it. 3462 */ 3463 mr r9, r1 3464 std r1, PACAR1(r13) 3465 ld r1, PACAEMERGSP(r13) 3466 subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE 3467 std r9, 0(r1) 3468 std r0, GPR0(r1) 3469 std r9, GPR1(r1) 3470 std r2, GPR2(r1) 3471 SAVE_4GPRS(3, r1) 3472 SAVE_2GPRS(7, r1) 3473 srdi r0, r12, 32 3474 clrldi r12, r12, 32 3475 std r0, _CCR(r1) 3476 std r12, _TRAP(r1) 3477 andi. 
r0, r12, 2 3478 beq 1f 3479 mfspr r3, SPRN_HSRR0 3480 mfspr r4, SPRN_HSRR1 3481 mfspr r5, SPRN_HDAR 3482 mfspr r6, SPRN_HDSISR 3483 b 2f 34841: mfspr r3, SPRN_SRR0 3485 mfspr r4, SPRN_SRR1 3486 mfspr r5, SPRN_DAR 3487 mfspr r6, SPRN_DSISR 34882: std r3, _NIP(r1) 3489 std r4, _MSR(r1) 3490 std r5, _DAR(r1) 3491 std r6, _DSISR(r1) 3492 ld r9, HSTATE_SCRATCH2(r13) 3493 ld r12, HSTATE_SCRATCH0(r13) 3494 GET_SCRATCH0(r0) 3495 SAVE_4GPRS(9, r1) 3496 std r0, GPR13(r1) 3497 SAVE_NVGPRS(r1) 3498 ld r5, HSTATE_CFAR(r13) 3499 std r5, ORIG_GPR3(r1) 3500 mflr r3 3501#ifdef CONFIG_RELOCATABLE 3502 ld r4, HSTATE_SCRATCH1(r13) 3503#else 3504 mfctr r4 3505#endif 3506 mfxer r5 3507 lbz r6, PACAIRQSOFTMASK(r13) 3508 std r3, _LINK(r1) 3509 std r4, _CTR(r1) 3510 std r5, _XER(r1) 3511 std r6, SOFTE(r1) 3512 ld r2, PACATOC(r13) 3513 LOAD_REG_IMMEDIATE(3, 0x7265677368657265) 3514 std r3, STACK_FRAME_OVERHEAD-16(r1) 3515 3516 /* 3517 * On POWER9 do a minimal restore of the MMU and call C code, 3518 * which will print a message and panic. 3519 * XXX On POWER7 and POWER8, we just spin here since we don't 3520 * know what the other threads are doing (and we don't want to 3521 * coordinate with them) - but at least we now have register state 3522 * in memory that we might be able to look at from another CPU. 3523 */ 3524BEGIN_FTR_SECTION 3525 b . 3526END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 3527 ld r9, HSTATE_KVM_VCPU(r13) 3528 ld r10, VCPU_KVM(r9) 3529 3530 li r0, 0 3531 mtspr SPRN_AMR, r0 3532 mtspr SPRN_IAMR, r0 3533 mtspr SPRN_CIABR, r0 3534 mtspr SPRN_DAWRX, r0 3535 3536 /* Flush the ERAT on radix P9 DD1 guest exit */ 3537BEGIN_FTR_SECTION 3538 PPC_INVALIDATE_ERAT 3539END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) 3540 3541BEGIN_MMU_FTR_SECTION 3542 b 4f 3543END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) 3544 3545 slbmte r0, r0 3546 slbia 3547 ptesync 3548 ld r8, PACA_SLBSHADOWPTR(r13) 3549 .rept SLB_NUM_BOLTED 3550 li r3, SLBSHADOW_SAVEAREA 3551 LDX_BE r5, r8, r3 3552 addi r3, r3, 8 3553 LDX_BE r6, r8, r3 3554 andis. r7, r5, SLB_ESID_V@h 3555 beq 3f 3556 slbmte r6, r5 35573: addi r8, r8, 16 3558 .endr 3559 35604: lwz r7, KVM_HOST_LPID(r10) 3561 mtspr SPRN_LPID, r7 3562 mtspr SPRN_PID, r0 3563 ld r8, KVM_HOST_LPCR(r10) 3564 mtspr SPRN_LPCR, r8 3565 isync 3566 li r0, KVM_GUEST_MODE_NONE 3567 stb r0, HSTATE_IN_GUEST(r13) 3568 3569 /* 3570 * Turn on the MMU and jump to C code 3571 */ 3572 bcl 20, 31, .+4 35735: mflr r3 3574 addi r3, r3, 9f - 5b 3575 ld r4, PACAKMSR(r13) 3576 mtspr SPRN_SRR0, r3 3577 mtspr SPRN_SRR1, r4 3578 RFI_TO_KERNEL 35799: addi r3, r1, STACK_FRAME_OVERHEAD 3580 bl kvmppc_bad_interrupt 3581 b 9b 3582 3583/* 3584 * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken 3585 * from VCPU_INTR_MSR and is modified based on the required TM state changes. 3586 * r11 has the guest MSR value (in/out) 3587 * r9 has a vcpu pointer (in) 3588 * r0 is used as a scratch register 3589 */ 3590kvmppc_msr_interrupt: 3591 rldicl r0, r11, 64 - MSR_TS_S_LG, 62 3592 cmpwi r0, 2 /* Check if we are in transactional state.. */ 3593 ld r11, VCPU_INTR_MSR(r9) 3594 bne 1f 3595 /* ... if transactional, change to suspended */ 3596 li r0, 1 35971: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG 3598 blr 3599 3600/* 3601 * This works around a hardware bug on POWER8E processors, where 3602 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a 3603 * performance monitor interrupt. Instead, when we need to have 3604 * an interrupt pending, we have to arrange for a counter to overflow. 
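 * The code below does that by clearing MMCR2, enabling the performance
 * monitor exception in MMCR0, and loading PMC6 with 0x7fffffff so that
 * its next increment overflows and raises the interrupt.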
3605 */ 3606kvmppc_fix_pmao: 3607 li r3, 0 3608 mtspr SPRN_MMCR2, r3 3609 lis r3, (MMCR0_PMXE | MMCR0_FCECE)@h 3610 ori r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN 3611 mtspr SPRN_MMCR0, r3 3612 lis r3, 0x7fff 3613 ori r3, r3, 0xffff 3614 mtspr SPRN_PMC6, r3 3615 isync 3616 blr 3617 3618#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 3619/* 3620 * Start timing an activity 3621 * r3 = pointer to time accumulation struct, r4 = vcpu 3622 */ 3623kvmhv_start_timing: 3624 ld r5, HSTATE_KVM_VCORE(r13) 3625 ld r6, VCORE_TB_OFFSET_APPL(r5) 3626 mftb r5 3627 subf r5, r6, r5 /* subtract current timebase offset */ 3628 std r3, VCPU_CUR_ACTIVITY(r4) 3629 std r5, VCPU_ACTIVITY_START(r4) 3630 blr 3631 3632/* 3633 * Accumulate time to one activity and start another. 3634 * r3 = pointer to new time accumulation struct, r4 = vcpu 3635 */ 3636kvmhv_accumulate_time: 3637 ld r5, HSTATE_KVM_VCORE(r13) 3638 ld r8, VCORE_TB_OFFSET_APPL(r5) 3639 ld r5, VCPU_CUR_ACTIVITY(r4) 3640 ld r6, VCPU_ACTIVITY_START(r4) 3641 std r3, VCPU_CUR_ACTIVITY(r4) 3642 mftb r7 3643 subf r7, r8, r7 /* subtract current timebase offset */ 3644 std r7, VCPU_ACTIVITY_START(r4) 3645 cmpdi r5, 0 3646 beqlr 3647 subf r3, r6, r7 3648 ld r8, TAS_SEQCOUNT(r5) 3649 cmpdi r8, 0 3650 addi r8, r8, 1 3651 std r8, TAS_SEQCOUNT(r5) 3652 lwsync 3653 ld r7, TAS_TOTAL(r5) 3654 add r7, r7, r3 3655 std r7, TAS_TOTAL(r5) 3656 ld r6, TAS_MIN(r5) 3657 ld r7, TAS_MAX(r5) 3658 beq 3f 3659 cmpd r3, r6 3660 bge 1f 36613: std r3, TAS_MIN(r5) 36621: cmpd r3, r7 3663 ble 2f 3664 std r3, TAS_MAX(r5) 36652: lwsync 3666 addi r8, r8, 1 3667 std r8, TAS_SEQCOUNT(r5) 3668 blr 3669#endif 3670