// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * KVM paravirt_ops implementation
 *
 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright IBM Corporation, 2007
 *   Authors: Anthony Liguori <aliguori@us.ibm.com>
 */

#define pr_fmt(fmt) "kvm-guest: " fmt

#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
#include <asm/e820/api.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static int kvmapf = 1;

static int __init parse_no_kvmapf(char *arg)
{
        kvmapf = 0;
        return 0;
}

early_param("no-kvmapf", parse_no_kvmapf);

static int steal_acc = 1;
static int __init parse_no_stealacc(char *arg)
{
        steal_acc = 0;
        return 0;
}

early_param("no-steal-acc", parse_no_stealacc);

static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;

static int has_guest_poll = 0;

/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}

#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)

struct kvm_task_sleep_node {
        struct hlist_node link;
        struct swait_queue_head wq;
        u32 token;
        int cpu;
};

static struct kvm_task_sleep_head {
        raw_spinlock_t lock;
        struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];

static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
                                                  u32 token)
{
        struct hlist_node *p;

        hlist_for_each(p, &b->list) {
                struct kvm_task_sleep_node *n =
                        hlist_entry(p, typeof(*n), link);
                if (n->token == token)
                        return n;
        }

        return NULL;
}

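/*
 * Queue the caller's sleep node for @token, unless the wakeup has already
 * arrived. If a dummy wake-up entry is found it is removed and freed, and
 * false is returned to tell the caller that no sleep is needed.
 */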
static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
        struct kvm_task_sleep_node *e;

        raw_spin_lock(&b->lock);
        e = _find_apf_task(b, token);
        if (e) {
                /* dummy entry exists -> wake up was delivered ahead of PF */
                hlist_del(&e->link);
                raw_spin_unlock(&b->lock);
                kfree(e);
                return false;
        }

        n->token = token;
        n->cpu = smp_processor_id();
        init_swait_queue_head(&n->wq);
        hlist_add_head(&n->link, &b->list);
        raw_spin_unlock(&b->lock);
        return true;
}

/*
 * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
 * @token:      Token to identify the sleep node entry
 *
 * Invoked from the async pagefault handling code or from the VM exit page
 * fault handler. In both cases RCU is watching.
 */
void kvm_async_pf_task_wait_schedule(u32 token)
{
        struct kvm_task_sleep_node n;
        DECLARE_SWAITQUEUE(wait);

        lockdep_assert_irqs_disabled();

        if (!kvm_async_pf_queue_task(token, &n))
                return;

        for (;;) {
                prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
                if (hlist_unhashed(&n.link))
                        break;

                local_irq_enable();
                schedule();
                local_irq_disable();
        }
        finish_swait(&n.wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
        hlist_del_init(&n->link);
        if (swq_has_sleeper(&n->wq))
                swake_up_one(&n->wq);
}

static void apf_task_wake_all(void)
{
        int i;

        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
                struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
                struct kvm_task_sleep_node *n;
                struct hlist_node *p, *next;

                raw_spin_lock(&b->lock);
                hlist_for_each_safe(p, next, &b->list) {
                        n = hlist_entry(p, typeof(*n), link);
                        if (n->cpu == smp_processor_id())
                                apf_task_wake_one(n);
                }
                raw_spin_unlock(&b->lock);
        }
}

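/*
 * Wake the task sleeping on @token. A token of ~0 wakes every sleeper that
 * queued itself on this CPU. If the sleeper has not queued itself yet, a
 * dummy node is left behind so that the later sleep attempt returns
 * immediately.
 */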
void kvm_async_pf_task_wake(u32 token)
{
        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
        struct kvm_task_sleep_node *n, *dummy = NULL;

        if (token == ~0) {
                apf_task_wake_all();
                return;
        }

again:
        raw_spin_lock(&b->lock);
        n = _find_apf_task(b, token);
        if (!n) {
                /*
                 * Async #PF not yet handled, add a dummy entry for the token.
                 * Allocating the token must be done outside of the raw lock
                 * as the allocator is preemptible on PREEMPT_RT kernels.
                 */
                if (!dummy) {
                        raw_spin_unlock(&b->lock);
                        dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC);

                        /*
                         * Continue looping on allocation failure, eventually
                         * the async #PF will be handled and allocating a new
                         * node will be unnecessary.
                         */
                        if (!dummy)
                                cpu_relax();

                        /*
                         * Recheck for async #PF completion before enqueueing
                         * the dummy token to avoid duplicate list entries.
                         */
                        goto again;
                }
                dummy->token = token;
                dummy->cpu = smp_processor_id();
                init_swait_queue_head(&dummy->wq);
                hlist_add_head(&dummy->link, &b->list);
                dummy = NULL;
        } else {
                apf_task_wake_one(n);
        }
        raw_spin_unlock(&b->lock);

        /* A dummy token might be allocated and ultimately not used. */
        kfree(dummy);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

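/* Read and clear the per-CPU async #PF flags shared with the host. */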
noinstr u32 kvm_read_and_reset_apf_flags(void)
{
        u32 flags = 0;

        if (__this_cpu_read(apf_reason.enabled)) {
                flags = __this_cpu_read(apf_reason.flags);
                __this_cpu_write(apf_reason.flags, 0);
        }

        return flags;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

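/*
 * Handle a 'page not present' async #PF delivered through the #PF vector.
 * Returns true when the fault was an async #PF and has been handled here,
 * false when the regular page fault path should run.
 */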
noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
        u32 flags = kvm_read_and_reset_apf_flags();
        irqentry_state_t state;

        if (!flags)
                return false;

        state = irqentry_enter(regs);
        instrumentation_begin();

        /*
         * If the host managed to inject an async #PF into an interrupt
         * disabled region, then die hard as this is not going to end well
         * and the host side is seriously broken.
         */
        if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
                panic("Host injected async #PF in interrupt disabled region\n");

        if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
                if (unlikely(!(user_mode(regs))))
                        panic("Host injected async #PF in kernel mode\n");
                /* Page is swapped out by the host. */
                kvm_async_pf_task_wait_schedule(token);
        } else {
                WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
        }

        instrumentation_end();
        irqentry_exit(regs, state);
        return true;
}

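/*
 * 'Page ready' notifications arrive through this system vector: wake the
 * task that went to sleep on the token and acknowledge the event so the
 * host can deliver the next one.
 */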
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
{
        struct pt_regs *old_regs = set_irq_regs(regs);
        u32 token;

        apic_eoi();

        inc_irq_stat(irq_hv_callback_count);

        if (__this_cpu_read(apf_reason.enabled)) {
                token = __this_cpu_read(apf_reason.token);
                kvm_async_pf_task_wake(token);
                __this_cpu_write(apf_reason.token, 0);
                wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
        }

        set_irq_regs(old_regs);
}

static void __init paravirt_ops_setup(void)
{
        pv_info.name = "KVM";

        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_ops.cpu.io_delay = kvm_io_delay;

#ifdef CONFIG_X86_IO_APIC
        no_timer_check = 1;
#endif
}

static void kvm_register_steal_time(void)
{
        int cpu = smp_processor_id();
        struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

        if (!has_steal_clock)
                return;

        wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
        pr_debug("stealtime: cpu %d, msr %llx\n", cpu,
                 (unsigned long long) slow_virt_to_phys(st));
}

static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;

static notrace __maybe_unused void kvm_guest_apic_eoi_write(void)
{
        /**
         * This relies on __test_and_clear_bit to modify the memory
         * in a way that is atomic with respect to the local CPU.
         * The hypervisor only accesses this memory from the local CPU so
         * there's no need for lock or memory barriers.
         * An optimization barrier is implied in apic write.
         */
        if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
                return;
        apic_native_eoi();
}

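/*
 * Per-CPU guest setup: enable the async #PF interrupt, PV EOI and steal
 * time MSRs for the CPU that is coming online.
 */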
static void kvm_guest_cpu_init(void)
{
        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
                u64 pa;

                WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

                pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
                pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

                if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
                        pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

                wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);

                wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
                pr_debug("setup async PF for cpu %d\n", smp_processor_id());
        }

        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
                unsigned long pa;

                /* Size alignment is implied but just to make it explicit. */
                BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
                __this_cpu_write(kvm_apic_eoi, 0);
                pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
                        | KVM_MSR_ENABLED;
                wrmsrl(MSR_KVM_PV_EOI_EN, pa);
        }

        if (has_steal_clock)
                kvm_register_steal_time();
}

static void kvm_pv_disable_apf(void)
{
        if (!__this_cpu_read(apf_reason.enabled))
                return;

        wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
        __this_cpu_write(apf_reason.enabled, 0);

        pr_debug("disable async PF for cpu %d\n", smp_processor_id());
}

static void kvm_disable_steal_time(void)
{
        if (!has_steal_clock)
                return;

        wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

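/*
 * Read the steal time published by the host for @cpu. The version field
 * acts like a seqcount: an odd value means the host is updating the record,
 * so retry until two reads of an even version match.
 */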
static u64 kvm_steal_clock(int cpu)
{
        u64 steal;
        struct kvm_steal_time *src;
        int version;

        src = &per_cpu(steal_time, cpu);
        do {
                version = src->version;
                virt_rmb();
                steal = src->steal;
                virt_rmb();
        } while ((version & 1) || (version != src->version));

        return steal;
}

static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
        early_set_memory_decrypted((unsigned long) ptr, size);
}

/*
 * Iterate through all possible CPUs and map the memory regions pointed to
 * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
 *
 * Note: we iterate through all possible CPUs to ensure that CPUs
 * hotplugged will have their per-cpu variable already mapped as
 * decrypted.
 */
static void __init sev_map_percpu_data(void)
{
        int cpu;

        if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
                return;

        for_each_possible_cpu(cpu) {
                __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
                __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
                __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
        }
}

static void kvm_guest_cpu_offline(bool shutdown)
{
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
        if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
                wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
        kvm_pv_disable_apf();
        if (!shutdown)
                apf_task_wake_all();
        kvmclock_disable();
}

static int kvm_cpu_online(unsigned int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        kvm_guest_cpu_init();
        local_irq_restore(flags);
        return 0;
}

#ifdef CONFIG_SMP

static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);

static bool pv_tlb_flush_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
                !boot_cpu_has(X86_FEATURE_MWAIT) &&
                (num_possible_cpus() != 1));
}

static bool pv_ipi_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
                (num_possible_cpus() != 1));
}

static bool pv_sched_yield_supported(void)
{
        return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
                !kvm_para_has_hint(KVM_HINTS_REALTIME) &&
                kvm_para_has_feature(KVM_FEATURE_STEAL_TIME) &&
                !boot_cpu_has(X86_FEATURE_MWAIT) &&
                (num_possible_cpus() != 1));
}

#define KVM_IPI_CLUSTER_SIZE    (2 * BITS_PER_LONG)

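/*
 * Send @vector to every CPU in @mask with as few hypercalls as possible by
 * packing destination APIC IDs into a bitmap that covers a window of
 * KVM_IPI_CLUSTER_SIZE consecutive IDs starting at 'min'.
 */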
static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
        unsigned long flags;
        int cpu, apic_id, icr;
        int min = 0, max = 0;
#ifdef CONFIG_X86_64
        __uint128_t ipi_bitmap = 0;
#else
        u64 ipi_bitmap = 0;
#endif
        long ret;

        if (cpumask_empty(mask))
                return;

        local_irq_save(flags);

        switch (vector) {
        default:
                icr = APIC_DM_FIXED | vector;
                break;
        case NMI_VECTOR:
                icr = APIC_DM_NMI;
                break;
        }

        for_each_cpu(cpu, mask) {
                apic_id = per_cpu(x86_cpu_to_apicid, cpu);
                if (!ipi_bitmap) {
                        min = max = apic_id;
                } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
                        ipi_bitmap <<= min - apic_id;
                        min = apic_id;
                } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
                        max = apic_id < max ? max : apic_id;
                } else {
                        ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                                (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
                        WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                                  ret);
                        min = max = apic_id;
                        ipi_bitmap = 0;
                }
                __set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
        }

        if (ipi_bitmap) {
                ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
                        (unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
                WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
                          ret);
        }

        local_irq_restore(flags);
}

static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
        __send_ipi_mask(mask, vector);
}

static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
        unsigned int this_cpu = smp_processor_id();
        struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
        const struct cpumask *local_mask;

        cpumask_copy(new_mask, mask);
        cpumask_clear_cpu(this_cpu, new_mask);
        local_mask = new_mask;
        __send_ipi_mask(local_mask, vector);
}

static int __init setup_efi_kvm_sev_migration(void)
{
        efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
        efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
        efi_status_t status;
        unsigned long size;
        bool enabled;

        if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
            !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
                return 0;

        if (!efi_enabled(EFI_BOOT))
                return 0;

        if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
                pr_info("%s : EFI runtime services are not enabled\n", __func__);
                return 0;
        }

        size = sizeof(enabled);

        /* Get variable contents into buffer */
        status = efi.get_variable(efi_sev_live_migration_enabled,
                                  &efi_variable_guid, NULL, &size, &enabled);

        if (status == EFI_NOT_FOUND) {
                pr_info("%s : EFI live migration variable not found\n", __func__);
                return 0;
        }

        if (status != EFI_SUCCESS) {
                pr_info("%s : EFI variable retrieval failed\n", __func__);
                return 0;
        }

        if (enabled == 0) {
                pr_info("%s: live migration disabled in EFI\n", __func__);
                return 0;
        }

        pr_info("%s : live migration enabled in EFI\n", __func__);
        wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);

        return 1;
}

late_initcall(setup_efi_kvm_sev_migration);

/*
 * Set the IPI entry points
 */
static __init void kvm_setup_pv_ipi(void)
{
        apic_update_callback(send_IPI_mask, kvm_send_ipi_mask);
        apic_update_callback(send_IPI_mask_allbutself, kvm_send_ipi_mask_allbutself);
        pr_info("setup PV IPIs\n");
}

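/*
 * After sending a call-function IPI, nudge the host with a sched_yield
 * hypercall towards one preempted, non-idle target vCPU so the IPI gets
 * handled without waiting for the host scheduler to pick that vCPU up.
 */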
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
{
        int cpu;

        native_send_call_func_ipi(mask);

        /* Make sure other vCPUs get a chance to run if they need to. */
        for_each_cpu(cpu, mask) {
                if (!idle_cpu(cpu) && vcpu_is_preempted(cpu)) {
                        kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
                        break;
                }
        }
}

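/*
 * PV remote TLB flush: preempted vCPUs are not sent a flush IPI; instead
 * KVM_VCPU_FLUSH_TLB is set in their steal time record so the host flushes
 * their TLB when they are scheduled back in.
 */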
static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
                        const struct flush_tlb_info *info)
{
        u8 state;
        int cpu;
        struct kvm_steal_time *src;
        struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

        cpumask_copy(flushmask, cpumask);
        /*
         * We have to call flush only on online vCPUs, and
         * queue flush_on_enter for pre-empted vCPUs.
         */
        for_each_cpu(cpu, flushmask) {
                /*
                 * The local vCPU is never preempted, so we do not explicitly
                 * skip check for local vCPU - it will never be cleared from
                 * flushmask.
                 */
                src = &per_cpu(steal_time, cpu);
                state = READ_ONCE(src->preempted);
                if ((state & KVM_VCPU_PREEMPTED)) {
                        if (try_cmpxchg(&src->preempted, &state,
                                        state | KVM_VCPU_FLUSH_TLB))
                                __cpumask_clear_cpu(cpu, flushmask);
                }
        }

        native_flush_tlb_multi(flushmask, info);
}

static __init int kvm_alloc_cpumask(void)
{
        int cpu;

        if (!kvm_para_available() || nopv)
                return 0;

        if (pv_tlb_flush_supported() || pv_ipi_supported())
                for_each_possible_cpu(cpu) {
                        zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
                                GFP_KERNEL, cpu_to_node(cpu));
                }

        return 0;
}
arch_initcall(kvm_alloc_cpumask);

static void __init kvm_smp_prepare_boot_cpu(void)
{
        /*
         * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
         * shares the guest physical address with the hypervisor.
         */
        sev_map_percpu_data();

        kvm_guest_cpu_init();
        native_smp_prepare_boot_cpu();
        kvm_spinlock_init();
}

static int kvm_cpu_down_prepare(unsigned int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        kvm_guest_cpu_offline(false);
        local_irq_restore(flags);
        return 0;
}

#endif

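/*
 * Suspend tears down the per-CPU PV state and records the current
 * MSR_KVM_POLL_CONTROL setting so that guest halt-polling can be restored
 * on resume.
 */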
static int kvm_suspend(void)
{
        u64 val = 0;

        kvm_guest_cpu_offline(false);

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
                rdmsrl(MSR_KVM_POLL_CONTROL, val);
        has_guest_poll = !(val & 1);
#endif
        return 0;
}

static void kvm_resume(void)
{
        kvm_cpu_online(raw_smp_processor_id());

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
        if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll)
                wrmsrl(MSR_KVM_POLL_CONTROL, 0);
#endif
}

static struct syscore_ops kvm_syscore_ops = {
        .suspend        = kvm_suspend,
        .resume         = kvm_resume,
};

static void kvm_pv_guest_cpu_reboot(void *unused)
{
        kvm_guest_cpu_offline(true);
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
                                unsigned long code, void *unused)
{
        if (code == SYS_RESTART)
                on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
        return NOTIFY_DONE;
}

static struct notifier_block kvm_pv_reboot_nb = {
        .notifier_call = kvm_pv_reboot_notify,
};

/*
 * After a PV feature is registered, the host will keep writing to the
 * registered memory location. If the guest happens to shutdown, this memory
 * won't be valid. In cases like kexec, in which you install a new kernel,
 * this means a random memory location will keep being written to.
 */
#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
        kvm_guest_cpu_offline(true);
        native_machine_crash_shutdown(regs);
}
#endif

#if defined(CONFIG_X86_32) || !defined(CONFIG_SMP)
bool __kvm_vcpu_is_preempted(long cpu);

__visible bool __kvm_vcpu_is_preempted(long cpu)
{
        struct kvm_steal_time *src = &per_cpu(steal_time, cpu);

        return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);

#else

#include <asm/asm-offsets.h>

extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);

/*
 * Hand-optimized version for x86-64 to avoid 8 64-bit register saving and
 * restoring to/from the stack.
 */
#define PV_VCPU_PREEMPTED_ASM                                                \
 "movq   __per_cpu_offset(,%rdi,8), %rax\n\t"                               \
 "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t"\
 "setne  %al\n\t"

DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
                    PV_VCPU_PREEMPTED_ASM, .text);
#endif

static void __init kvm_guest_init(void)
{
        int i;

        paravirt_ops_setup();
        register_reboot_notifier(&kvm_pv_reboot_nb);
        for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
                raw_spin_lock_init(&async_pf_sleepers[i].lock);

        if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
                has_steal_clock = 1;
                static_call_update(pv_steal_clock, kvm_steal_clock);

                pv_ops.lock.vcpu_is_preempted =
                        PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
        }

        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                apic_update_callback(eoi, kvm_guest_apic_eoi_write);

        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
                static_branch_enable(&kvm_async_pf_enabled);
                alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
        }

#ifdef CONFIG_SMP
        if (pv_tlb_flush_supported()) {
                pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
                pv_ops.mmu.tlb_remove_table = tlb_remove_table;
                pr_info("KVM setup pv remote TLB flush\n");
        }

        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
        if (pv_sched_yield_supported()) {
                smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
                pr_info("setup PV sched yield\n");
        }
        if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
                                      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
                pr_err("failed to install cpu hotplug callbacks\n");
#else
        sev_map_percpu_data();
        kvm_guest_cpu_init();
#endif

#ifdef CONFIG_KEXEC_CORE
        machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif

        register_syscore_ops(&kvm_syscore_ops);

        /*
         * Hard lockup detection is enabled by default. Disable it, as guests
         * can get false positives too easily, for example if the host is
         * overcommitted.
         */
        hardlockup_detector_disable();
}

static noinline uint32_t __kvm_cpuid_base(void)
{
        if (boot_cpu_data.cpuid_level < 0)
                return 0;       /* So we don't blow up on old processors */

        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
                return hypervisor_cpuid_base(KVM_SIGNATURE, 0);

        return 0;
}

static inline uint32_t kvm_cpuid_base(void)
{
        static int kvm_cpuid_base = -1;

        if (kvm_cpuid_base == -1)
                kvm_cpuid_base = __kvm_cpuid_base();

        return kvm_cpuid_base;
}

bool kvm_para_available(void)
{
        return kvm_cpuid_base() != 0;
}
EXPORT_SYMBOL_GPL(kvm_para_available);

unsigned int kvm_arch_para_features(void)
{
        return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}

unsigned int kvm_arch_para_hints(void)
{
        return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

static uint32_t __init kvm_detect(void)
{
        return kvm_cpuid_base();
}

static void __init kvm_apic_init(void)
{
#ifdef CONFIG_SMP
        if (pv_ipi_supported())
                kvm_setup_pv_ipi();
#endif
}

static bool __init kvm_msi_ext_dest_id(void)
{
        return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}

static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
{
        kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
                           KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
}

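/*
 * Early platform setup: for SEV guests with migration control, re-register
 * the kernel's e820 RAM ranges as encrypted with the host and mark the
 * .bss..decrypted section as shared, then initialize kvmclock and hook PV
 * IPI setup into the APIC post-init path.
 */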
static void __init kvm_init_platform(void)
{
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
            kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
                unsigned long nr_pages;
                int i;

                pv_ops.mmu.notify_page_enc_status_changed =
                        kvm_sev_hc_page_enc_status;

                /*
                 * Reset the host's shared pages list related to kernel
                 * specific page encryption status settings before we load a
                 * new kernel by kexec. Reset the page encryption status
                 * during early boot instead of just before kexec to avoid SMP
                 * races during kvm_pv_guest_cpu_reboot().
                 * NOTE: We cannot reset the complete shared pages list
                 * here as we need to retain the UEFI/OVMF firmware
                 * specific settings.
                 */

                for (i = 0; i < e820_table->nr_entries; i++) {
                        struct e820_entry *entry = &e820_table->entries[i];

                        if (entry->type != E820_TYPE_RAM)
                                continue;

                        nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);

                        kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
                                           nr_pages,
                                           KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
                }

                /*
                 * Ensure that _bss_decrypted section is marked as decrypted in the
                 * shared pages list.
                 */
                early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
                                                __end_bss_decrypted - __start_bss_decrypted, 0);

                /*
                 * If not booted using EFI, enable Live migration support.
                 */
                if (!efi_enabled(EFI_BOOT))
                        wrmsrl(MSR_KVM_MIGRATION_CONTROL,
                               KVM_MIGRATION_READY);
        }
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
}
982d63bae07SWanpeng Li
98399419b25STom Lendacky #if defined(CONFIG_AMD_MEM_ENCRYPT)
kvm_sev_es_hcall_prepare(struct ghcb * ghcb,struct pt_regs * regs)98499419b25STom Lendacky static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
98599419b25STom Lendacky {
98699419b25STom Lendacky /* RAX and CPL are already in the GHCB */
98799419b25STom Lendacky ghcb_set_rbx(ghcb, regs->bx);
98899419b25STom Lendacky ghcb_set_rcx(ghcb, regs->cx);
98999419b25STom Lendacky ghcb_set_rdx(ghcb, regs->dx);
99099419b25STom Lendacky ghcb_set_rsi(ghcb, regs->si);
99199419b25STom Lendacky }
99299419b25STom Lendacky
kvm_sev_es_hcall_finish(struct ghcb * ghcb,struct pt_regs * regs)99399419b25STom Lendacky static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
99499419b25STom Lendacky {
99599419b25STom Lendacky /* No checking of the return state needed */
99699419b25STom Lendacky return true;
99799419b25STom Lendacky }
99899419b25STom Lendacky #endif
99999419b25STom Lendacky
100003b2a320SJuergen Gross const __initconst struct hypervisor_x86 x86_hyper_kvm = {
1001fc73373bSPrarit Bhargava .name = "KVM",
1002fc73373bSPrarit Bhargava .detect = kvm_detect,
100303b2a320SJuergen Gross .type = X86_HYPER_KVM,
1004f3614646SJuergen Gross .init.guest_late_init = kvm_guest_init,
1005f72e38e8SJuergen Gross .init.x2apic_available = kvm_para_available,
10062e008ffeSDavid Woodhouse .init.msi_ext_dest_id = kvm_msi_ext_dest_id,
1007d63bae07SWanpeng Li .init.init_platform = kvm_init_platform,
100899419b25STom Lendacky #if defined(CONFIG_AMD_MEM_ENCRYPT)
100999419b25STom Lendacky .runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
101099419b25STom Lendacky .runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish,
101199419b25STom Lendacky #endif
1012fc73373bSPrarit Bhargava };
1013fc73373bSPrarit Bhargava
activate_jump_labels(void)1014d910f5c1SGlauber Costa static __init int activate_jump_labels(void)
1015d910f5c1SGlauber Costa {
1016d910f5c1SGlauber Costa if (has_steal_clock) {
1017c5905afbSIngo Molnar static_key_slow_inc(¶virt_steal_enabled);
1018d910f5c1SGlauber Costa if (steal_acc)
1019c5905afbSIngo Molnar static_key_slow_inc(¶virt_steal_rq_enabled);
1020d910f5c1SGlauber Costa }
1021d910f5c1SGlauber Costa
1022d910f5c1SGlauber Costa return 0;
1023d910f5c1SGlauber Costa }
1024d910f5c1SGlauber Costa arch_initcall(activate_jump_labels);
102592b75202SSrivatsa Vaddagiri
#ifdef CONFIG_PARAVIRT_SPINLOCKS

/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
static void kvm_kick_cpu(int cpu)
{
        int apicid;
        unsigned long flags = 0;

        apicid = per_cpu(x86_cpu_to_apicid, cpu);
        kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}

#include <asm/qspinlock.h>

static void kvm_wait(u8 *ptr, u8 val)
{
        if (in_nmi())
                return;

        /*
         * halt until it's our turn and kicked. Note that we do safe halt
         * for irq enabled case to avoid hang when lock info is overwritten
         * in irq spinlock slowpath and no spurious interrupt occur to save us.
         */
        if (irqs_disabled()) {
                if (READ_ONCE(*ptr) == val)
                        halt();
        } else {
                local_irq_disable();

                /* safe_halt() will enable IRQ */
                if (READ_ONCE(*ptr) == val)
                        safe_halt();
                else
                        local_irq_enable();
        }
}

/*
 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
 */
void __init kvm_spinlock_init(void)
{
        /*
         * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
         * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
         * preferred over native qspinlock when vCPU is preempted.
         */
        if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
                pr_info("PV spinlocks disabled, no host support\n");
                return;
        }

        /*
         * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
         * are available.
         */
        if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
                pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
                goto out;
        }

        if (num_possible_cpus() == 1) {
                pr_info("PV spinlocks disabled, single CPU\n");
                goto out;
        }

        if (nopvspin) {
                pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
                goto out;
        }

        pr_info("PV spinlocks enabled\n");

        __pv_init_lock_hash();
        pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
        pv_ops.lock.queued_spin_unlock =
                PV_CALLEE_SAVE(__pv_queued_spin_unlock);
        pv_ops.lock.wait = kvm_wait;
        pv_ops.lock.kick = kvm_kick_cpu;

        /*
         * When PV spinlock is enabled which is preferred over
         * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
         * Just disable it anyway.
         */
out:
        static_branch_disable(&virt_spin_lock_key);
}

#endif  /* CONFIG_PARAVIRT_SPINLOCKS */

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL

static void kvm_disable_host_haltpoll(void *i)
{
        wrmsrl(MSR_KVM_POLL_CONTROL, 0);
}

static void kvm_enable_host_haltpoll(void *i)
{
        wrmsrl(MSR_KVM_POLL_CONTROL, 1);
}

void arch_haltpoll_enable(unsigned int cpu)
{
        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
                pr_err_once("host does not support poll control\n");
                pr_err_once("host upgrade recommended\n");
                return;
        }

        /* Enabling guest halt poll disables host halt poll */
        smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_enable);

void arch_haltpoll_disable(unsigned int cpu)
{
        if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
                return;

        /* Disabling guest halt poll enables host halt poll */
        smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
#endif