xref: /openbmc/linux/arch/x86/kvm/lapic.c (revision b0e55fef624e511e060fa05e4ca96cae6d902f04)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /*
4  * Local APIC virtualization
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright (C) 2007 Novell
8  * Copyright (C) 2007 Intel
9  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10  *
11  * Authors:
12  *   Dor Laor <dor.laor@qumranet.com>
13  *   Gregory Haskins <ghaskins@novell.com>
14  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15  *
16  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17  */
18 
19 #include <linux/kvm_host.h>
20 #include <linux/kvm.h>
21 #include <linux/mm.h>
22 #include <linux/highmem.h>
23 #include <linux/smp.h>
24 #include <linux/hrtimer.h>
25 #include <linux/io.h>
26 #include <linux/export.h>
27 #include <linux/math64.h>
28 #include <linux/slab.h>
29 #include <asm/processor.h>
30 #include <asm/msr.h>
31 #include <asm/page.h>
32 #include <asm/current.h>
33 #include <asm/apicdef.h>
34 #include <asm/delay.h>
35 #include <linux/atomic.h>
36 #include <linux/jump_label.h>
37 #include "kvm_cache_regs.h"
38 #include "irq.h"
39 #include "trace.h"
40 #include "x86.h"
41 #include "cpuid.h"
42 #include "hyperv.h"
43 
/*
 * 64-bit modulo helper.  Builds without CONFIG_X86_64 go through
 * div64_u64() instead of the '%' operator; that variant evaluates both
 * macro arguments more than once.
 */
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

/*
 * printf-style format fragments.
 * NOTE(review): only PRIx64 carries an "ll" length modifier; confirm the
 * argument types at every use before relying on the others.
 */
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

/* 0x14 is the APIC version for Xeon and Pentium (8.4.8) */
#define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH		(1 << 12)
/* The following defines are not in apicdef.h */
#define APIC_SHORT_MASK			0xc0000
#define APIC_DEST_NOSHORT		0x0
#define APIC_DEST_MASK			0x800
#define MAX_APIC_VECTOR			256
#define APIC_VECTORS_PER_REG		32

/* Destination IDs that kvm_apic_broadcast() treats as "all APICs". */
#define APIC_BROADCAST			0xFF
#define X2APIC_BROADCAST		0xFFFFFFFFul

static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT	1000
#define LAPIC_TIMER_ADVANCE_NS_MAX     5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
75 
/* Return non-zero if the bit for vector @vec is set in the register bank @bitmap. */
static inline int apic_test_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return test_bit(VEC_POS(vec), reg);
}
80 
81 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
82 {
83 	struct kvm_lapic *apic = vcpu->arch.apic;
84 
85 	return apic_test_vector(vector, apic->regs + APIC_ISR) ||
86 		apic_test_vector(vector, apic->regs + APIC_IRR);
87 }
88 
/*
 * Set the bit for @vec in @bitmap using the non-atomic __test_and_set_bit()
 * and return its previous value.
 */
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return __test_and_set_bit(VEC_POS(vec), reg);
}
93 
/*
 * Clear the bit for @vec in @bitmap using the non-atomic
 * __test_and_clear_bit() and return its previous value.
 */
static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	void *reg = bitmap + REG_POS(vec);

	return __test_and_clear_bit(VEC_POS(vec), reg);
}
98 
/*
 * Deferred static keys.  apic_sw_disabled is incremented/decremented by
 * apic_set_spiv() whenever an APIC's software-enable bit changes.
 * NOTE(review): apic_hw_disabled is presumably maintained the same way for
 * the hardware-enable bit; its users are not visible in this part of the file.
 */
struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
101 
/* Return 1 only when the APIC is both software- and hardware-enabled, else 0. */
static inline int apic_enabled(struct kvm_lapic *apic)
{
	if (!kvm_apic_sw_enabled(apic))
		return 0;

	return kvm_apic_hw_enabled(apic) ? 1 : 0;
}
106 
/* Bits common to the LVT masks built in apic_lvt_mask[]. */
#define LVT_MASK	\
	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

/* LVT_MASK plus the extra bits used for the LINT0/LINT1 entries. */
#define LINT_MASK	\
	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
113 
114 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
115 {
116 	return apic->vcpu->vcpu_id;
117 }
118 
119 bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
120 {
121 	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
122 }
123 EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
124 
125 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
126 {
127 	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
128 }
129 
130 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
131 		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
132 	switch (map->mode) {
133 	case KVM_APIC_MODE_X2APIC: {
134 		u32 offset = (dest_id >> 16) * 16;
135 		u32 max_apic_id = map->max_apic_id;
136 
137 		if (offset <= max_apic_id) {
138 			u8 cluster_size = min(max_apic_id - offset + 1, 16U);
139 
140 			offset = array_index_nospec(offset, map->max_apic_id + 1);
141 			*cluster = &map->phys_map[offset];
142 			*mask = dest_id & (0xffff >> (16 - cluster_size));
143 		} else {
144 			*mask = 0;
145 		}
146 
147 		return true;
148 		}
149 	case KVM_APIC_MODE_XAPIC_FLAT:
150 		*cluster = map->xapic_flat_map;
151 		*mask = dest_id & 0xff;
152 		return true;
153 	case KVM_APIC_MODE_XAPIC_CLUSTER:
154 		*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
155 		*mask = dest_id & 0xf;
156 		return true;
157 	default:
158 		/* Not optimized. */
159 		return false;
160 	}
161 }
162 
163 static void kvm_apic_map_free(struct rcu_head *rcu)
164 {
165 	struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
166 
167 	kvfree(map);
168 }
169 
/*
 * Rebuild kvm->arch.apic_map from the current APIC state of every vCPU.
 *
 * The new map is built under apic_map_lock, published with
 * rcu_assign_pointer(), and the previous map (if any) is freed through
 * call_rcu().  If the allocation fails, 'new' is NULL when control reaches
 * the 'out' label, so a NULL map is published.  An IOAPIC scan is requested
 * in every case.
 */
static void recalculate_apic_map(struct kvm *kvm)
{
	struct kvm_apic_map *new, *old = NULL;
	struct kvm_vcpu *vcpu;
	int i;
	u32 max_id = 255; /* enough space for any xAPIC ID */

	mutex_lock(&kvm->arch.apic_map_lock);

	/* Size phys_map[] to hold the largest x2APIC ID among present APICs. */
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (kvm_apic_present(vcpu))
			max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

	new = kvzalloc(sizeof(struct kvm_apic_map) +
	                   sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
			   GFP_KERNEL_ACCOUNT);

	if (!new)
		goto out;

	new->max_apic_id = max_id;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvm_lapic *apic = vcpu->arch.apic;
		struct kvm_lapic **cluster;
		u16 mask;
		u32 ldr;
		u8 xapic_id;
		u32 x2apic_id;

		if (!kvm_apic_present(vcpu))
			continue;

		xapic_id = kvm_xapic_id(apic);
		x2apic_id = kvm_x2apic_id(apic);

		/* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
		if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
				x2apic_id <= new->max_apic_id)
			new->phys_map[x2apic_id] = apic;
		/*
		 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
		 * prevent them from masking VCPUs with APIC ID <= 0xff.
		 */
		if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
			new->phys_map[xapic_id] = apic;

		/* Only software-enabled APICs take part in the logical map. */
		if (!kvm_apic_sw_enabled(apic))
			continue;

		ldr = kvm_lapic_get_reg(apic, APIC_LDR);

		/* Mode bits are OR-ed together across all vCPUs. */
		if (apic_x2apic_mode(apic)) {
			new->mode |= KVM_APIC_MODE_X2APIC;
		} else if (ldr) {
			ldr = GET_APIC_LOGICAL_ID(ldr);
			if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
				new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
			else
				new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
		}

		/* Returns false when the accumulated mode is not an optimized one. */
		if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
			continue;

		/* Record this APIC in the slot of the lowest set bit of its mask. */
		if (mask)
			cluster[ffs(mask) - 1] = apic;
	}
out:
	old = rcu_dereference_protected(kvm->arch.apic_map,
			lockdep_is_held(&kvm->arch.apic_map_lock));
	rcu_assign_pointer(kvm->arch.apic_map, new);
	mutex_unlock(&kvm->arch.apic_map_lock);

	if (old)
		call_rcu(&old->rcu, kvm_apic_map_free);

	kvm_make_scan_ioapic_request(kvm);
}
249 
250 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
251 {
252 	bool enabled = val & APIC_SPIV_APIC_ENABLED;
253 
254 	kvm_lapic_set_reg(apic, APIC_SPIV, val);
255 
256 	if (enabled != apic->sw_enabled) {
257 		apic->sw_enabled = enabled;
258 		if (enabled)
259 			static_key_slow_dec_deferred(&apic_sw_disabled);
260 		else
261 			static_key_slow_inc(&apic_sw_disabled.key);
262 
263 		recalculate_apic_map(apic->vcpu->kvm);
264 	}
265 }
266 
267 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
268 {
269 	kvm_lapic_set_reg(apic, APIC_ID, id << 24);
270 	recalculate_apic_map(apic->vcpu->kvm);
271 }
272 
273 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
274 {
275 	kvm_lapic_set_reg(apic, APIC_LDR, id);
276 	recalculate_apic_map(apic->vcpu->kvm);
277 }
278 
/*
 * Derive the x2APIC logical destination from an x2APIC ID: bits 31:16
 * carry id >> 4 and the low bits carry a single bit selected by id & 0xf.
 */
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
	u32 cluster = id >> 4;
	u32 member = id & 0xf;

	return (cluster << 16) | (1 << member);
}
283 
284 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
285 {
286 	u32 ldr = kvm_apic_calc_x2apic_ldr(id);
287 
288 	WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
289 
290 	kvm_lapic_set_reg(apic, APIC_ID, id);
291 	kvm_lapic_set_reg(apic, APIC_LDR, ldr);
292 	recalculate_apic_map(apic->vcpu->kvm);
293 }
294 
295 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
296 {
297 	return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
298 }
299 
300 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
301 {
302 	return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
303 }
304 
305 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
306 {
307 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
308 }
309 
310 static inline int apic_lvtt_period(struct kvm_lapic *apic)
311 {
312 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
313 }
314 
315 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
316 {
317 	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
318 }
319 
320 static inline int apic_lvt_nmi_mode(u32 lvt_val)
321 {
322 	return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
323 }
324 
325 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
326 {
327 	struct kvm_lapic *apic = vcpu->arch.apic;
328 	struct kvm_cpuid_entry2 *feat;
329 	u32 v = APIC_VERSION;
330 
331 	if (!lapic_in_kernel(vcpu))
332 		return;
333 
334 	/*
335 	 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
336 	 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
337 	 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
338 	 * version first and level-triggered interrupts never get EOIed in
339 	 * IOAPIC.
340 	 */
341 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
342 	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
343 	    !ioapic_in_kernel(vcpu->kvm))
344 		v |= APIC_LVR_DIRECTED_EOI;
345 	kvm_lapic_set_reg(apic, APIC_LVR, v);
346 }
347 
348 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
349 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
350 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
351 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
352 	LINT_MASK, LINT_MASK,	/* LVT0-1 */
353 	LVT_MASK		/* LVTERR */
354 };
355 
356 static int find_highest_vector(void *bitmap)
357 {
358 	int vec;
359 	u32 *reg;
360 
361 	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
362 	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
363 		reg = bitmap + REG_POS(vec);
364 		if (*reg)
365 			return __fls(*reg) + vec;
366 	}
367 
368 	return -1;
369 }
370 
371 static u8 count_vectors(void *bitmap)
372 {
373 	int vec;
374 	u32 *reg;
375 	u8 count = 0;
376 
377 	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
378 		reg = bitmap + REG_POS(vec);
379 		count += hweight32(*reg);
380 	}
381 
382 	return count;
383 }
384 
/*
 * Merge the eight 32-bit words of @pir into the IRR registers found at
 * @regs + APIC_IRR (one register every 0x10 bytes).
 *
 * Each non-zero @pir word is atomically exchanged with 0 and OR-ed into
 * the matching IRR word.  On return *max_irr holds the highest vector set
 * in the IRR after the merge, or -1 if the IRR is empty.
 *
 * Returns true when at least one IRR bit was newly set and the highest
 * newly-set vector is also the highest pending vector (*max_irr).
 */
bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
{
	u32 i, vec;
	u32 pir_val, irr_val, prev_irr_val;
	int max_updated_irr;

	max_updated_irr = -1;
	*max_irr = -1;

	/* i indexes the 32-bit word, vec is the first vector of that word. */
	for (i = vec = 0; i <= 7; i++, vec += 32) {
		pir_val = READ_ONCE(pir[i]);
		irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
		if (pir_val) {
			prev_irr_val = irr_val;
			/* xchg() claims whatever bits are set in pir[i] at this point. */
			irr_val |= xchg(&pir[i], 0);
			*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
			if (prev_irr_val != irr_val) {
				/* Words are walked in ascending order, so the last hit wins. */
				max_updated_irr =
					__fls(irr_val ^ prev_irr_val) + vec;
			}
		}
		if (irr_val)
			*max_irr = __fls(irr_val) + vec;
	}

	return ((max_updated_irr != -1) &&
		(max_updated_irr == *max_irr));
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
414 
415 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
416 {
417 	struct kvm_lapic *apic = vcpu->arch.apic;
418 
419 	return __kvm_apic_update_irr(pir, apic->regs, max_irr);
420 }
421 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
422 
423 static inline int apic_search_irr(struct kvm_lapic *apic)
424 {
425 	return find_highest_vector(apic->regs + APIC_IRR);
426 }
427 
428 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
429 {
430 	int result;
431 
432 	/*
433 	 * Note that irr_pending is just a hint. It will be always
434 	 * true with virtual interrupt delivery enabled.
435 	 */
436 	if (!apic->irr_pending)
437 		return -1;
438 
439 	result = apic_search_irr(apic);
440 	ASSERT(result == -1 || result >= 16);
441 
442 	return result;
443 }
444 
445 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
446 {
447 	struct kvm_vcpu *vcpu;
448 
449 	vcpu = apic->vcpu;
450 
451 	if (unlikely(vcpu->arch.apicv_active)) {
452 		/* need to update RVI */
453 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
454 		kvm_x86_ops->hwapic_irr_update(vcpu,
455 				apic_find_highest_irr(apic));
456 	} else {
457 		apic->irr_pending = false;
458 		kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
459 		if (apic_search_irr(apic) != -1)
460 			apic->irr_pending = true;
461 	}
462 }
463 
464 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
465 {
466 	struct kvm_vcpu *vcpu;
467 
468 	if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
469 		return;
470 
471 	vcpu = apic->vcpu;
472 
473 	/*
474 	 * With APIC virtualization enabled, all caching is disabled
475 	 * because the processor can modify ISR under the hood.  Instead
476 	 * just set SVI.
477 	 */
478 	if (unlikely(vcpu->arch.apicv_active))
479 		kvm_x86_ops->hwapic_isr_update(vcpu, vec);
480 	else {
481 		++apic->isr_count;
482 		BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
483 		/*
484 		 * ISR (in service register) bit is set when injecting an interrupt.
485 		 * The highest vector is injected. Thus the latest bit set matches
486 		 * the highest bit in ISR.
487 		 */
488 		apic->highest_isr_cache = vec;
489 	}
490 }
491 
492 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
493 {
494 	int result;
495 
496 	/*
497 	 * Note that isr_count is always 1, and highest_isr_cache
498 	 * is always -1, with APIC virtualization enabled.
499 	 */
500 	if (!apic->isr_count)
501 		return -1;
502 	if (likely(apic->highest_isr_cache != -1))
503 		return apic->highest_isr_cache;
504 
505 	result = find_highest_vector(apic->regs + APIC_ISR);
506 	ASSERT(result == -1 || result >= 16);
507 
508 	return result;
509 }
510 
511 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
512 {
513 	struct kvm_vcpu *vcpu;
514 	if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
515 		return;
516 
517 	vcpu = apic->vcpu;
518 
519 	/*
520 	 * We do get here for APIC virtualization enabled if the guest
521 	 * uses the Hyper-V APIC enlightenment.  In this case we may need
522 	 * to trigger a new interrupt delivery by writing the SVI field;
523 	 * on the other hand isr_count and highest_isr_cache are unused
524 	 * and must be left alone.
525 	 */
526 	if (unlikely(vcpu->arch.apicv_active))
527 		kvm_x86_ops->hwapic_isr_update(vcpu,
528 					       apic_find_highest_isr(apic));
529 	else {
530 		--apic->isr_count;
531 		BUG_ON(apic->isr_count < 0);
532 		apic->highest_isr_cache = -1;
533 	}
534 }
535 
536 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
537 {
538 	/* This may race with setting of irr in __apic_accept_irq() and
539 	 * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
540 	 * will cause vmexit immediately and the value will be recalculated
541 	 * on the next vmentry.
542 	 */
543 	return apic_find_highest_irr(vcpu->arch.apic);
544 }
545 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
546 
/* Forward declaration; the definition appears later in this file. */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map);
550 
551 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
552 		     struct dest_map *dest_map)
553 {
554 	struct kvm_lapic *apic = vcpu->arch.apic;
555 
556 	return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
557 			irq->level, irq->trig_mode, dest_map);
558 }
559 
560 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
561 			 struct kvm_lapic_irq *irq, u32 min)
562 {
563 	int i, count = 0;
564 	struct kvm_vcpu *vcpu;
565 
566 	if (min > map->max_apic_id)
567 		return 0;
568 
569 	for_each_set_bit(i, ipi_bitmap,
570 		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
571 		if (map->phys_map[min + i]) {
572 			vcpu = map->phys_map[min + i]->vcpu;
573 			count += kvm_apic_set_irq(vcpu, irq, NULL);
574 		}
575 	}
576 
577 	return count;
578 }
579 
580 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
581 		    unsigned long ipi_bitmap_high, u32 min,
582 		    unsigned long icr, int op_64_bit)
583 {
584 	struct kvm_apic_map *map;
585 	struct kvm_lapic_irq irq = {0};
586 	int cluster_size = op_64_bit ? 64 : 32;
587 	int count;
588 
589 	if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
590 		return -KVM_EINVAL;
591 
592 	irq.vector = icr & APIC_VECTOR_MASK;
593 	irq.delivery_mode = icr & APIC_MODE_MASK;
594 	irq.level = (icr & APIC_INT_ASSERT) != 0;
595 	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
596 
597 	rcu_read_lock();
598 	map = rcu_dereference(kvm->arch.apic_map);
599 
600 	count = -EOPNOTSUPP;
601 	if (likely(map)) {
602 		count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
603 		min += cluster_size;
604 		count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
605 	}
606 
607 	rcu_read_unlock();
608 	return count;
609 }
610 
611 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
612 {
613 
614 	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
615 				      sizeof(val));
616 }
617 
618 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
619 {
620 
621 	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
622 				      sizeof(*val));
623 }
624 
625 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
626 {
627 	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
628 }
629 
630 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
631 {
632 	u8 val;
633 	if (pv_eoi_get_user(vcpu, &val) < 0)
634 		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
635 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
636 	return val & 0x1;
637 }
638 
639 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
640 {
641 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
642 		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
643 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
644 		return;
645 	}
646 	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
647 }
648 
649 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
650 {
651 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
652 		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
653 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
654 		return;
655 	}
656 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
657 }
658 
659 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
660 {
661 	int highest_irr;
662 	if (apic->vcpu->arch.apicv_active)
663 		highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
664 	else
665 		highest_irr = apic_find_highest_irr(apic);
666 	if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
667 		return -1;
668 	return highest_irr;
669 }
670 
671 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
672 {
673 	u32 tpr, isrv, ppr, old_ppr;
674 	int isr;
675 
676 	old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
677 	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
678 	isr = apic_find_highest_isr(apic);
679 	isrv = (isr != -1) ? isr : 0;
680 
681 	if ((tpr & 0xf0) >= (isrv & 0xf0))
682 		ppr = tpr & 0xff;
683 	else
684 		ppr = isrv & 0xf0;
685 
686 	*new_ppr = ppr;
687 	if (old_ppr != ppr)
688 		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
689 
690 	return ppr < old_ppr;
691 }
692 
693 static void apic_update_ppr(struct kvm_lapic *apic)
694 {
695 	u32 ppr;
696 
697 	if (__apic_update_ppr(apic, &ppr) &&
698 	    apic_has_interrupt_for_ppr(apic, ppr) != -1)
699 		kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
700 }
701 
702 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
703 {
704 	apic_update_ppr(vcpu->arch.apic);
705 }
706 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
707 
708 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
709 {
710 	kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
711 	apic_update_ppr(apic);
712 }
713 
714 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
715 {
716 	return mda == (apic_x2apic_mode(apic) ?
717 			X2APIC_BROADCAST : APIC_BROADCAST);
718 }
719 
720 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
721 {
722 	if (kvm_apic_broadcast(apic, mda))
723 		return true;
724 
725 	if (apic_x2apic_mode(apic))
726 		return mda == kvm_x2apic_id(apic);
727 
728 	/*
729 	 * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
730 	 * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
731 	 * this allows unique addressing of VCPUs with APIC ID over 0xff.
732 	 * The 0xff condition is needed because writeable xAPIC ID.
733 	 */
734 	if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
735 		return true;
736 
737 	return mda == kvm_xapic_id(apic);
738 }
739 
740 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
741 {
742 	u32 logical_id;
743 
744 	if (kvm_apic_broadcast(apic, mda))
745 		return true;
746 
747 	logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
748 
749 	if (apic_x2apic_mode(apic))
750 		return ((logical_id >> 16) == (mda >> 16))
751 		       && (logical_id & mda & 0xffff) != 0;
752 
753 	logical_id = GET_APIC_LOGICAL_ID(logical_id);
754 
755 	switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
756 	case APIC_DFR_FLAT:
757 		return (logical_id & mda) != 0;
758 	case APIC_DFR_CLUSTER:
759 		return ((logical_id >> 4) == (mda >> 4))
760 		       && (logical_id & mda & 0xf) != 0;
761 	default:
762 		return false;
763 	}
764 }
765 
766 /* The KVM local APIC implementation has two quirks:
767  *
768  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
769  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
770  *    KVM doesn't do that aliasing.
771  *
772  *  - in-kernel IOAPIC messages have to be delivered directly to
773  *    x2APIC, because the kernel does not support interrupt remapping.
774  *    In order to support broadcast without interrupt remapping, x2APIC
775  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
776  *    to X2APIC_BROADCAST.
777  *
778  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
779  * important when userspace wants to use x2APIC-format MSIs, because
780  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
781  */
782 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
783 		struct kvm_lapic *source, struct kvm_lapic *target)
784 {
785 	bool ipi = source != NULL;
786 
787 	if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
788 	    !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
789 		return X2APIC_BROADCAST;
790 
791 	return dest_id;
792 }
793 
794 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
795 			   int short_hand, unsigned int dest, int dest_mode)
796 {
797 	struct kvm_lapic *target = vcpu->arch.apic;
798 	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
799 
800 	ASSERT(target);
801 	switch (short_hand) {
802 	case APIC_DEST_NOSHORT:
803 		if (dest_mode == APIC_DEST_PHYSICAL)
804 			return kvm_apic_match_physical_addr(target, mda);
805 		else
806 			return kvm_apic_match_logical_addr(target, mda);
807 	case APIC_DEST_SELF:
808 		return target == source;
809 	case APIC_DEST_ALLINC:
810 		return true;
811 	case APIC_DEST_ALLBUT:
812 		return target != source;
813 	default:
814 		return false;
815 	}
816 }
817 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
818 
819 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
820 		       const unsigned long *bitmap, u32 bitmap_size)
821 {
822 	u32 mod;
823 	int i, idx = -1;
824 
825 	mod = vector % dest_vcpus;
826 
827 	for (i = 0; i <= mod; i++) {
828 		idx = find_next_bit(bitmap, bitmap_size, idx + 1);
829 		BUG_ON(idx == bitmap_size);
830 	}
831 
832 	return idx;
833 }
834 
835 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
836 {
837 	if (!kvm->arch.disabled_lapic_found) {
838 		kvm->arch.disabled_lapic_found = true;
839 		printk(KERN_INFO
840 		       "Disabled LAPIC found during irq injection\n");
841 	}
842 }
843 
844 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
845 		struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
846 {
847 	if (kvm->arch.x2apic_broadcast_quirk_disabled) {
848 		if ((irq->dest_id == APIC_BROADCAST &&
849 				map->mode != KVM_APIC_MODE_X2APIC))
850 			return true;
851 		if (irq->dest_id == X2APIC_BROADCAST)
852 			return true;
853 	} else {
854 		bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
855 		if (irq->dest_id == (x2apic_ipi ?
856 		                     X2APIC_BROADCAST : APIC_BROADCAST))
857 			return true;
858 	}
859 
860 	return false;
861 }
862 
863 /* Return true if the interrupt can be handled by using *bitmap as index mask
864  * for valid destinations in *dst array.
865  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
866  * Note: we may have zero kvm_lapic destinations when we return true, which
867  * means that the interrupt should be dropped.  In this case, *bitmap would be
868  * zero and *dst undefined.
869  */
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
		struct kvm_lapic **src, struct kvm_lapic_irq *irq,
		struct kvm_apic_map *map, struct kvm_lapic ***dst,
		unsigned long *bitmap)
{
	int i, lowest;

	/* Self shorthand resolves to the sender; other shorthands are not handled here. */
	if (irq->shorthand == APIC_DEST_SELF && src) {
		*dst = src;
		*bitmap = 1;
		return true;
	} else if (irq->shorthand)
		return false;

	/* No map published, or a broadcast destination: caller must fall back. */
	if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
		return false;

	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
		if (irq->dest_id > map->max_apic_id) {
			/* Destination outside the map: nothing to deliver to. */
			*bitmap = 0;
		} else {
			u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
			*dst = &map->phys_map[dest_id];
			*bitmap = 1;
		}
		return true;
	}

	/*
	 * Logical mode.  The helper stores its mask through a u16 view of
	 * *bitmap, so the full word is zeroed first.
	 */
	*bitmap = 0;
	if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
				(u16 *)bitmap))
		return false;

	/* Not lowest-priority delivery: every APIC selected by the mask is a target. */
	if (!kvm_lowest_prio_delivery(irq))
		return true;

	if (!kvm_vector_hashing_enabled()) {
		/* Keep the present candidate that kvm_apic_compare_prio() ranks lowest. */
		lowest = -1;
		for_each_set_bit(i, bitmap, 16) {
			if (!(*dst)[i])
				continue;
			if (lowest < 0)
				lowest = i;
			else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
						(*dst)[lowest]->vcpu) < 0)
				lowest = i;
		}
	} else {
		/* Vector hashing: the vector selects one of the candidate slots. */
		if (!*bitmap)
			return true;

		lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
				bitmap, 16);

		/* The hashed slot has no APIC: report it once and drop the interrupt. */
		if (!(*dst)[lowest]) {
			kvm_apic_disabled_lapic_found(kvm);
			*bitmap = 0;
			return true;
		}
	}

	/* Narrow the bitmap to the single chosen destination, if any. */
	*bitmap = (lowest >= 0) ? 1 << lowest : 0;

	return true;
}
935 
936 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
937 		struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
938 {
939 	struct kvm_apic_map *map;
940 	unsigned long bitmap;
941 	struct kvm_lapic **dst = NULL;
942 	int i;
943 	bool ret;
944 
945 	*r = -1;
946 
947 	if (irq->shorthand == APIC_DEST_SELF) {
948 		*r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
949 		return true;
950 	}
951 
952 	rcu_read_lock();
953 	map = rcu_dereference(kvm->arch.apic_map);
954 
955 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
956 	if (ret) {
957 		*r = 0;
958 		for_each_set_bit(i, &bitmap, 16) {
959 			if (!dst[i])
960 				continue;
961 			*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
962 		}
963 	}
964 
965 	rcu_read_unlock();
966 	return ret;
967 }
968 
969 /*
970  * This routine tries to handler interrupts in posted mode, here is how
971  * it deals with different cases:
972  * - For single-destination interrupts, handle it in posted mode
973  * - Else if vector hashing is enabled and it is a lowest-priority
974  *   interrupt, handle it in posted mode and use the following mechanism
975  *   to find the destinaiton vCPU.
976  *	1. For lowest-priority interrupts, store all the possible
977  *	   destination vCPUs in an array.
978  *	2. Use "guest vector % max number of destination vCPUs" to find
979  *	   the right destination vCPU in the array for the lowest-priority
980  *	   interrupt.
981  * - Otherwise, use remapped mode to inject the interrupt.
982  */
983 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
984 			struct kvm_vcpu **dest_vcpu)
985 {
986 	struct kvm_apic_map *map;
987 	unsigned long bitmap;
988 	struct kvm_lapic **dst = NULL;
989 	bool ret = false;
990 
991 	if (irq->shorthand)
992 		return false;
993 
994 	rcu_read_lock();
995 	map = rcu_dereference(kvm->arch.apic_map);
996 
997 	if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
998 			hweight16(bitmap) == 1) {
999 		unsigned long i = find_first_bit(&bitmap, 16);
1000 
1001 		if (dst[i]) {
1002 			*dest_vcpu = dst[i]->vcpu;
1003 			ret = true;
1004 		}
1005 	}
1006 
1007 	rcu_read_unlock();
1008 	return ret;
1009 }
1010 
/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 *
 * @delivery_mode selects the action (fixed/lowest-priority vector, REMRD,
 * SMI, NMI, INIT, STARTUP, EXTINT).  @dest_map, when non-NULL, records the
 * target vcpu_id and vector for fixed and lowest-priority interrupts.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
			     int vector, int level, int trig_mode,
			     struct dest_map *dest_map)
{
	int result = 0;
	struct kvm_vcpu *vcpu = apic->vcpu;

	trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
				  trig_mode, vector);
	switch (delivery_mode) {
	case APIC_DM_LOWEST:
		vcpu->arch.apic_arb_prio++;
		/* fall through */
	case APIC_DM_FIXED:
		/* A level-triggered de-assert is discarded. */
		if (unlikely(trig_mode && !level))
			break;

		/* FIXME add logic for vcpu on reset */
		if (unlikely(!apic_enabled(apic)))
			break;

		result = 1;

		if (dest_map) {
			__set_bit(vcpu->vcpu_id, dest_map->map);
			dest_map->vectors[vcpu->vcpu_id] = vector;
		}

		/* Touch the TMR only when the vector's trigger mode actually changes. */
		if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
			if (trig_mode)
				kvm_lapic_set_vector(vector,
						     apic->regs + APIC_TMR);
			else
				kvm_lapic_clear_vector(vector,
						       apic->regs + APIC_TMR);
		}

		if (vcpu->arch.apicv_active)
			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
		else {
			kvm_lapic_set_irr(vector, apic);

			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_REMRD:
		/* This delivery mode is handled by marking the vCPU pv-unhalted. */
		result = 1;
		vcpu->arch.pv.pv_unhalted = 1;
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_SMI:
		result = 1;
		kvm_make_request(KVM_REQ_SMI, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_NMI:
		result = 1;
		kvm_inject_nmi(vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_INIT:
		/* Only the level-triggered de-assert form of INIT is ignored. */
		if (!trig_mode || level) {
			result = 1;
			/* assumes that there are only KVM_APIC_INIT/SIPI */
			apic->pending_events = (1UL << KVM_APIC_INIT);
			/* make sure pending_events is visible before sending
			 * the request */
			smp_wmb();
			kvm_make_request(KVM_REQ_EVENT, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		break;

	case APIC_DM_STARTUP:
		result = 1;
		apic->sipi_vector = vector;
		/* make sure sipi_vector is visible for the receiver */
		smp_wmb();
		set_bit(KVM_APIC_SIPI, &apic->pending_events);
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
		break;

	case APIC_DM_EXTINT:
		/*
		 * Should only be called by kvm_apic_local_deliver() with LVT0,
		 * before NMI watchdog was enabled. Already handled by
		 * kvm_apic_accept_pic_intr().
		 */
		break;

	default:
		printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
		       delivery_mode);
		break;
	}
	return result;
}
1119 
1120 /*
1121  * This routine identifies the destination vcpus mask meant to receive the
1122  * IOAPIC interrupts. It either uses kvm_apic_map_get_dest_lapic() to find
1123  * out the destination vcpus array and set the bitmap or it traverses to
1124  * each available vcpu to identify the same.
1125  */
1126 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1127 			      unsigned long *vcpu_bitmap)
1128 {
1129 	struct kvm_lapic **dest_vcpu = NULL;
1130 	struct kvm_lapic *src = NULL;
1131 	struct kvm_apic_map *map;
1132 	struct kvm_vcpu *vcpu;
1133 	unsigned long bitmap;
1134 	int i, vcpu_idx;
1135 	bool ret;
1136 
1137 	rcu_read_lock();
1138 	map = rcu_dereference(kvm->arch.apic_map);
1139 
1140 	ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1141 					  &bitmap);
1142 	if (ret) {
1143 		for_each_set_bit(i, &bitmap, 16) {
1144 			if (!dest_vcpu[i])
1145 				continue;
1146 			vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1147 			__set_bit(vcpu_idx, vcpu_bitmap);
1148 		}
1149 	} else {
1150 		kvm_for_each_vcpu(i, vcpu, kvm) {
1151 			if (!kvm_apic_present(vcpu))
1152 				continue;
1153 			if (!kvm_apic_match_dest(vcpu, NULL,
1154 						 irq->delivery_mode,
1155 						 irq->dest_id,
1156 						 irq->dest_mode))
1157 				continue;
1158 			__set_bit(i, vcpu_bitmap);
1159 		}
1160 	}
1161 	rcu_read_unlock();
1162 }
1163 
/*
 * Compare the arbitration priorities of two vCPUs.
 *
 * Returns the difference vcpu1 - vcpu2 of their apic_arb_prio values, i.e.
 * negative, zero or positive.
 */
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
	return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}
1168 
/* Return true if @vector is set in the vCPU's ioapic_handled_vectors bitmap. */
static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
	return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}
1173 
1174 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1175 {
1176 	int trigger_mode;
1177 
1178 	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
1179 	if (!kvm_ioapic_handles_vector(apic, vector))
1180 		return;
1181 
1182 	/* Request a KVM exit to inform the userspace IOAPIC. */
1183 	if (irqchip_split(apic->vcpu->kvm)) {
1184 		apic->vcpu->arch.pending_ioapic_eoi = vector;
1185 		kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1186 		return;
1187 	}
1188 
1189 	if (apic_test_vector(vector, apic->regs + APIC_TMR))
1190 		trigger_mode = IOAPIC_LEVEL_TRIG;
1191 	else
1192 		trigger_mode = IOAPIC_EDGE_TRIG;
1193 
1194 	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1195 }
1196 
1197 static int apic_set_eoi(struct kvm_lapic *apic)
1198 {
1199 	int vector = apic_find_highest_isr(apic);
1200 
1201 	trace_kvm_eoi(apic, vector);
1202 
1203 	/*
1204 	 * Not every write EOI will has corresponding ISR,
1205 	 * one example is when Kernel check timer on setup_IO_APIC
1206 	 */
1207 	if (vector == -1)
1208 		return vector;
1209 
1210 	apic_clear_isr(vector, apic);
1211 	apic_update_ppr(apic);
1212 
1213 	if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
1214 		kvm_hv_synic_send_eoi(apic->vcpu, vector);
1215 
1216 	kvm_ioapic_send_eoi(apic, vector);
1217 	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1218 	return vector;
1219 }
1220 
/*
 * This interface assumes a trap-like exit, which has already finished the
 * desired side effects including the vISR and vPPR update.  Only the IOAPIC
 * EOI forwarding and the KVM_REQ_EVENT request are performed here for
 * @vector.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_eoi(apic, vector);

	kvm_ioapic_send_eoi(apic, vector);
	kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1235 
1236 static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1237 {
1238 	struct kvm_lapic_irq irq;
1239 
1240 	irq.vector = icr_low & APIC_VECTOR_MASK;
1241 	irq.delivery_mode = icr_low & APIC_MODE_MASK;
1242 	irq.dest_mode = icr_low & APIC_DEST_MASK;
1243 	irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1244 	irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1245 	irq.shorthand = icr_low & APIC_SHORT_MASK;
1246 	irq.msi_redir_hint = false;
1247 	if (apic_x2apic_mode(apic))
1248 		irq.dest_id = icr_high;
1249 	else
1250 		irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1251 
1252 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
1253 
1254 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1255 }
1256 
1257 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1258 {
1259 	ktime_t remaining, now;
1260 	s64 ns;
1261 	u32 tmcct;
1262 
1263 	ASSERT(apic != NULL);
1264 
1265 	/* if initial count is 0, current count should also be 0 */
1266 	if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1267 		apic->lapic_timer.period == 0)
1268 		return 0;
1269 
1270 	now = ktime_get();
1271 	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1272 	if (ktime_to_ns(remaining) < 0)
1273 		remaining = 0;
1274 
1275 	ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1276 	tmcct = div64_u64(ns,
1277 			 (APIC_BUS_CYCLE_NS * apic->divide_count));
1278 
1279 	return tmcct;
1280 }
1281 
/*
 * Record a TPR access in the vCPU's kvm_run area (current RIP and whether it
 * was a write) and raise KVM_REQ_REPORT_TPR_ACCESS.
 */
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
	struct kvm_vcpu *vcpu = apic->vcpu;
	struct kvm_run *run = vcpu->run;

	kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
	run->tpr_access.rip = kvm_rip_read(vcpu);
	run->tpr_access.is_write = write;
}
1291 
/* Report a TPR access only when tpr_access_reporting is set for the vCPU. */
static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
	if (apic->vcpu->arch.tpr_access_reporting)
		__report_tpr_access(apic, write);
}
1297 
1298 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1299 {
1300 	u32 val = 0;
1301 
1302 	if (offset >= LAPIC_MMIO_LENGTH)
1303 		return 0;
1304 
1305 	switch (offset) {
1306 	case APIC_ARBPRI:
1307 		break;
1308 
1309 	case APIC_TMCCT:	/* Timer CCR */
1310 		if (apic_lvtt_tscdeadline(apic))
1311 			return 0;
1312 
1313 		val = apic_get_tmcct(apic);
1314 		break;
1315 	case APIC_PROCPRI:
1316 		apic_update_ppr(apic);
1317 		val = kvm_lapic_get_reg(apic, offset);
1318 		break;
1319 	case APIC_TASKPRI:
1320 		report_tpr_access(apic, false);
1321 		/* fall thru */
1322 	default:
1323 		val = kvm_lapic_get_reg(apic, offset);
1324 		break;
1325 	}
1326 
1327 	return val;
1328 }
1329 
/* Map an embedded kvm_io_device back to the kvm_lapic that contains it. */
static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_lapic, dev);
}
1334 
/*
 * Register offsets are turned into bit positions by dropping the low four
 * bits (reg >> 4).  APIC_REG_MASK() yields the 64-bit mask bit for one
 * register; APIC_REGS_MASK() yields the bits for 'count' consecutive
 * registers starting at 'first'.
 */
#define APIC_REG_MASK(reg)	(1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
	(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
1338 
1339 int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1340 		void *data)
1341 {
1342 	unsigned char alignment = offset & 0xf;
1343 	u32 result;
1344 	/* this bitmask has a bit cleared for each reserved register */
1345 	u64 valid_reg_mask =
1346 		APIC_REG_MASK(APIC_ID) |
1347 		APIC_REG_MASK(APIC_LVR) |
1348 		APIC_REG_MASK(APIC_TASKPRI) |
1349 		APIC_REG_MASK(APIC_PROCPRI) |
1350 		APIC_REG_MASK(APIC_LDR) |
1351 		APIC_REG_MASK(APIC_DFR) |
1352 		APIC_REG_MASK(APIC_SPIV) |
1353 		APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1354 		APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1355 		APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1356 		APIC_REG_MASK(APIC_ESR) |
1357 		APIC_REG_MASK(APIC_ICR) |
1358 		APIC_REG_MASK(APIC_ICR2) |
1359 		APIC_REG_MASK(APIC_LVTT) |
1360 		APIC_REG_MASK(APIC_LVTTHMR) |
1361 		APIC_REG_MASK(APIC_LVTPC) |
1362 		APIC_REG_MASK(APIC_LVT0) |
1363 		APIC_REG_MASK(APIC_LVT1) |
1364 		APIC_REG_MASK(APIC_LVTERR) |
1365 		APIC_REG_MASK(APIC_TMICT) |
1366 		APIC_REG_MASK(APIC_TMCCT) |
1367 		APIC_REG_MASK(APIC_TDCR);
1368 
1369 	/* ARBPRI is not valid on x2APIC */
1370 	if (!apic_x2apic_mode(apic))
1371 		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
1372 
1373 	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1374 		return 1;
1375 
1376 	result = __apic_read(apic, offset & ~0xf);
1377 
1378 	trace_kvm_apic_read(offset, result);
1379 
1380 	switch (len) {
1381 	case 1:
1382 	case 2:
1383 	case 4:
1384 		memcpy(data, (char *)&result + alignment, len);
1385 		break;
1386 	default:
1387 		printk(KERN_ERR "Local APIC read with len = %x, "
1388 		       "should be 1,2, or 4 instead\n", len);
1389 		break;
1390 	}
1391 	return 0;
1392 }
1393 EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1394 
1395 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1396 {
1397 	return addr >= apic->base_address &&
1398 		addr < apic->base_address + LAPIC_MMIO_LENGTH;
1399 }
1400 
1401 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1402 			   gpa_t address, int len, void *data)
1403 {
1404 	struct kvm_lapic *apic = to_lapic(this);
1405 	u32 offset = address - apic->base_address;
1406 
1407 	if (!apic_mmio_in_range(apic, address))
1408 		return -EOPNOTSUPP;
1409 
1410 	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1411 		if (!kvm_check_has_quirk(vcpu->kvm,
1412 					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1413 			return -EOPNOTSUPP;
1414 
1415 		memset(data, 0xff, len);
1416 		return 0;
1417 	}
1418 
1419 	kvm_lapic_reg_read(apic, offset, len, data);
1420 
1421 	return 0;
1422 }
1423 
1424 static void update_divide_count(struct kvm_lapic *apic)
1425 {
1426 	u32 tmp1, tmp2, tdcr;
1427 
1428 	tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1429 	tmp1 = tdcr & 0xf;
1430 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1431 	apic->divide_count = 0x1 << (tmp2 & 0x7);
1432 }
1433 
1434 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1435 {
1436 	/*
1437 	 * Do not allow the guest to program periodic timers with small
1438 	 * interval, since the hrtimers are not throttled by the host
1439 	 * scheduler.
1440 	 */
1441 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1442 		s64 min_period = min_timer_period_us * 1000LL;
1443 
1444 		if (apic->lapic_timer.period < min_period) {
1445 			pr_info_ratelimited(
1446 			    "kvm: vcpu %i: requested %lld ns "
1447 			    "lapic timer period limited to %lld ns\n",
1448 			    apic->vcpu->vcpu_id,
1449 			    apic->lapic_timer.period, min_period);
1450 			apic->lapic_timer.period = min_period;
1451 		}
1452 	}
1453 }
1454 
1455 static void apic_update_lvtt(struct kvm_lapic *apic)
1456 {
1457 	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1458 			apic->lapic_timer.timer_mode_mask;
1459 
1460 	if (apic->lapic_timer.timer_mode != timer_mode) {
1461 		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1462 				APIC_LVT_TIMER_TSCDEADLINE)) {
1463 			hrtimer_cancel(&apic->lapic_timer.timer);
1464 			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1465 			apic->lapic_timer.period = 0;
1466 			apic->lapic_timer.tscdeadline = 0;
1467 		}
1468 		apic->lapic_timer.timer_mode = timer_mode;
1469 		limit_periodic_timer_frequency(apic);
1470 	}
1471 }
1472 
1473 /*
1474  * On APICv, this test will cause a busy wait
1475  * during a higher-priority task.
1476  */
1477 
1478 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1479 {
1480 	struct kvm_lapic *apic = vcpu->arch.apic;
1481 	u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1482 
1483 	if (kvm_apic_hw_enabled(apic)) {
1484 		int vec = reg & APIC_VECTOR_MASK;
1485 		void *bitmap = apic->regs + APIC_ISR;
1486 
1487 		if (vcpu->arch.apicv_active)
1488 			bitmap = apic->regs + APIC_IRR;
1489 
1490 		if (apic_test_vector(vec, bitmap))
1491 			return true;
1492 	}
1493 	return false;
1494 }
1495 
/*
 * Busy-wait for up to @guest_cycles guest TSC cycles, never longer than the
 * vCPU's timer_advance_ns.
 */
static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
{
	u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;

	/*
	 * If the guest TSC is running at a different ratio than the host, then
	 * convert the delay to nanoseconds to achieve an accurate delay.  Note
	 * that __delay() uses delay_tsc whenever the hardware has TSC, thus
	 * always for VMX enabled hardware.
	 */
	if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
		/* Same ratio: delay directly in cycles. */
		__delay(min(guest_cycles,
			nsec_to_cycles(vcpu, timer_advance_ns)));
	} else {
		/* cycles * 10^6 / kHz == nanoseconds */
		u64 delay_ns = guest_cycles * 1000000ULL;
		do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
		ndelay(min_t(u32, delay_ns, timer_advance_ns));
	}
}
1515 
1516 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1517 					      s64 advance_expire_delta)
1518 {
1519 	struct kvm_lapic *apic = vcpu->arch.apic;
1520 	u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1521 	u64 ns;
1522 
1523 	/* Do not adjust for tiny fluctuations or large random spikes. */
1524 	if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1525 	    abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1526 		return;
1527 
1528 	/* too early */
1529 	if (advance_expire_delta < 0) {
1530 		ns = -advance_expire_delta * 1000000ULL;
1531 		do_div(ns, vcpu->arch.virtual_tsc_khz);
1532 		timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1533 	} else {
1534 	/* too late */
1535 		ns = advance_expire_delta * 1000000ULL;
1536 		do_div(ns, vcpu->arch.virtual_tsc_khz);
1537 		timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1538 	}
1539 
1540 	if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1541 		timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1542 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1543 }
1544 
1545 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1546 {
1547 	struct kvm_lapic *apic = vcpu->arch.apic;
1548 	u64 guest_tsc, tsc_deadline;
1549 
1550 	if (apic->lapic_timer.expired_tscdeadline == 0)
1551 		return;
1552 
1553 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1554 	apic->lapic_timer.expired_tscdeadline = 0;
1555 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1556 	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
1557 
1558 	if (guest_tsc < tsc_deadline)
1559 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1560 
1561 	if (lapic_timer_advance_dynamic)
1562 		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
1563 }
1564 
/*
 * Wait for the advanced timer deadline, but only if the timer interrupt is
 * seen as injected by lapic_timer_int_injected().
 */
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
	if (lapic_timer_int_injected(vcpu))
		__kvm_wait_lapic_expire(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1571 
1572 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1573 {
1574 	struct kvm_timer *ktimer = &apic->lapic_timer;
1575 
1576 	kvm_apic_local_deliver(apic, APIC_LVTT);
1577 	if (apic_lvtt_tscdeadline(apic))
1578 		ktimer->tscdeadline = 0;
1579 	if (apic_lvtt_oneshot(apic)) {
1580 		ktimer->tscdeadline = 0;
1581 		ktimer->target_expiration = 0;
1582 	}
1583 }
1584 
1585 static void apic_timer_expired(struct kvm_lapic *apic)
1586 {
1587 	struct kvm_vcpu *vcpu = apic->vcpu;
1588 	struct kvm_timer *ktimer = &apic->lapic_timer;
1589 
1590 	if (atomic_read(&apic->lapic_timer.pending))
1591 		return;
1592 
1593 	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1594 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
1595 
1596 	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1597 		if (apic->lapic_timer.timer_advance_ns)
1598 			__kvm_wait_lapic_expire(vcpu);
1599 		kvm_apic_inject_pending_timer_irqs(apic);
1600 		return;
1601 	}
1602 
1603 	atomic_inc(&apic->lapic_timer.pending);
1604 	kvm_set_pending_timer(vcpu);
1605 }
1606 
/*
 * Arm the software (hrtimer) timer for TSC-deadline mode.
 *
 * The distance between the guest TSC and the programmed deadline is
 * converted to nanoseconds.  If the deadline lies further ahead than
 * timer_advance_ns, the hrtimer is armed timer_advance_ns before it;
 * otherwise the timer is treated as already expired.  Nothing is done when
 * no deadline is set or the virtual TSC frequency is zero.
 */
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
	u64 ns = 0;
	ktime_t expire;
	struct kvm_vcpu *vcpu = apic->vcpu;
	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
	unsigned long flags;
	ktime_t now;

	if (unlikely(!tscdeadline || !this_tsc_khz))
		return;

	/* Sample both clocks with interrupts off. */
	local_irq_save(flags);

	now = ktime_get();
	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

	/* cycles * 10^6 / kHz == nanoseconds */
	ns = (tscdeadline - guest_tsc) * 1000000ULL;
	do_div(ns, this_tsc_khz);

	if (likely(tscdeadline > guest_tsc) &&
	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
		expire = ktime_add_ns(now, ns);
		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
	} else
		apic_timer_expired(apic);

	local_irq_restore(flags);
}
1639 
/*
 * Rescale a running timer after the divide count changed.
 *
 * @old_divisor: divide count that was in effect before the change
 *
 * The period is recomputed from TMICT and the new divide count, and the time
 * remaining until expiry is scaled by new_divisor / old_divisor.  Both
 * tscdeadline and target_expiration are moved accordingly.
 */
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
	ktime_t now, remaining;
	u64 ns_remaining_old, ns_remaining_new;

	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
		* APIC_BUS_CYCLE_NS * apic->divide_count;
	limit_periodic_timer_frequency(apic);

	now = ktime_get();
	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
	/* An expiration in the past counts as no time remaining. */
	if (ktime_to_ns(remaining) < 0)
		remaining = 0;

	ns_remaining_old = ktime_to_ns(remaining);
	ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
	                                   apic->divide_count, old_divisor);

	/* Shift the TSC deadline by the change in remaining time. */
	apic->lapic_timer.tscdeadline +=
		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
		nsec_to_cycles(apic->vcpu, ns_remaining_old);
	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
}
1663 
1664 static bool set_target_expiration(struct kvm_lapic *apic)
1665 {
1666 	ktime_t now;
1667 	u64 tscl = rdtsc();
1668 
1669 	now = ktime_get();
1670 	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1671 		* APIC_BUS_CYCLE_NS * apic->divide_count;
1672 
1673 	if (!apic->lapic_timer.period) {
1674 		apic->lapic_timer.tscdeadline = 0;
1675 		return false;
1676 	}
1677 
1678 	limit_periodic_timer_frequency(apic);
1679 
1680 	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1681 		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
1682 	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
1683 
1684 	return true;
1685 }
1686 
/*
 * Move a periodic timer to its next period: target_expiration advances by
 * exactly one period and the TSC deadline is re-derived from it.
 */
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
	ktime_t now = ktime_get();
	u64 tscl = rdtsc();
	ktime_t delta;

	/*
	 * Synchronize both deadlines to the same time source or
	 * differences in the periods (caused by differences in the
	 * underlying clocks or numerical approximation errors) will
	 * cause the two to drift apart over time as the errors
	 * accumulate.
	 */
	apic->lapic_timer.target_expiration =
		ktime_add_ns(apic->lapic_timer.target_expiration,
				apic->lapic_timer.period);
	/* Time from now to the new expiration, expressed as guest TSC. */
	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
		nsec_to_cycles(apic->vcpu, delta);
}
1707 
1708 static void start_sw_period(struct kvm_lapic *apic)
1709 {
1710 	if (!apic->lapic_timer.period)
1711 		return;
1712 
1713 	if (ktime_after(ktime_get(),
1714 			apic->lapic_timer.target_expiration)) {
1715 		apic_timer_expired(apic);
1716 
1717 		if (apic_lvtt_oneshot(apic))
1718 			return;
1719 
1720 		advance_periodic_target_expiration(apic);
1721 	}
1722 
1723 	hrtimer_start(&apic->lapic_timer.timer,
1724 		apic->lapic_timer.target_expiration,
1725 		HRTIMER_MODE_ABS);
1726 }
1727 
1728 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1729 {
1730 	if (!lapic_in_kernel(vcpu))
1731 		return false;
1732 
1733 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1734 }
1735 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1736 
/*
 * Stop the hv timer through the vendor callback and mark it unused.
 * Warns if called while preemptible or while the hv timer is not in use.
 */
static void cancel_hv_timer(struct kvm_lapic *apic)
{
	WARN_ON(preemptible());
	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	kvm_x86_ops->cancel_hv_timer(apic->vcpu);
	apic->lapic_timer.hv_timer_in_use = false;
}
1744 
/*
 * Try to program the hv timer with the current TSC deadline.
 *
 * Returns false if there is no set_hv_timer callback, no deadline is set, or
 * the callback fails; the caller then falls back to the software timer.
 * Returns true otherwise, even if the hv timer was cancelled again below
 * because the timer had already fired.
 */
static bool start_hv_timer(struct kvm_lapic *apic)
{
	struct kvm_timer *ktimer = &apic->lapic_timer;
	struct kvm_vcpu *vcpu = apic->vcpu;
	bool expired;

	WARN_ON(preemptible());
	if (!kvm_x86_ops->set_hv_timer)
		return false;

	if (!ktimer->tscdeadline)
		return false;

	if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
		return false;

	/* The hv timer now owns the deadline; stop the software timer. */
	ktimer->hv_timer_in_use = true;
	hrtimer_cancel(&ktimer->timer);

	/*
	 * To simplify handling the periodic timer, leave the hv timer running
	 * even if the deadline timer has expired, i.e. rely on the resulting
	 * VM-Exit to recompute the periodic timer's target expiration.
	 */
	if (!apic_lvtt_period(apic)) {
		/*
		 * Cancel the hv timer if the sw timer fired while the hv timer
		 * was being programmed, or if the hv timer itself expired.
		 */
		if (atomic_read(&ktimer->pending)) {
			cancel_hv_timer(apic);
		} else if (expired) {
			apic_timer_expired(apic);
			cancel_hv_timer(apic);
		}
	}

	trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);

	return true;
}
1786 
1787 static void start_sw_timer(struct kvm_lapic *apic)
1788 {
1789 	struct kvm_timer *ktimer = &apic->lapic_timer;
1790 
1791 	WARN_ON(preemptible());
1792 	if (apic->lapic_timer.hv_timer_in_use)
1793 		cancel_hv_timer(apic);
1794 	if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1795 		return;
1796 
1797 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1798 		start_sw_period(apic);
1799 	else if (apic_lvtt_tscdeadline(apic))
1800 		start_sw_tscdeadline(apic);
1801 	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1802 }
1803 
1804 static void restart_apic_timer(struct kvm_lapic *apic)
1805 {
1806 	preempt_disable();
1807 
1808 	if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1809 		goto out;
1810 
1811 	if (!start_hv_timer(apic))
1812 		start_sw_timer(apic);
1813 out:
1814 	preempt_enable();
1815 }
1816 
1817 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1818 {
1819 	struct kvm_lapic *apic = vcpu->arch.apic;
1820 
1821 	preempt_disable();
1822 	/* If the preempt notifier has already run, it also called apic_timer_expired */
1823 	if (!apic->lapic_timer.hv_timer_in_use)
1824 		goto out;
1825 	WARN_ON(swait_active(&vcpu->wq));
1826 	cancel_hv_timer(apic);
1827 	apic_timer_expired(apic);
1828 
1829 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1830 		advance_periodic_target_expiration(apic);
1831 		restart_apic_timer(apic);
1832 	}
1833 out:
1834 	preempt_enable();
1835 }
1836 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1837 
/*
 * Re-arm the vCPU's APIC timer via restart_apic_timer(), which tries the hv
 * timer first.
 */
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
	restart_apic_timer(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1843 
/*
 * Move the vCPU's APIC timer from the hv timer to the software timer.
 * Does nothing if the hv timer is not in use.
 */
void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	preempt_disable();
	/* Possibly the TSC deadline timer is not enabled yet */
	if (apic->lapic_timer.hv_timer_in_use)
		start_sw_timer(apic);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1855 
/*
 * Re-arm the APIC timer; warns if the hv timer is not in use when called.
 */
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
	restart_apic_timer(apic);
}
1863 
1864 static void start_apic_timer(struct kvm_lapic *apic)
1865 {
1866 	atomic_set(&apic->lapic_timer.pending, 0);
1867 
1868 	if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1869 	    && !set_target_expiration(apic))
1870 		return;
1871 
1872 	restart_apic_timer(apic);
1873 }
1874 
1875 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1876 {
1877 	bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1878 
1879 	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1880 		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1881 		if (lvt0_in_nmi_mode) {
1882 			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1883 		} else
1884 			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1885 	}
1886 }
1887 
/*
 * Write @val to the APIC register at offset @reg, applying the register's
 * side effects.
 *
 * Returns 0 on success and 1 if the write is rejected: unknown register, a
 * register that is not writable in x2APIC mode (ID, LDR, DFR), a non-zero
 * ESR write in x2APIC mode, or SELF_IPI outside x2APIC mode.
 */
int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
	int ret = 0;

	trace_kvm_apic_write(reg, val);

	switch (reg) {
	case APIC_ID:		/* Local APIC ID */
		/* The xAPIC ID is taken from the top byte of the value. */
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_xapic_id(apic, val >> 24);
		else
			ret = 1;
		break;

	case APIC_TASKPRI:
		report_tpr_access(apic, true);
		apic_set_tpr(apic, val & 0xff);
		break;

	case APIC_EOI:
		/* The written value is ignored. */
		apic_set_eoi(apic);
		break;

	case APIC_LDR:
		if (!apic_x2apic_mode(apic))
			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
		else
			ret = 1;
		break;

	case APIC_DFR:
		if (!apic_x2apic_mode(apic)) {
			/* The low 28 bits are always stored as ones. */
			kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
			recalculate_apic_map(apic->vcpu->kvm);
		} else
			ret = 1;
		break;

	case APIC_SPIV: {
		u32 mask = 0x3ff;
		/* Directed EOI is writable only if LVR advertises it. */
		if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
			mask |= APIC_SPIV_DIRECTED_EOI;
		apic_set_spiv(apic, val & mask);
		/* Software-disabling the APIC masks every LVT entry. */
		if (!(val & APIC_SPIV_APIC_ENABLED)) {
			int i;
			u32 lvt_val;

			for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
				lvt_val = kvm_lapic_get_reg(apic,
						       APIC_LVTT + 0x10 * i);
				kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
					     lvt_val | APIC_LVT_MASKED);
			}
			apic_update_lvtt(apic);
			atomic_set(&apic->lapic_timer.pending, 0);

		}
		break;
	}
	case APIC_ICR:
		/* No delay here, so we always clear the pending bit */
		val &= ~(1 << 12);
		/* The destination comes from the current ICR2 contents. */
		apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
		kvm_lapic_set_reg(apic, APIC_ICR, val);
		break;

	case APIC_ICR2:
		/* Outside x2APIC mode only the top byte is kept. */
		if (!apic_x2apic_mode(apic))
			val &= 0xff000000;
		kvm_lapic_set_reg(apic, APIC_ICR2, val);
		break;

	case APIC_LVT0:
		apic_manage_nmi_watchdog(apic, val);
		/* fall through */
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT1:
	case APIC_LVTERR:
		/* TODO: Check vector */
		/* LVT entries stay masked while the APIC is software-disabled. */
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;

		/* Keep only the bits allowed for this LVT entry. */
		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
		kvm_lapic_set_reg(apic, reg, val);

		break;

	case APIC_LVTT:
		if (!kvm_apic_sw_enabled(apic))
			val |= APIC_LVT_MASKED;
		/* The timer entry additionally keeps the timer mode bits. */
		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
		kvm_lapic_set_reg(apic, APIC_LVTT, val);
		apic_update_lvtt(apic);
		break;

	case APIC_TMICT:
		/* Initial-count writes are ignored in TSC-deadline mode. */
		if (apic_lvtt_tscdeadline(apic))
			break;

		hrtimer_cancel(&apic->lapic_timer.timer);
		kvm_lapic_set_reg(apic, APIC_TMICT, val);
		start_apic_timer(apic);
		break;

	case APIC_TDCR: {
		uint32_t old_divisor = apic->divide_count;

		kvm_lapic_set_reg(apic, APIC_TDCR, val);
		update_divide_count(apic);
		/* Rescale a running timer if the divisor actually changed. */
		if (apic->divide_count != old_divisor &&
				apic->lapic_timer.period) {
			hrtimer_cancel(&apic->lapic_timer.timer);
			update_target_expiration(apic, old_divisor);
			restart_apic_timer(apic);
		}
		break;
	}
	case APIC_ESR:
		/* Accepted without effect, except non-zero values in x2APIC mode. */
		if (apic_x2apic_mode(apic) && val != 0)
			ret = 1;
		break;

	case APIC_SELF_IPI:
		if (apic_x2apic_mode(apic)) {
			/*
			 * Emulated as an ICR write of the vector with 0x40000
			 * set (a bit inside APIC_SHORT_MASK; presumably the
			 * "self" shorthand - TODO confirm).
			 */
			kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
		} else
			ret = 1;
		break;
	default:
		ret = 1;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
2025 
2026 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2027 			    gpa_t address, int len, const void *data)
2028 {
2029 	struct kvm_lapic *apic = to_lapic(this);
2030 	unsigned int offset = address - apic->base_address;
2031 	u32 val;
2032 
2033 	if (!apic_mmio_in_range(apic, address))
2034 		return -EOPNOTSUPP;
2035 
2036 	if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2037 		if (!kvm_check_has_quirk(vcpu->kvm,
2038 					 KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2039 			return -EOPNOTSUPP;
2040 
2041 		return 0;
2042 	}
2043 
2044 	/*
2045 	 * APIC register must be aligned on 128-bits boundary.
2046 	 * 32/64/128 bits registers must be accessed thru 32 bits.
2047 	 * Refer SDM 8.4.1
2048 	 */
2049 	if (len != 4 || (offset & 0xf))
2050 		return 0;
2051 
2052 	val = *(u32*)data;
2053 
2054 	kvm_lapic_reg_write(apic, offset & 0xff0, val);
2055 
2056 	return 0;
2057 }
2058 
/* Signal an EOI by writing 0 to the vCPU's APIC_EOI register. */
void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
{
	kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2064 
2065 /* emulate APIC access in a trap manner */
2066 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2067 {
2068 	u32 val = 0;
2069 
2070 	/* hw has done the conditional check and inst decode */
2071 	offset &= 0xff0;
2072 
2073 	kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
2074 
2075 	/* TODO: optimize to just emulate side effect w/o one more write */
2076 	kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2077 }
2078 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2079 
2080 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2081 {
2082 	struct kvm_lapic *apic = vcpu->arch.apic;
2083 
2084 	if (!vcpu->arch.apic)
2085 		return;
2086 
2087 	hrtimer_cancel(&apic->lapic_timer.timer);
2088 
2089 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2090 		static_key_slow_dec_deferred(&apic_hw_disabled);
2091 
2092 	if (!apic->sw_enabled)
2093 		static_key_slow_dec_deferred(&apic_sw_disabled);
2094 
2095 	if (apic->regs)
2096 		free_page((unsigned long)apic->regs);
2097 
2098 	kfree(apic);
2099 }
2100 
2101 /*
2102  *----------------------------------------------------------------------
2103  * LAPIC interface
2104  *----------------------------------------------------------------------
2105  */
2106 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2107 {
2108 	struct kvm_lapic *apic = vcpu->arch.apic;
2109 
2110 	if (!lapic_in_kernel(vcpu) ||
2111 		!apic_lvtt_tscdeadline(apic))
2112 		return 0;
2113 
2114 	return apic->lapic_timer.tscdeadline;
2115 }
2116 
2117 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2118 {
2119 	struct kvm_lapic *apic = vcpu->arch.apic;
2120 
2121 	if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
2122 			apic_lvtt_period(apic))
2123 		return;
2124 
2125 	hrtimer_cancel(&apic->lapic_timer.timer);
2126 	apic->lapic_timer.tscdeadline = data;
2127 	start_apic_timer(apic);
2128 }
2129 
/*
 * Set the TPR from a CR8 value: the low four bits of @cr8 become bits 7:4 of
 * the TPR, and bit 2 of the current TASKPRI value is carried over.
 */
void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
		     | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
}
2137 
2138 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2139 {
2140 	u64 tpr;
2141 
2142 	tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2143 
2144 	return (tpr & 0xf0) >> 4;
2145 }
2146 
2147 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2148 {
2149 	u64 old_value = vcpu->arch.apic_base;
2150 	struct kvm_lapic *apic = vcpu->arch.apic;
2151 
2152 	if (!apic)
2153 		value |= MSR_IA32_APICBASE_BSP;
2154 
2155 	vcpu->arch.apic_base = value;
2156 
2157 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2158 		kvm_update_cpuid(vcpu);
2159 
2160 	if (!apic)
2161 		return;
2162 
2163 	/* update jump label if enable bit changes */
2164 	if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2165 		if (value & MSR_IA32_APICBASE_ENABLE) {
2166 			kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2167 			static_key_slow_dec_deferred(&apic_hw_disabled);
2168 		} else {
2169 			static_key_slow_inc(&apic_hw_disabled.key);
2170 			recalculate_apic_map(vcpu->kvm);
2171 		}
2172 	}
2173 
2174 	if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2175 		kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2176 
2177 	if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2178 		kvm_x86_ops->set_virtual_apic_mode(vcpu);
2179 
2180 	apic->base_address = apic->vcpu->arch.apic_base &
2181 			     MSR_IA32_APICBASE_BASE;
2182 
2183 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
2184 	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
2185 		pr_warn_once("APIC base relocation is unsupported by KVM");
2186 }
2187 
2188 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2189 {
2190 	struct kvm_lapic *apic = vcpu->arch.apic;
2191 	int i;
2192 
2193 	if (!apic)
2194 		return;
2195 
2196 	/* Stop the timer in case it's a reset to an active apic */
2197 	hrtimer_cancel(&apic->lapic_timer.timer);
2198 
2199 	if (!init_event) {
2200 		kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
2201 		                         MSR_IA32_APICBASE_ENABLE);
2202 		kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2203 	}
2204 	kvm_apic_set_version(apic->vcpu);
2205 
2206 	for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2207 		kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2208 	apic_update_lvtt(apic);
2209 	if (kvm_vcpu_is_reset_bsp(vcpu) &&
2210 	    kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2211 		kvm_lapic_set_reg(apic, APIC_LVT0,
2212 			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2213 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2214 
2215 	kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
2216 	apic_set_spiv(apic, 0xff);
2217 	kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2218 	if (!apic_x2apic_mode(apic))
2219 		kvm_apic_set_ldr(apic, 0);
2220 	kvm_lapic_set_reg(apic, APIC_ESR, 0);
2221 	kvm_lapic_set_reg(apic, APIC_ICR, 0);
2222 	kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2223 	kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2224 	kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2225 	for (i = 0; i < 8; i++) {
2226 		kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2227 		kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2228 		kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2229 	}
2230 	apic->irr_pending = vcpu->arch.apicv_active;
2231 	apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
2232 	apic->highest_isr_cache = -1;
2233 	update_divide_count(apic);
2234 	atomic_set(&apic->lapic_timer.pending, 0);
2235 	if (kvm_vcpu_is_bsp(vcpu))
2236 		kvm_lapic_set_base(vcpu,
2237 				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
2238 	vcpu->arch.pv_eoi.msr_val = 0;
2239 	apic_update_ppr(apic);
2240 	if (vcpu->arch.apicv_active) {
2241 		kvm_x86_ops->apicv_post_state_restore(vcpu);
2242 		kvm_x86_ops->hwapic_irr_update(vcpu, -1);
2243 		kvm_x86_ops->hwapic_isr_update(vcpu, -1);
2244 	}
2245 
2246 	vcpu->arch.apic_arb_prio = 0;
2247 	vcpu->arch.apic_attention = 0;
2248 }
2249 
2250 /*
2251  *----------------------------------------------------------------------
2252  * timer interface
2253  *----------------------------------------------------------------------
2254  */
2255 
2256 static bool lapic_is_periodic(struct kvm_lapic *apic)
2257 {
2258 	return apic_lvtt_period(apic);
2259 }
2260 
2261 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2262 {
2263 	struct kvm_lapic *apic = vcpu->arch.apic;
2264 
2265 	if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2266 		return atomic_read(&apic->lapic_timer.pending);
2267 
2268 	return 0;
2269 }
2270 
2271 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2272 {
2273 	u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2274 	int vector, mode, trig_mode;
2275 
2276 	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2277 		vector = reg & APIC_VECTOR_MASK;
2278 		mode = reg & APIC_MODE_MASK;
2279 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2280 		return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2281 					NULL);
2282 	}
2283 	return 0;
2284 }
2285 
2286 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2287 {
2288 	struct kvm_lapic *apic = vcpu->arch.apic;
2289 
2290 	if (apic)
2291 		kvm_apic_local_deliver(apic, APIC_LVT0);
2292 }
2293 
2294 static const struct kvm_io_device_ops apic_mmio_ops = {
2295 	.read     = apic_mmio_read,
2296 	.write    = apic_mmio_write,
2297 };
2298 
2299 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2300 {
2301 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2302 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2303 
2304 	apic_timer_expired(apic);
2305 
2306 	if (lapic_is_periodic(apic)) {
2307 		advance_periodic_target_expiration(apic);
2308 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2309 		return HRTIMER_RESTART;
2310 	} else
2311 		return HRTIMER_NORESTART;
2312 }
2313 
2314 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2315 {
2316 	struct kvm_lapic *apic;
2317 
2318 	ASSERT(vcpu != NULL);
2319 
2320 	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2321 	if (!apic)
2322 		goto nomem;
2323 
2324 	vcpu->arch.apic = apic;
2325 
2326 	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2327 	if (!apic->regs) {
2328 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
2329 		       vcpu->vcpu_id);
2330 		goto nomem_free_apic;
2331 	}
2332 	apic->vcpu = vcpu;
2333 
2334 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2335 		     HRTIMER_MODE_ABS_HARD);
2336 	apic->lapic_timer.timer.function = apic_timer_fn;
2337 	if (timer_advance_ns == -1) {
2338 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2339 		lapic_timer_advance_dynamic = true;
2340 	} else {
2341 		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2342 		lapic_timer_advance_dynamic = false;
2343 	}
2344 
2345 	/*
2346 	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
2347 	 * thinking that APIC state has changed.
2348 	 */
2349 	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2350 	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2351 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2352 
2353 	return 0;
2354 nomem_free_apic:
2355 	kfree(apic);
2356 	vcpu->arch.apic = NULL;
2357 nomem:
2358 	return -ENOMEM;
2359 }
2360 
2361 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2362 {
2363 	struct kvm_lapic *apic = vcpu->arch.apic;
2364 	u32 ppr;
2365 
2366 	if (!kvm_apic_hw_enabled(apic))
2367 		return -1;
2368 
2369 	__apic_update_ppr(apic, &ppr);
2370 	return apic_has_interrupt_for_ppr(apic, ppr);
2371 }
2372 
2373 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2374 {
2375 	u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2376 	int r = 0;
2377 
2378 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2379 		r = 1;
2380 	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2381 	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2382 		r = 1;
2383 	return r;
2384 }
2385 
2386 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2387 {
2388 	struct kvm_lapic *apic = vcpu->arch.apic;
2389 
2390 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
2391 		kvm_apic_inject_pending_timer_irqs(apic);
2392 		atomic_set(&apic->lapic_timer.pending, 0);
2393 	}
2394 }
2395 
2396 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2397 {
2398 	int vector = kvm_apic_has_interrupt(vcpu);
2399 	struct kvm_lapic *apic = vcpu->arch.apic;
2400 	u32 ppr;
2401 
2402 	if (vector == -1)
2403 		return -1;
2404 
2405 	/*
2406 	 * We get here even with APIC virtualization enabled, if doing
2407 	 * nested virtualization and L1 runs with the "acknowledge interrupt
2408 	 * on exit" mode.  Then we cannot inject the interrupt via RVI,
2409 	 * because the process would deliver it through the IDT.
2410 	 */
2411 
2412 	apic_clear_irr(vector, apic);
2413 	if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2414 		/*
2415 		 * For auto-EOI interrupts, there might be another pending
2416 		 * interrupt above PPR, so check whether to raise another
2417 		 * KVM_REQ_EVENT.
2418 		 */
2419 		apic_update_ppr(apic);
2420 	} else {
2421 		/*
2422 		 * For normal interrupts, PPR has been raised and there cannot
2423 		 * be a higher-priority pending interrupt---except if there was
2424 		 * a concurrent interrupt injection, but that would have
2425 		 * triggered KVM_REQ_EVENT already.
2426 		 */
2427 		apic_set_isr(vector, apic);
2428 		__apic_update_ppr(apic, &ppr);
2429 	}
2430 
2431 	return vector;
2432 }
2433 
2434 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2435 		struct kvm_lapic_state *s, bool set)
2436 {
2437 	if (apic_x2apic_mode(vcpu->arch.apic)) {
2438 		u32 *id = (u32 *)(s->regs + APIC_ID);
2439 		u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2440 
2441 		if (vcpu->kvm->arch.x2apic_format) {
2442 			if (*id != vcpu->vcpu_id)
2443 				return -EINVAL;
2444 		} else {
2445 			if (set)
2446 				*id >>= 24;
2447 			else
2448 				*id <<= 24;
2449 		}
2450 
2451 		/* In x2APIC mode, the LDR is fixed and based on the id */
2452 		if (set)
2453 			*ldr = kvm_apic_calc_x2apic_ldr(*id);
2454 	}
2455 
2456 	return 0;
2457 }
2458 
2459 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2460 {
2461 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2462 	return kvm_apic_state_fixup(vcpu, s, false);
2463 }
2464 
2465 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2466 {
2467 	struct kvm_lapic *apic = vcpu->arch.apic;
2468 	int r;
2469 
2470 
2471 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2472 	/* set SPIV separately to get count of SW disabled APICs right */
2473 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2474 
2475 	r = kvm_apic_state_fixup(vcpu, s, true);
2476 	if (r)
2477 		return r;
2478 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2479 
2480 	recalculate_apic_map(vcpu->kvm);
2481 	kvm_apic_set_version(vcpu);
2482 
2483 	apic_update_ppr(apic);
2484 	hrtimer_cancel(&apic->lapic_timer.timer);
2485 	apic_update_lvtt(apic);
2486 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2487 	update_divide_count(apic);
2488 	start_apic_timer(apic);
2489 	apic->irr_pending = true;
2490 	apic->isr_count = vcpu->arch.apicv_active ?
2491 				1 : count_vectors(apic->regs + APIC_ISR);
2492 	apic->highest_isr_cache = -1;
2493 	if (vcpu->arch.apicv_active) {
2494 		kvm_x86_ops->apicv_post_state_restore(vcpu);
2495 		kvm_x86_ops->hwapic_irr_update(vcpu,
2496 				apic_find_highest_irr(apic));
2497 		kvm_x86_ops->hwapic_isr_update(vcpu,
2498 				apic_find_highest_isr(apic));
2499 	}
2500 	kvm_make_request(KVM_REQ_EVENT, vcpu);
2501 	if (ioapic_in_kernel(vcpu->kvm))
2502 		kvm_rtc_eoi_tracking_restore_one(vcpu);
2503 
2504 	vcpu->arch.apic_arb_prio = 0;
2505 
2506 	return 0;
2507 }
2508 
2509 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2510 {
2511 	struct hrtimer *timer;
2512 
2513 	if (!lapic_in_kernel(vcpu) ||
2514 		kvm_can_post_timer_interrupt(vcpu))
2515 		return;
2516 
2517 	timer = &vcpu->arch.apic->lapic_timer.timer;
2518 	if (hrtimer_cancel(timer))
2519 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2520 }
2521 
2522 /*
2523  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2524  *
2525  * Detect whether guest triggered PV EOI since the
2526  * last entry. If yes, set EOI on guests's behalf.
2527  * Clear PV EOI in guest memory in any case.
2528  */
2529 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2530 					struct kvm_lapic *apic)
2531 {
2532 	bool pending;
2533 	int vector;
2534 	/*
2535 	 * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2536 	 * and KVM_PV_EOI_ENABLED in guest memory as follows:
2537 	 *
2538 	 * KVM_APIC_PV_EOI_PENDING is unset:
2539 	 * 	-> host disabled PV EOI.
2540 	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2541 	 * 	-> host enabled PV EOI, guest did not execute EOI yet.
2542 	 * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2543 	 * 	-> host enabled PV EOI, guest executed EOI.
2544 	 */
2545 	BUG_ON(!pv_eoi_enabled(vcpu));
2546 	pending = pv_eoi_get_pending(vcpu);
2547 	/*
2548 	 * Clear pending bit in any case: it will be set again on vmentry.
2549 	 * While this might not be ideal from performance point of view,
2550 	 * this makes sure pv eoi is only enabled when we know it's safe.
2551 	 */
2552 	pv_eoi_clr_pending(vcpu);
2553 	if (pending)
2554 		return;
2555 	vector = apic_set_eoi(apic);
2556 	trace_kvm_pv_eoi(apic, vector);
2557 }
2558 
2559 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2560 {
2561 	u32 data;
2562 
2563 	if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2564 		apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2565 
2566 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2567 		return;
2568 
2569 	if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2570 				  sizeof(u32)))
2571 		return;
2572 
2573 	apic_set_tpr(vcpu->arch.apic, data & 0xff);
2574 }
2575 
2576 /*
2577  * apic_sync_pv_eoi_to_guest - called before vmentry
2578  *
2579  * Detect whether it's safe to enable PV EOI and
2580  * if yes do so.
2581  */
2582 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2583 					struct kvm_lapic *apic)
2584 {
2585 	if (!pv_eoi_enabled(vcpu) ||
2586 	    /* IRR set or many bits in ISR: could be nested. */
2587 	    apic->irr_pending ||
2588 	    /* Cache not set: could be safe but we don't bother. */
2589 	    apic->highest_isr_cache == -1 ||
2590 	    /* Need EOI to update ioapic. */
2591 	    kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2592 		/*
2593 		 * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2594 		 * so we need not do anything here.
2595 		 */
2596 		return;
2597 	}
2598 
2599 	pv_eoi_set_pending(apic->vcpu);
2600 }
2601 
2602 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2603 {
2604 	u32 data, tpr;
2605 	int max_irr, max_isr;
2606 	struct kvm_lapic *apic = vcpu->arch.apic;
2607 
2608 	apic_sync_pv_eoi_to_guest(vcpu, apic);
2609 
2610 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2611 		return;
2612 
2613 	tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2614 	max_irr = apic_find_highest_irr(apic);
2615 	if (max_irr < 0)
2616 		max_irr = 0;
2617 	max_isr = apic_find_highest_isr(apic);
2618 	if (max_isr < 0)
2619 		max_isr = 0;
2620 	data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2621 
2622 	kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2623 				sizeof(u32));
2624 }
2625 
2626 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2627 {
2628 	if (vapic_addr) {
2629 		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2630 					&vcpu->arch.apic->vapic_cache,
2631 					vapic_addr, sizeof(u32)))
2632 			return -EINVAL;
2633 		__set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2634 	} else {
2635 		__clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2636 	}
2637 
2638 	vcpu->arch.apic->vapic_addr = vapic_addr;
2639 	return 0;
2640 }
2641 
2642 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2643 {
2644 	struct kvm_lapic *apic = vcpu->arch.apic;
2645 	u32 reg = (msr - APIC_BASE_MSR) << 4;
2646 
2647 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2648 		return 1;
2649 
2650 	if (reg == APIC_ICR2)
2651 		return 1;
2652 
2653 	/* if this is ICR write vector before command */
2654 	if (reg == APIC_ICR)
2655 		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2656 	return kvm_lapic_reg_write(apic, reg, (u32)data);
2657 }
2658 
2659 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2660 {
2661 	struct kvm_lapic *apic = vcpu->arch.apic;
2662 	u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2663 
2664 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2665 		return 1;
2666 
2667 	if (reg == APIC_DFR || reg == APIC_ICR2)
2668 		return 1;
2669 
2670 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2671 		return 1;
2672 	if (reg == APIC_ICR)
2673 		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2674 
2675 	*data = (((u64)high) << 32) | low;
2676 
2677 	return 0;
2678 }
2679 
2680 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2681 {
2682 	struct kvm_lapic *apic = vcpu->arch.apic;
2683 
2684 	if (!lapic_in_kernel(vcpu))
2685 		return 1;
2686 
2687 	/* if this is ICR write vector before command */
2688 	if (reg == APIC_ICR)
2689 		kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2690 	return kvm_lapic_reg_write(apic, reg, (u32)data);
2691 }
2692 
2693 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2694 {
2695 	struct kvm_lapic *apic = vcpu->arch.apic;
2696 	u32 low, high = 0;
2697 
2698 	if (!lapic_in_kernel(vcpu))
2699 		return 1;
2700 
2701 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
2702 		return 1;
2703 	if (reg == APIC_ICR)
2704 		kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2705 
2706 	*data = (((u64)high) << 32) | low;
2707 
2708 	return 0;
2709 }
2710 
2711 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2712 {
2713 	u64 addr = data & ~KVM_MSR_ENABLED;
2714 	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2715 	unsigned long new_len;
2716 
2717 	if (!IS_ALIGNED(addr, 4))
2718 		return 1;
2719 
2720 	vcpu->arch.pv_eoi.msr_val = data;
2721 	if (!pv_eoi_enabled(vcpu))
2722 		return 0;
2723 
2724 	if (addr == ghc->gpa && len <= ghc->len)
2725 		new_len = ghc->len;
2726 	else
2727 		new_len = len;
2728 
2729 	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2730 }
2731 
2732 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2733 {
2734 	struct kvm_lapic *apic = vcpu->arch.apic;
2735 	u8 sipi_vector;
2736 	unsigned long pe;
2737 
2738 	if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2739 		return;
2740 
2741 	/*
2742 	 * INITs are latched while CPU is in specific states
2743 	 * (SMM, VMX non-root mode, SVM with GIF=0).
2744 	 * Because a CPU cannot be in these states immediately
2745 	 * after it has processed an INIT signal (and thus in
2746 	 * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
2747 	 * and leave the INIT pending.
2748 	 */
2749 	if (kvm_vcpu_latch_init(vcpu)) {
2750 		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2751 		if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2752 			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2753 		return;
2754 	}
2755 
2756 	pe = xchg(&apic->pending_events, 0);
2757 	if (test_bit(KVM_APIC_INIT, &pe)) {
2758 		kvm_vcpu_reset(vcpu, true);
2759 		if (kvm_vcpu_is_bsp(apic->vcpu))
2760 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2761 		else
2762 			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2763 	}
2764 	if (test_bit(KVM_APIC_SIPI, &pe) &&
2765 	    vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2766 		/* evaluate pending_events before reading the vector */
2767 		smp_rmb();
2768 		sipi_vector = apic->sipi_vector;
2769 		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2770 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2771 	}
2772 }
2773 
2774 void kvm_lapic_init(void)
2775 {
2776 	/* do not patch jump label more than once per second */
2777 	jump_label_rate_limit(&apic_hw_disabled, HZ);
2778 	jump_label_rate_limit(&apic_sw_disabled, HZ);
2779 }
2780 
2781 void kvm_lapic_exit(void)
2782 {
2783 	static_key_deferred_flush(&apic_hw_disabled);
2784 	static_key_deferred_flush(&apic_sw_disabled);
2785 }
2786