1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14 
15 #define pr_fmt(fmt) "SVM: " fmt
16 
17 #include <linux/kvm_types.h>
18 #include <linux/hashtable.h>
19 #include <linux/amd-iommu.h>
20 #include <linux/kvm_host.h>
21 
22 #include <asm/irq_remapping.h>
23 
24 #include "trace.h"
25 #include "lapic.h"
26 #include "x86.h"
27 #include "irq.h"
28 #include "svm.h"
29 
30 /* enable / disable AVIC */
31 int avic;
32 #ifdef CONFIG_X86_LOCAL_APIC
33 module_param(avic, int, S_IRUGO);
34 #endif
35 
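/*
 * MSR address of the AVIC doorbell register.  svm_deliver_avic_intr()
 * below writes the target CPU's APIC ID to this MSR so that the target
 * CPU processes the pending virtual interrupt without a VM exit.
 */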
36 #define SVM_AVIC_DOORBELL	0xc001011b
37 
38 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
39 
40 /*
41  * 0xff is broadcast, so the max index allowed for physical APIC ID
42  * table is 0xfe.  APIC IDs above 0xff are reserved.
43  */
44 #define AVIC_MAX_PHYSICAL_ID_COUNT	255
45 
46 #define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
47 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
48 #define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
49 
50 /* AVIC GATAG is encoded using VM and VCPU IDs */
51 #define AVIC_VCPU_ID_BITS		8
52 #define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)
53 
54 #define AVIC_VM_ID_BITS			24
55 #define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
56 #define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)
57 
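/*
 * GA tag layout implied by the macros below:
 *   bits [31:8] - AVIC VM ID   (AVIC_VM_ID_BITS = 24)
 *   bits  [7:0] - vCPU ID      (AVIC_VCPU_ID_BITS = 8)
 */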
58 #define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
59 						(y & AVIC_VCPU_ID_MASK))
60 #define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
61 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
62 
63 /* Note:
64  * This hash table is used to map a VM ID to a struct kvm_svm
65  * when handling an AMD IOMMU GALOG notification to schedule in
66  * a particular vCPU.
67  */
68 #define SVM_VM_DATA_HASH_BITS	8
69 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
70 static u32 next_vm_id = 0;
71 static bool next_vm_id_wrapped = false;
72 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
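/*
 * svm_vm_data_hash_lock protects svm_vm_data_hash as well as the
 * next_vm_id / next_vm_id_wrapped allocator state above (see
 * avic_vm_init(), avic_vm_destroy() and avic_ga_log_notifier()).
 */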
73 
74 /*
75  * This is a wrapper of struct amd_ir_data.
76  */
77 struct amd_svm_iommu_ir {
78 	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
79 	void *data;		/* Storing pointer to struct amd_ir_data */
80 };
81 
82 enum avic_ipi_failure_cause {
83 	AVIC_IPI_FAILURE_INVALID_INT_TYPE,
84 	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
85 	AVIC_IPI_FAILURE_INVALID_TARGET,
86 	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
87 };
88 
89 /* Note:
90  * This function is called from the IOMMU driver to notify
91  * SVM to schedule in a particular vCPU of a particular VM.
92  */
93 int avic_ga_log_notifier(u32 ga_tag)
94 {
95 	unsigned long flags;
96 	struct kvm_svm *kvm_svm;
97 	struct kvm_vcpu *vcpu = NULL;
98 	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
99 	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
100 
101 	pr_debug("%s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
102 	trace_kvm_avic_ga_log(vm_id, vcpu_id);
103 
104 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
105 	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
106 		if (kvm_svm->avic_vm_id != vm_id)
107 			continue;
108 		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
109 		break;
110 	}
111 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
112 
113 	/* Note:
114 	 * At this point, the IOMMU should have already set the pending
115 	 * bit in the vAPIC backing page. So, we just need to schedule
116 	 * in the vcpu.
117 	 */
118 	if (vcpu)
119 		kvm_vcpu_wake_up(vcpu);
120 
121 	return 0;
122 }
123 
124 void avic_vm_destroy(struct kvm *kvm)
125 {
126 	unsigned long flags;
127 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
128 
129 	if (!avic)
130 		return;
131 
132 	if (kvm_svm->avic_logical_id_table_page)
133 		__free_page(kvm_svm->avic_logical_id_table_page);
134 	if (kvm_svm->avic_physical_id_table_page)
135 		__free_page(kvm_svm->avic_physical_id_table_page);
136 
137 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
138 	hash_del(&kvm_svm->hnode);
139 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
140 }
141 
142 int avic_vm_init(struct kvm *kvm)
143 {
144 	unsigned long flags;
145 	int err = -ENOMEM;
146 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
147 	struct kvm_svm *k2;
148 	struct page *p_page;
149 	struct page *l_page;
150 	u32 vm_id;
151 
152 	if (!avic)
153 		return 0;
154 
155 	/* Allocating physical APIC ID table (4KB) */
156 	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
157 	if (!p_page)
158 		goto free_avic;
159 
160 	kvm_svm->avic_physical_id_table_page = p_page;
161 
162 	/* Allocating logical APIC ID table (4KB) */
163 	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
164 	if (!l_page)
165 		goto free_avic;
166 
167 	kvm_svm->avic_logical_id_table_page = l_page;
168 
169 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
170  again:
171 	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
172 	if (vm_id == 0) { /* id is 1-based, zero is not okay */
173 		next_vm_id_wrapped = true;
174 		goto again;
175 	}
176 	/* Is it still in use? Only possible if wrapped at least once */
177 	if (next_vm_id_wrapped) {
178 		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
179 			if (k2->avic_vm_id == vm_id)
180 				goto again;
181 		}
182 	}
183 	kvm_svm->avic_vm_id = vm_id;
184 	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
185 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
186 
187 	return 0;
188 
189 free_avic:
190 	avic_vm_destroy(kvm);
191 	return err;
192 }
193 
194 void avic_init_vmcb(struct vcpu_svm *svm)
195 {
196 	struct vmcb *vmcb = svm->vmcb;
197 	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
198 	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
199 	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
200 	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
201 
202 	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
203 	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
204 	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
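	/*
	 * The low bits of the physical APIC ID table pointer hold the
	 * maximum table index, hence AVIC_MAX_PHYSICAL_ID_COUNT is ORed
	 * in below; the rest of the field is the table address.
	 */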
205 	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
206 	if (kvm_apicv_activated(svm->vcpu.kvm))
207 		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
208 	else
209 		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
210 }
211 
212 static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
213 				       unsigned int index)
214 {
215 	u64 *avic_physical_id_table;
216 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
217 
218 	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
219 		return NULL;
220 
221 	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
222 
223 	return &avic_physical_id_table[index];
224 }
225 
226 /*
227  * Note:
228  * AVIC hardware walks the nested page table to check permissions,
229  * but does not use the SPA address specified in the leaf page
230  * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
231  * field of the VMCB. Therefore, we set up the
232  * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
233  */
234 static int avic_update_access_page(struct kvm *kvm, bool activate)
235 {
236 	int ret = 0;
237 
238 	mutex_lock(&kvm->slots_lock);
239 	/*
240 	 * During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
241 	 * an APICv mode change, which updates the APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
242 	 * memory region. So, we need to ensure that kvm->mm == current->mm.
243 	 */
244 	if ((kvm->arch.apic_access_page_done == activate) ||
245 	    (kvm->mm != current->mm))
246 		goto out;
247 
248 	ret = __x86_set_memory_region(kvm,
249 				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
250 				      APIC_DEFAULT_PHYS_BASE,
251 				      activate ? PAGE_SIZE : 0);
252 	if (ret)
253 		goto out;
254 
255 	kvm->arch.apic_access_page_done = activate;
256 out:
257 	mutex_unlock(&kvm->slots_lock);
258 	return ret;
259 }
260 
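/*
 * The AVIC backing page is the vCPU's in-kernel APIC register page
 * (vcpu->arch.apic->regs).  Record its physical address in this vCPU's
 * physical APIC ID table entry (and mark the entry valid) so that the
 * hardware can access the virtual APIC state directly.
 */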
261 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
262 {
263 	u64 *entry, new_entry;
264 	int id = vcpu->vcpu_id;
265 	struct vcpu_svm *svm = to_svm(vcpu);
266 
267 	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
268 		return -EINVAL;
269 
270 	if (!svm->vcpu.arch.apic->regs)
271 		return -EINVAL;
272 
273 	if (kvm_apicv_activated(vcpu->kvm)) {
274 		int ret;
275 
276 		ret = avic_update_access_page(vcpu->kvm, true);
277 		if (ret)
278 			return ret;
279 	}
280 
281 	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
282 
283 	/* Set the AVIC backing page address in the physical APIC ID table */
284 	entry = avic_get_physical_id_entry(vcpu, id);
285 	if (!entry)
286 		return -EINVAL;
287 
288 	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
289 			      AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
290 			      AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
291 	WRITE_ONCE(*entry, new_entry);
292 
293 	svm->avic_physical_id_cache = entry;
294 
295 	return 0;
296 }
297 
298 int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
299 {
300 	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
301 	u32 icrl = svm->vmcb->control.exit_info_1;
302 	u32 id = svm->vmcb->control.exit_info_2 >> 32;
303 	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
304 	struct kvm_lapic *apic = svm->vcpu.arch.apic;
305 
306 	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
307 
308 	switch (id) {
309 	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
310 		/*
311 		 * AVIC hardware handles the generation of
312 		 * IPIs when the specified Message Type is Fixed
313 		 * (also known as fixed delivery mode) and
314 		 * the Trigger Mode is edge-triggered. The hardware
315 		 * also supports self and broadcast delivery modes
316 		 * specified via the Destination Shorthand (DSH)
317 		 * field of the ICRL. Logical and physical APIC ID
318 		 * formats are supported. All other IPI types cause
319 		 * a #VMEXIT, which needs to be emulated.
320 		 */
321 		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
322 		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
323 		break;
324 	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
325 		int i;
326 		struct kvm_vcpu *vcpu;
327 		struct kvm *kvm = svm->vcpu.kvm;
328 		struct kvm_lapic *apic = svm->vcpu.arch.apic;
329 
330 		/*
331 		 * At this point, we expect that the AVIC HW has already
332 		 * set the appropriate IRR bits on the valid target
333 		 * vcpus. So, we just need to kick the appropriate vcpu.
334 		 */
335 		kvm_for_each_vcpu(i, vcpu, kvm) {
336 			bool m = kvm_apic_match_dest(vcpu, apic,
337 						     icrl & APIC_SHORT_MASK,
338 						     GET_APIC_DEST_FIELD(icrh),
339 						     icrl & APIC_DEST_MASK);
340 
341 			if (m && !avic_vcpu_is_running(vcpu))
342 				kvm_vcpu_wake_up(vcpu);
343 		}
344 		break;
345 	}
346 	case AVIC_IPI_FAILURE_INVALID_TARGET:
347 		WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
348 			  index, svm->vcpu.vcpu_id, icrh, icrl);
349 		break;
350 	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
351 		WARN_ONCE(1, "Invalid backing page\n");
352 		break;
353 	default:
354 		pr_err("Unknown IPI interception\n");
355 	}
356 
357 	return 1;
358 }
359 
360 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
361 {
362 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
363 	int index;
364 	u32 *logical_apic_id_table;
365 	int dlid = GET_APIC_LOGICAL_ID(ldr);
366 
367 	if (!dlid)
368 		return NULL;
369 
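	/*
	 * In flat mode the logical ID is an 8-bit bitmap, so the table
	 * index is simply the bit position.  In cluster mode the high
	 * nibble selects the cluster and the low nibble is a 4-bit bitmap
	 * within it, so the index is (cluster * 4) + bit position.
	 */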
370 	if (flat) { /* flat */
371 		index = ffs(dlid) - 1;
372 		if (index > 7)
373 			return NULL;
374 	} else { /* cluster */
375 		int cluster = (dlid & 0xf0) >> 4;
376 		int apic = ffs(dlid & 0x0f) - 1;
377 
378 		if ((apic < 0) || (apic > 7) ||
379 		    (cluster >= 0xf))
380 			return NULL;
381 		index = (cluster << 2) + apic;
382 	}
383 
384 	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
385 
386 	return &logical_apic_id_table[index];
387 }
388 
389 static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
390 {
391 	bool flat;
392 	u32 *entry, new_entry;
393 
394 	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
395 	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
396 	if (!entry)
397 		return -EINVAL;
398 
399 	new_entry = READ_ONCE(*entry);
400 	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
401 	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
402 	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
403 	WRITE_ONCE(*entry, new_entry);
404 
405 	return 0;
406 }
407 
408 static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
409 {
410 	struct vcpu_svm *svm = to_svm(vcpu);
411 	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
412 	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
413 
414 	if (entry)
415 		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
416 }
417 
418 static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
419 {
420 	int ret = 0;
421 	struct vcpu_svm *svm = to_svm(vcpu);
422 	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
423 	u32 id = kvm_xapic_id(vcpu->arch.apic);
424 
425 	if (ldr == svm->ldr_reg)
426 		return 0;
427 
428 	avic_invalidate_logical_id_entry(vcpu);
429 
430 	if (ldr)
431 		ret = avic_ldr_write(vcpu, id, ldr);
432 
433 	if (!ret)
434 		svm->ldr_reg = ldr;
435 
436 	return ret;
437 }
438 
439 static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
440 {
441 	u64 *old, *new;
442 	struct vcpu_svm *svm = to_svm(vcpu);
443 	u32 id = kvm_xapic_id(vcpu->arch.apic);
444 
445 	if (vcpu->vcpu_id == id)
446 		return 0;
447 
448 	old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
449 	new = avic_get_physical_id_entry(vcpu, id);
450 	if (!new || !old)
451 		return 1;
452 
453 	/* We need to move the physical_id_entry to the new offset */
454 	*new = *old;
455 	*old = 0ULL;
456 	to_svm(vcpu)->avic_physical_id_cache = new;
457 
458 	/*
459 	 * Also update the guest physical APIC ID in the logical
460 	 * APIC ID table entry if the LDR has already been set up.
461 	 */
462 	if (svm->ldr_reg)
463 		avic_handle_ldr_update(vcpu);
464 
465 	return 0;
466 }
467 
468 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
469 {
470 	struct vcpu_svm *svm = to_svm(vcpu);
471 	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
472 
473 	if (svm->dfr_reg == dfr)
474 		return;
475 
476 	avic_invalidate_logical_id_entry(vcpu);
477 	svm->dfr_reg = dfr;
478 }
479 
480 static int avic_unaccel_trap_write(struct vcpu_svm *svm)
481 {
482 	struct kvm_lapic *apic = svm->vcpu.arch.apic;
483 	u32 offset = svm->vmcb->control.exit_info_1 &
484 				AVIC_UNACCEL_ACCESS_OFFSET_MASK;
485 
486 	switch (offset) {
487 	case APIC_ID:
488 		if (avic_handle_apic_id_update(&svm->vcpu))
489 			return 0;
490 		break;
491 	case APIC_LDR:
492 		if (avic_handle_ldr_update(&svm->vcpu))
493 			return 0;
494 		break;
495 	case APIC_DFR:
496 		avic_handle_dfr_update(&svm->vcpu);
497 		break;
498 	default:
499 		break;
500 	}
501 
502 	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
503 
504 	return 1;
505 }
506 
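/*
 * APIC register offsets for which an unaccelerated AVIC access is reported
 * as a trap: the write has already reached the backing page and only needs
 * post-processing.  Accesses to other offsets are reported as faults and
 * are fully emulated in avic_unaccelerated_access_interception().
 */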
507 static bool is_avic_unaccelerated_access_trap(u32 offset)
508 {
509 	bool ret = false;
510 
511 	switch (offset) {
512 	case APIC_ID:
513 	case APIC_EOI:
514 	case APIC_RRR:
515 	case APIC_LDR:
516 	case APIC_DFR:
517 	case APIC_SPIV:
518 	case APIC_ESR:
519 	case APIC_ICR:
520 	case APIC_LVTT:
521 	case APIC_LVTTHMR:
522 	case APIC_LVTPC:
523 	case APIC_LVT0:
524 	case APIC_LVT1:
525 	case APIC_LVTERR:
526 	case APIC_TMICT:
527 	case APIC_TDCR:
528 		ret = true;
529 		break;
530 	default:
531 		break;
532 	}
533 	return ret;
534 }
535 
536 int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
537 {
538 	int ret = 0;
539 	u32 offset = svm->vmcb->control.exit_info_1 &
540 		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
541 	u32 vector = svm->vmcb->control.exit_info_2 &
542 		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
543 	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
544 		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
545 	bool trap = is_avic_unaccelerated_access_trap(offset);
546 
547 	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
548 					    trap, write, vector);
549 	if (trap) {
550 		/* Handling Trap */
551 		WARN_ONCE(!write, "svm: Handling trap read.\n");
552 		ret = avic_unaccel_trap_write(svm);
553 	} else {
554 		/* Handling Fault */
555 		ret = kvm_emulate_instruction(&svm->vcpu, 0);
556 	}
557 
558 	return ret;
559 }
560 
561 int avic_init_vcpu(struct vcpu_svm *svm)
562 {
563 	int ret;
564 	struct kvm_vcpu *vcpu = &svm->vcpu;
565 
566 	if (!avic || !irqchip_in_kernel(vcpu->kvm))
567 		return 0;
568 
569 	ret = avic_init_backing_page(&svm->vcpu);
570 	if (ret)
571 		return ret;
572 
573 	INIT_LIST_HEAD(&svm->ir_list);
574 	spin_lock_init(&svm->ir_list_lock);
575 	svm->dfr_reg = APIC_DFR_FLAT;
576 
577 	return ret;
578 }
579 
580 void avic_post_state_restore(struct kvm_vcpu *vcpu)
581 {
582 	if (avic_handle_apic_id_update(vcpu) != 0)
583 		return;
584 	avic_handle_dfr_update(vcpu);
585 	avic_handle_ldr_update(vcpu);
586 }
587 
588 void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
589 {
590 	if (!avic || !lapic_in_kernel(vcpu))
591 		return;
592 
593 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
594 	kvm_request_apicv_update(vcpu->kvm, activate,
595 				 APICV_INHIBIT_REASON_IRQWIN);
596 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
597 }
598 
599 void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
600 {
601 	return;
602 }
603 
604 void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
605 {
606 }
607 
608 void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
609 {
610 }
611 
612 static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
613 {
614 	int ret = 0;
615 	unsigned long flags;
616 	struct amd_svm_iommu_ir *ir;
617 	struct vcpu_svm *svm = to_svm(vcpu);
618 
619 	if (!kvm_arch_has_assigned_device(vcpu->kvm))
620 		return 0;
621 
622 	/*
623 	 * Here, we go through the per-vcpu ir_list to update all existing
624 	 * interrupt remapping table entries targeting this vcpu.
625 	 */
626 	spin_lock_irqsave(&svm->ir_list_lock, flags);
627 
628 	if (list_empty(&svm->ir_list))
629 		goto out;
630 
631 	list_for_each_entry(ir, &svm->ir_list, node) {
632 		if (activate)
633 			ret = amd_iommu_activate_guest_mode(ir->data);
634 		else
635 			ret = amd_iommu_deactivate_guest_mode(ir->data);
636 		if (ret)
637 			break;
638 	}
639 out:
640 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
641 	return ret;
642 }
643 
644 void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
645 {
646 	struct vcpu_svm *svm = to_svm(vcpu);
647 	struct vmcb *vmcb = svm->vmcb;
648 	bool activated = kvm_vcpu_apicv_active(vcpu);
649 
650 	if (!avic)
651 		return;
652 
653 	if (activated) {
654 		/*
655 		 * During AVIC temporary deactivation, the guest could update
656 		 * APIC ID, DFR and LDR registers, which would not be trapped
657 		 * by avic_unaccelerated_access_interception(). In this case,
658 		 * we need to check and update the AVIC logical APIC ID table
659 		 * accordingly before re-activating.
660 		 */
661 		avic_post_state_restore(vcpu);
662 		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
663 	} else {
664 		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
665 	}
666 	vmcb_mark_dirty(vmcb, VMCB_AVIC);
667 
668 	svm_set_pi_irte_mode(vcpu, activated);
669 }
670 
671 void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
672 {
673 	return;
674 }
675 
676 int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
677 {
678 	if (!vcpu->arch.apicv_active)
679 		return -1;
680 
681 	kvm_lapic_set_irr(vec, vcpu->arch.apic);
682 	smp_mb__after_atomic();
683 
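	/*
	 * If the target vCPU is running in guest mode, ring the AVIC
	 * doorbell on its physical CPU so the hardware delivers the
	 * interrupt from the vIRR (no doorbell is needed when the target
	 * runs on the current CPU).  Otherwise wake the vCPU up so it
	 * notices the newly set IRR bit.
	 */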
684 	if (avic_vcpu_is_running(vcpu)) {
685 		int cpuid = vcpu->cpu;
686 
687 		if (cpuid != get_cpu())
688 			wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
689 		put_cpu();
690 	} else
691 		kvm_vcpu_wake_up(vcpu);
692 
693 	return 0;
694 }
695 
696 bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
697 {
698 	return false;
699 }
700 
701 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
702 {
703 	unsigned long flags;
704 	struct amd_svm_iommu_ir *cur;
705 
706 	spin_lock_irqsave(&svm->ir_list_lock, flags);
707 	list_for_each_entry(cur, &svm->ir_list, node) {
708 		if (cur->data != pi->ir_data)
709 			continue;
710 		list_del(&cur->node);
711 		kfree(cur);
712 		break;
713 	}
714 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
715 }
716 
717 static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
718 {
719 	int ret = 0;
720 	unsigned long flags;
721 	struct amd_svm_iommu_ir *ir;
722 
723 	/*
724 	 * In some cases, the existing irte is updated and re-set,
725 	 * so we need to check here if it's already been added
726 	 * to the ir_list.
727 	 */
728 	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
729 		struct kvm *kvm = svm->vcpu.kvm;
730 		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
731 		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
732 		struct vcpu_svm *prev_svm;
733 
734 		if (!prev_vcpu) {
735 			ret = -EINVAL;
736 			goto out;
737 		}
738 
739 		prev_svm = to_svm(prev_vcpu);
740 		svm_ir_list_del(prev_svm, pi);
741 	}
742 
743 	/*
744 	 * Allocate a new amd_svm_iommu_ir, which will be
745 	 * added to the per-vcpu ir_list.
746 	 */
747 	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
748 	if (!ir) {
749 		ret = -ENOMEM;
750 		goto out;
751 	}
752 	ir->data = pi->ir_data;
753 
754 	spin_lock_irqsave(&svm->ir_list_lock, flags);
755 	list_add(&ir->node, &svm->ir_list);
756 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
757 out:
758 	return ret;
759 }
760 
761 /*
762  * Note:
763  * The HW cannot support posting multicast/broadcast
764  * interrupts to a vCPU. So, we still use legacy interrupt
765  * remapping for these kinds of interrupts.
766  *
767  * For lowest-priority interrupts, we only support
768  * those with a single CPU as the destination, e.g. the user
769  * configures the interrupts via /proc/irq or uses
770  * irqbalance to make the interrupts single-CPU.
771  */
772 static int
773 get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
774 		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
775 {
776 	struct kvm_lapic_irq irq;
777 	struct kvm_vcpu *vcpu = NULL;
778 
779 	kvm_set_msi_irq(kvm, e, &irq);
780 
781 	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
782 	    !kvm_irq_is_postable(&irq)) {
783 		pr_debug("%s: use legacy intr remap mode for irq %u\n",
784 			 __func__, irq.vector);
785 		return -1;
786 	}
787 
788 	pr_debug("%s: use GA mode for irq %u\n", __func__,
789 		 irq.vector);
790 	*svm = to_svm(vcpu);
791 	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
792 	vcpu_info->vector = irq.vector;
793 
794 	return 0;
795 }
796 
797 /*
798  * svm_update_pi_irte - set IRTE for Posted-Interrupts
799  *
800  * @kvm: kvm
801  * @host_irq: host irq of the interrupt
802  * @guest_irq: gsi of the interrupt
803  * @set: set or unset PI
804  * returns 0 on success, < 0 on failure
805  */
806 int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
807 		       uint32_t guest_irq, bool set)
808 {
809 	struct kvm_kernel_irq_routing_entry *e;
810 	struct kvm_irq_routing_table *irq_rt;
811 	int idx, ret = -EINVAL;
812 
813 	if (!kvm_arch_has_assigned_device(kvm) ||
814 	    !irq_remapping_cap(IRQ_POSTING_CAP))
815 		return 0;
816 
817 	pr_debug("%s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
818 		 __func__, host_irq, guest_irq, set);
819 
820 	idx = srcu_read_lock(&kvm->irq_srcu);
821 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
822 	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
823 
824 	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
825 		struct vcpu_data vcpu_info;
826 		struct vcpu_svm *svm = NULL;
827 
828 		if (e->type != KVM_IRQ_ROUTING_MSI)
829 			continue;
830 
831 		/*
832 		 * Here, we set up legacy mode in the following cases:
833 		 * 1. When the interrupt cannot be targeted to a specific vcpu.
834 		 * 2. When unsetting the posted interrupt.
835 		 * 3. When APIC virtualization is disabled for the vcpu.
836 		 * 4. When the IRQ has an incompatible delivery mode (SMI, INIT, etc.).
837 		 */
838 		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
839 		    kvm_vcpu_apicv_active(&svm->vcpu)) {
840 			struct amd_iommu_pi_data pi;
841 
842 			/* Try to enable guest_mode in IRTE */
843 			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
844 					    AVIC_HPA_MASK);
845 			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
846 						     svm->vcpu.vcpu_id);
847 			pi.is_guest_mode = true;
848 			pi.vcpu_data = &vcpu_info;
849 			ret = irq_set_vcpu_affinity(host_irq, &pi);
850 
851 			/*
852 			 * Here, we have successfully set up vcpu affinity in
853 			 * IOMMU guest mode. Now, we need to store the posted
854 			 * interrupt information in a per-vcpu ir_list so that
855 			 * we can reference it directly when we update the vcpu
856 			 * scheduling information in the IOMMU irte.
857 			 */
858 			if (!ret && pi.is_guest_mode)
859 				svm_ir_list_add(svm, &pi);
860 		} else {
861 			/* Use legacy mode in IRTE */
862 			struct amd_iommu_pi_data pi;
863 
864 			/*
865 			 * Here, pi is used to:
866 			 * - Tell the IOMMU to use legacy mode for this interrupt.
867 			 * - Retrieve the ga_tag of the prior interrupt remapping data.
868 			 */
869 			pi.prev_ga_tag = 0;
870 			pi.is_guest_mode = false;
871 			ret = irq_set_vcpu_affinity(host_irq, &pi);
872 
873 			/*
874 			 * Check if the posted interrupt was previously
875 			 * set up in guest_mode by checking if the ga_tag
876 			 * was cached. If so, we need to clean up the per-vcpu
877 			 * ir_list.
878 			 */
879 			if (!ret && pi.prev_ga_tag) {
880 				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
881 				struct kvm_vcpu *vcpu;
882 
883 				vcpu = kvm_get_vcpu_by_id(kvm, id);
884 				if (vcpu)
885 					svm_ir_list_del(to_svm(vcpu), &pi);
886 			}
887 		}
888 
889 		if (!ret && svm) {
890 			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
891 						 e->gsi, vcpu_info.vector,
892 						 vcpu_info.pi_desc_addr, set);
893 		}
894 
895 		if (ret < 0) {
896 			pr_err("%s: failed to update PI IRTE\n", __func__);
897 			goto out;
898 		}
899 	}
900 
901 	ret = 0;
902 out:
903 	srcu_read_unlock(&kvm->irq_srcu, idx);
904 	return ret;
905 }
906 
907 bool svm_check_apicv_inhibit_reasons(ulong bit)
908 {
909 	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
910 			  BIT(APICV_INHIBIT_REASON_HYPERV) |
911 			  BIT(APICV_INHIBIT_REASON_NESTED) |
912 			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
913 			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
914 			  BIT(APICV_INHIBIT_REASON_X2APIC);
915 
916 	return supported & BIT(bit);
917 }
918 
919 void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
920 {
921 	avic_update_access_page(kvm, activate);
922 }
923 
924 static inline int
925 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
926 {
927 	int ret = 0;
928 	unsigned long flags;
929 	struct amd_svm_iommu_ir *ir;
930 	struct vcpu_svm *svm = to_svm(vcpu);
931 
932 	if (!kvm_arch_has_assigned_device(vcpu->kvm))
933 		return 0;
934 
935 	/*
936 	 * Here, we go through the per-vcpu ir_list to update all existing
937 	 * interrupt remapping table entries targeting this vcpu.
938 	 */
939 	spin_lock_irqsave(&svm->ir_list_lock, flags);
940 
941 	if (list_empty(&svm->ir_list))
942 		goto out;
943 
944 	list_for_each_entry(ir, &svm->ir_list, node) {
945 		ret = amd_iommu_update_ga(cpu, r, ir->data);
946 		if (ret)
947 			break;
948 	}
949 out:
950 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
951 	return ret;
952 }
953 
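/*
 * avic_vcpu_load() and avic_vcpu_put() maintain the host physical APIC ID
 * and the IS_RUNNING bit in this vCPU's physical APIC ID table entry, and
 * mirror that state to the IOMMU via avic_update_iommu_vcpu_affinity().
 * The IS_RUNNING bit is what svm_deliver_avic_intr() (and the hardware
 * IPI path) consult to decide whether doorbell delivery can be used or
 * the target vCPU must be woken up instead.
 */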
954 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
955 {
956 	u64 entry;
957 	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
958 	int h_physical_id = kvm_cpu_get_apicid(cpu);
959 	struct vcpu_svm *svm = to_svm(vcpu);
960 
961 	if (!kvm_vcpu_apicv_active(vcpu))
962 		return;
963 
964 	/*
965 	 * Since the host physical APIC id is 8 bits,
966 	 * we can support host APIC IDs up to 255.
967 	 */
968 	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
969 		return;
970 
971 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
972 	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
973 
974 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
975 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
976 
977 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
978 	if (svm->avic_is_running)
979 		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
980 
981 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
982 	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
983 					svm->avic_is_running);
984 }
985 
986 void avic_vcpu_put(struct kvm_vcpu *vcpu)
987 {
988 	u64 entry;
989 	struct vcpu_svm *svm = to_svm(vcpu);
990 
991 	if (!kvm_vcpu_apicv_active(vcpu))
992 		return;
993 
994 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
995 	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
996 		avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
997 
998 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
999 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1000 }
1001 
1002 /*
1003  * This function is called during VCPU halt/unhalt.
1004  */
1005 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1006 {
1007 	struct vcpu_svm *svm = to_svm(vcpu);
1008 
1009 	svm->avic_is_running = is_run;
1010 	if (is_run)
1011 		avic_vcpu_load(vcpu, vcpu->cpu);
1012 	else
1013 		avic_vcpu_put(vcpu);
1014 }
1015 
1016 void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1017 {
1018 	avic_set_running(vcpu, false);
1019 }
1020 
1021 void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1022 {
1023 	if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
1024 		kvm_vcpu_update_apicv(vcpu);
1025 	avic_set_running(vcpu, true);
1026 }
1027