xref: /openbmc/linux/arch/arm64/kvm/vgic/vgic.c (revision 19b438592238b3b40c3f945bb5f9c4ca971c0c45)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015, 2016 ARM Ltd.
4  */
5 
6 #include <linux/interrupt.h>
7 #include <linux/irq.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/list_sort.h>
11 #include <linux/nospec.h>
12 
13 #include <asm/kvm_hyp.h>
14 
15 #include "vgic.h"
16 
17 #define CREATE_TRACE_POINTS
18 #include "trace.h"
19 
20 struct vgic_global kvm_vgic_global_state __ro_after_init = {
21 	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
22 };
23 
24 /*
25  * Locking order is always:
26  * kvm->lock (mutex)
27  *   its->cmd_lock (mutex)
28  *     its->its_lock (mutex)
29  *       vgic_cpu->ap_list_lock		must be taken with IRQs disabled
30  *         kvm->lpi_list_lock		must be taken with IRQs disabled
31  *           vgic_irq->irq_lock		must be taken with IRQs disabled
32  *
33  * As the ap_list_lock might be taken from the timer interrupt handler,
34  * we have to disable IRQs before taking this lock and everything lower
35  * than it.
36  *
37  * If you need to take multiple locks, always take the upper lock first,
38  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
39  * If you are already holding a lock and need to take a higher one, you
40  * have to drop the lower ranking lock first and re-acquire it after having
41  * taken the upper one.
42  *
43  * When taking more than one ap_list_lock at the same time, always take the
44  * lowest numbered VCPU's ap_list_lock first, so:
45  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
46  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
47  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
48  *
49  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
50  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
51  * spinlocks for any lock that may be taken while injecting an interrupt.
52  */
53 
54 /*
55  * Iterate over the VM's list of mapped LPIs to find the one with a
56  * matching interrupt ID and return a reference to the IRQ structure.
57  */
58 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
59 {
60 	struct vgic_dist *dist = &kvm->arch.vgic;
61 	struct vgic_irq *irq = NULL;
62 	unsigned long flags;
63 
64 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
65 
66 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
67 		if (irq->intid != intid)
68 			continue;
69 
70 		/*
71 		 * This increases the refcount, the caller is expected to
72 		 * call vgic_put_irq() later once it's finished with the IRQ.
73 		 */
74 		vgic_get_irq_kref(irq);
75 		goto out_unlock;
76 	}
77 	irq = NULL;
78 
79 out_unlock:
80 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
81 
82 	return irq;
83 }
84 
85 /*
86  * This looks up the virtual interrupt ID to get the corresponding
87  * struct vgic_irq. It also increases the refcount, so any caller is expected
88  * to call vgic_put_irq() once it's finished with this IRQ.
89  */
90 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
91 			      u32 intid)
92 {
93 	/* SGIs and PPIs */
94 	if (intid <= VGIC_MAX_PRIVATE) {
95 		intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
96 		return &vcpu->arch.vgic_cpu.private_irqs[intid];
97 	}
98 
99 	/* SPIs */
100 	if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
101 		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
102 		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
103 	}
104 
105 	/* LPIs */
106 	if (intid >= VGIC_MIN_LPI)
107 		return vgic_get_lpi(kvm, intid);
108 
109 	WARN(1, "Looking up struct vgic_irq for reserved INTID");
110 	return NULL;
111 }
112 
/*
 * kref release callback for a vgic_irq.
 *
 * Nothing can be done from here: there is no kvm pointer available to lock
 * the lpi_list and unlink the entry. The function is therefore left empty,
 * and the return value of kref_put() is used to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
	/* Deliberately empty. */
}
121 
122 /*
123  * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
124  */
125 void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
126 {
127 	struct vgic_dist *dist = &kvm->arch.vgic;
128 
129 	if (!kref_put(&irq->refcount, vgic_irq_release))
130 		return;
131 
132 	list_del(&irq->lpi_list);
133 	dist->lpi_list_count--;
134 
135 	kfree(irq);
136 }
137 
138 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
139 {
140 	struct vgic_dist *dist = &kvm->arch.vgic;
141 	unsigned long flags;
142 
143 	if (irq->intid < VGIC_MIN_LPI)
144 		return;
145 
146 	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
147 	__vgic_put_lpi_locked(kvm, irq);
148 	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
149 }
150 
151 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
152 {
153 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
154 	struct vgic_irq *irq, *tmp;
155 	unsigned long flags;
156 
157 	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
158 
159 	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
160 		if (irq->intid >= VGIC_MIN_LPI) {
161 			raw_spin_lock(&irq->irq_lock);
162 			list_del(&irq->ap_list);
163 			irq->vcpu = NULL;
164 			raw_spin_unlock(&irq->irq_lock);
165 			vgic_put_irq(vcpu->kvm, irq);
166 		}
167 	}
168 
169 	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
170 }
171 
172 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
173 {
174 	WARN_ON(irq_set_irqchip_state(irq->host_irq,
175 				      IRQCHIP_STATE_PENDING,
176 				      pending));
177 }
178 
179 bool vgic_get_phys_line_level(struct vgic_irq *irq)
180 {
181 	bool line_level;
182 
183 	BUG_ON(!irq->hw);
184 
185 	if (irq->ops && irq->ops->get_input_level)
186 		return irq->ops->get_input_level(irq->intid);
187 
188 	WARN_ON(irq_get_irqchip_state(irq->host_irq,
189 				      IRQCHIP_STATE_PENDING,
190 				      &line_level));
191 	return line_level;
192 }
193 
194 /* Set/Clear the physical active state */
195 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
196 {
197 
198 	BUG_ON(!irq->hw);
199 	WARN_ON(irq_set_irqchip_state(irq->host_irq,
200 				      IRQCHIP_STATE_ACTIVE,
201 				      active));
202 }
203 
204 /**
205  * kvm_vgic_target_oracle - compute the target vcpu for an irq
206  *
207  * @irq:	The irq to route. Must be already locked.
208  *
209  * Based on the current state of the interrupt (enabled, pending,
210  * active, vcpu and target_vcpu), compute the next vcpu this should be
211  * given to. Return NULL if this shouldn't be injected at all.
212  *
213  * Requires the IRQ lock to be held.
214  */
215 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
216 {
217 	lockdep_assert_held(&irq->irq_lock);
218 
219 	/* If the interrupt is active, it must stay on the current vcpu */
220 	if (irq->active)
221 		return irq->vcpu ? : irq->target_vcpu;
222 
223 	/*
224 	 * If the IRQ is not active but enabled and pending, we should direct
225 	 * it to its configured target VCPU.
226 	 * If the distributor is disabled, pending interrupts shouldn't be
227 	 * forwarded.
228 	 */
229 	if (irq->enabled && irq_is_pending(irq)) {
230 		if (unlikely(irq->target_vcpu &&
231 			     !irq->target_vcpu->kvm->arch.vgic.enabled))
232 			return NULL;
233 
234 		return irq->target_vcpu;
235 	}
236 
237 	/* If neither active nor pending and enabled, then this IRQ should not
238 	 * be queued to any VCPU.
239 	 */
240 	return NULL;
241 }
242 
243 /*
244  * The order of items in the ap_lists defines how we'll pack things in LRs as
245  * well, the first items in the list being the first things populated in the
246  * LRs.
247  *
248  * A hard rule is that active interrupts can never be pushed out of the LRs
249  * (and therefore take priority) since we cannot reliably trap on deactivation
250  * of IRQs and therefore they have to be present in the LRs.
251  *
252  * Otherwise things should be sorted by the priority field and the GIC
253  * hardware support will take care of preemption of priority groups etc.
254  *
255  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
256  * to sort "b" before "a".
257  */
258 static int vgic_irq_cmp(void *priv, const struct list_head *a,
259 			const struct list_head *b)
260 {
261 	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
262 	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
263 	bool penda, pendb;
264 	int ret;
265 
266 	/*
267 	 * list_sort may call this function with the same element when
268 	 * the list is fairly long.
269 	 */
270 	if (unlikely(irqa == irqb))
271 		return 0;
272 
273 	raw_spin_lock(&irqa->irq_lock);
274 	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
275 
276 	if (irqa->active || irqb->active) {
277 		ret = (int)irqb->active - (int)irqa->active;
278 		goto out;
279 	}
280 
281 	penda = irqa->enabled && irq_is_pending(irqa);
282 	pendb = irqb->enabled && irq_is_pending(irqb);
283 
284 	if (!penda || !pendb) {
285 		ret = (int)pendb - (int)penda;
286 		goto out;
287 	}
288 
289 	/* Both pending and enabled, sort by priority */
290 	ret = irqa->priority - irqb->priority;
291 out:
292 	raw_spin_unlock(&irqb->irq_lock);
293 	raw_spin_unlock(&irqa->irq_lock);
294 	return ret;
295 }
296 
297 /* Must be called with the ap_list_lock held */
298 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
299 {
300 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
301 
302 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
303 
304 	list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
305 }
306 
307 /*
308  * Only valid injection if changing level for level-triggered IRQs or for a
309  * rising edge, and in-kernel connected IRQ lines can only be controlled by
310  * their owner.
311  */
312 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
313 {
314 	if (irq->owner != owner)
315 		return false;
316 
317 	switch (irq->config) {
318 	case VGIC_CONFIG_LEVEL:
319 		return irq->line_level != level;
320 	case VGIC_CONFIG_EDGE:
321 		return level;
322 	}
323 
324 	return false;
325 }
326 
327 /*
328  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
329  * Do the queuing if necessary, taking the right locks in the right order.
330  * Returns true when the IRQ was queued, false otherwise.
331  *
332  * Needs to be entered with the IRQ lock already held, but will return
333  * with all locks dropped.
334  */
335 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
336 			   unsigned long flags)
337 {
338 	struct kvm_vcpu *vcpu;
339 
340 	lockdep_assert_held(&irq->irq_lock);
341 
342 retry:
343 	vcpu = vgic_target_oracle(irq);
344 	if (irq->vcpu || !vcpu) {
345 		/*
346 		 * If this IRQ is already on a VCPU's ap_list, then it
347 		 * cannot be moved or modified and there is no more work for
348 		 * us to do.
349 		 *
350 		 * Otherwise, if the irq is not pending and enabled, it does
351 		 * not need to be inserted into an ap_list and there is also
352 		 * no more work for us to do.
353 		 */
354 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
355 
356 		/*
357 		 * We have to kick the VCPU here, because we could be
358 		 * queueing an edge-triggered interrupt for which we
359 		 * get no EOI maintenance interrupt. In that case,
360 		 * while the IRQ is already on the VCPU's AP list, the
361 		 * VCPU could have EOI'ed the original interrupt and
362 		 * won't see this one until it exits for some other
363 		 * reason.
364 		 */
365 		if (vcpu) {
366 			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
367 			kvm_vcpu_kick(vcpu);
368 		}
369 		return false;
370 	}
371 
372 	/*
373 	 * We must unlock the irq lock to take the ap_list_lock where
374 	 * we are going to insert this new pending interrupt.
375 	 */
376 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
377 
378 	/* someone can do stuff here, which we re-check below */
379 
380 	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
381 	raw_spin_lock(&irq->irq_lock);
382 
383 	/*
384 	 * Did something change behind our backs?
385 	 *
386 	 * There are two cases:
387 	 * 1) The irq lost its pending state or was disabled behind our
388 	 *    backs and/or it was queued to another VCPU's ap_list.
389 	 * 2) Someone changed the affinity on this irq behind our
390 	 *    backs and we are now holding the wrong ap_list_lock.
391 	 *
392 	 * In both cases, drop the locks and retry.
393 	 */
394 
395 	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
396 		raw_spin_unlock(&irq->irq_lock);
397 		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
398 					   flags);
399 
400 		raw_spin_lock_irqsave(&irq->irq_lock, flags);
401 		goto retry;
402 	}
403 
404 	/*
405 	 * Grab a reference to the irq to reflect the fact that it is
406 	 * now in the ap_list.
407 	 */
408 	vgic_get_irq_kref(irq);
409 	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
410 	irq->vcpu = vcpu;
411 
412 	raw_spin_unlock(&irq->irq_lock);
413 	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
414 
415 	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
416 	kvm_vcpu_kick(vcpu);
417 
418 	return true;
419 }
420 
421 /**
422  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
423  * @kvm:     The VM structure pointer
424  * @cpuid:   The CPU for PPIs
425  * @intid:   The INTID to inject a new state to.
426  * @level:   Edge-triggered:  true:  to trigger the interrupt
427  *			      false: to ignore the call
428  *	     Level-sensitive  true:  raise the input signal
429  *			      false: lower the input signal
430  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
431  *           that the caller is allowed to inject this IRQ.  Userspace
432  *           injections will have owner == NULL.
433  *
434  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
435  * level-sensitive interrupts.  You can think of the level parameter as 1
436  * being HIGH and 0 being LOW and all devices being active-HIGH.
437  */
438 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
439 			bool level, void *owner)
440 {
441 	struct kvm_vcpu *vcpu;
442 	struct vgic_irq *irq;
443 	unsigned long flags;
444 	int ret;
445 
446 	trace_vgic_update_irq_pending(cpuid, intid, level);
447 
448 	ret = vgic_lazy_init(kvm);
449 	if (ret)
450 		return ret;
451 
452 	vcpu = kvm_get_vcpu(kvm, cpuid);
453 	if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
454 		return -EINVAL;
455 
456 	irq = vgic_get_irq(kvm, vcpu, intid);
457 	if (!irq)
458 		return -EINVAL;
459 
460 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
461 
462 	if (!vgic_validate_injection(irq, level, owner)) {
463 		/* Nothing to see here, move along... */
464 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
465 		vgic_put_irq(kvm, irq);
466 		return 0;
467 	}
468 
469 	if (irq->config == VGIC_CONFIG_LEVEL)
470 		irq->line_level = level;
471 	else
472 		irq->pending_latch = true;
473 
474 	vgic_queue_irq_unlock(kvm, irq, flags);
475 	vgic_put_irq(kvm, irq);
476 
477 	return 0;
478 }
479 
480 /* @irq->irq_lock must be held */
481 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
482 			    unsigned int host_irq,
483 			    struct irq_ops *ops)
484 {
485 	struct irq_desc *desc;
486 	struct irq_data *data;
487 
488 	/*
489 	 * Find the physical IRQ number corresponding to @host_irq
490 	 */
491 	desc = irq_to_desc(host_irq);
492 	if (!desc) {
493 		kvm_err("%s: no interrupt descriptor\n", __func__);
494 		return -EINVAL;
495 	}
496 	data = irq_desc_get_irq_data(desc);
497 	while (data->parent_data)
498 		data = data->parent_data;
499 
500 	irq->hw = true;
501 	irq->host_irq = host_irq;
502 	irq->hwintid = data->hwirq;
503 	irq->ops = ops;
504 	return 0;
505 }
506 
507 /* @irq->irq_lock must be held */
508 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
509 {
510 	irq->hw = false;
511 	irq->hwintid = 0;
512 	irq->ops = NULL;
513 }
514 
515 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
516 			  u32 vintid, struct irq_ops *ops)
517 {
518 	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
519 	unsigned long flags;
520 	int ret;
521 
522 	BUG_ON(!irq);
523 
524 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
525 	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
526 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
527 	vgic_put_irq(vcpu->kvm, irq);
528 
529 	return ret;
530 }
531 
532 /**
533  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
534  * @vcpu: The VCPU pointer
535  * @vintid: The INTID of the interrupt
536  *
537  * Reset the active and pending states of a mapped interrupt.  Kernel
538  * subsystems injecting mapped interrupts should reset their interrupt lines
539  * when we are doing a reset of the VM.
540  */
541 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
542 {
543 	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
544 	unsigned long flags;
545 
546 	if (!irq->hw)
547 		goto out;
548 
549 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
550 	irq->active = false;
551 	irq->pending_latch = false;
552 	irq->line_level = false;
553 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
554 out:
555 	vgic_put_irq(vcpu->kvm, irq);
556 }
557 
558 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
559 {
560 	struct vgic_irq *irq;
561 	unsigned long flags;
562 
563 	if (!vgic_initialized(vcpu->kvm))
564 		return -EAGAIN;
565 
566 	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
567 	BUG_ON(!irq);
568 
569 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
570 	kvm_vgic_unmap_irq(irq);
571 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
572 	vgic_put_irq(vcpu->kvm, irq);
573 
574 	return 0;
575 }
576 
577 /**
578  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
579  *
580  * @vcpu:   Pointer to the VCPU (used for PPIs)
581  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
582  * @owner:  Opaque pointer to the owner
583  *
584  * Returns 0 if intid is not already used by another in-kernel device and the
585  * owner is set, otherwise returns an error code.
586  */
587 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
588 {
589 	struct vgic_irq *irq;
590 	unsigned long flags;
591 	int ret = 0;
592 
593 	if (!vgic_initialized(vcpu->kvm))
594 		return -EAGAIN;
595 
596 	/* SGIs and LPIs cannot be wired up to any device */
597 	if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
598 		return -EINVAL;
599 
600 	irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
601 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
602 	if (irq->owner && irq->owner != owner)
603 		ret = -EEXIST;
604 	else
605 		irq->owner = owner;
606 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
607 
608 	return ret;
609 }
610 
611 /**
612  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
613  *
614  * @vcpu: The VCPU pointer
615  *
616  * Go over the list of "interesting" interrupts, and prune those that we
617  * won't have to consider in the near future.
618  */
619 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
620 {
621 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
622 	struct vgic_irq *irq, *tmp;
623 
624 	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
625 
626 retry:
627 	raw_spin_lock(&vgic_cpu->ap_list_lock);
628 
629 	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
630 		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
631 		bool target_vcpu_needs_kick = false;
632 
633 		raw_spin_lock(&irq->irq_lock);
634 
635 		BUG_ON(vcpu != irq->vcpu);
636 
637 		target_vcpu = vgic_target_oracle(irq);
638 
639 		if (!target_vcpu) {
640 			/*
641 			 * We don't need to process this interrupt any
642 			 * further, move it off the list.
643 			 */
644 			list_del(&irq->ap_list);
645 			irq->vcpu = NULL;
646 			raw_spin_unlock(&irq->irq_lock);
647 
648 			/*
649 			 * This vgic_put_irq call matches the
650 			 * vgic_get_irq_kref in vgic_queue_irq_unlock,
651 			 * where we added the LPI to the ap_list. As
652 			 * we remove the irq from the list, we drop
653 			 * also drop the refcount.
654 			 */
655 			vgic_put_irq(vcpu->kvm, irq);
656 			continue;
657 		}
658 
659 		if (target_vcpu == vcpu) {
660 			/* We're on the right CPU */
661 			raw_spin_unlock(&irq->irq_lock);
662 			continue;
663 		}
664 
665 		/* This interrupt looks like it has to be migrated. */
666 
667 		raw_spin_unlock(&irq->irq_lock);
668 		raw_spin_unlock(&vgic_cpu->ap_list_lock);
669 
670 		/*
671 		 * Ensure locking order by always locking the smallest
672 		 * ID first.
673 		 */
674 		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
675 			vcpuA = vcpu;
676 			vcpuB = target_vcpu;
677 		} else {
678 			vcpuA = target_vcpu;
679 			vcpuB = vcpu;
680 		}
681 
682 		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
683 		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
684 				      SINGLE_DEPTH_NESTING);
685 		raw_spin_lock(&irq->irq_lock);
686 
687 		/*
688 		 * If the affinity has been preserved, move the
689 		 * interrupt around. Otherwise, it means things have
690 		 * changed while the interrupt was unlocked, and we
691 		 * need to replay this.
692 		 *
693 		 * In all cases, we cannot trust the list not to have
694 		 * changed, so we restart from the beginning.
695 		 */
696 		if (target_vcpu == vgic_target_oracle(irq)) {
697 			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
698 
699 			list_del(&irq->ap_list);
700 			irq->vcpu = target_vcpu;
701 			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
702 			target_vcpu_needs_kick = true;
703 		}
704 
705 		raw_spin_unlock(&irq->irq_lock);
706 		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
707 		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
708 
709 		if (target_vcpu_needs_kick) {
710 			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
711 			kvm_vcpu_kick(target_vcpu);
712 		}
713 
714 		goto retry;
715 	}
716 
717 	raw_spin_unlock(&vgic_cpu->ap_list_lock);
718 }
719 
720 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
721 {
722 	if (kvm_vgic_global_state.type == VGIC_V2)
723 		vgic_v2_fold_lr_state(vcpu);
724 	else
725 		vgic_v3_fold_lr_state(vcpu);
726 }
727 
728 /* Requires the irq_lock to be held. */
729 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
730 				    struct vgic_irq *irq, int lr)
731 {
732 	lockdep_assert_held(&irq->irq_lock);
733 
734 	if (kvm_vgic_global_state.type == VGIC_V2)
735 		vgic_v2_populate_lr(vcpu, irq, lr);
736 	else
737 		vgic_v3_populate_lr(vcpu, irq, lr);
738 }
739 
740 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
741 {
742 	if (kvm_vgic_global_state.type == VGIC_V2)
743 		vgic_v2_clear_lr(vcpu, lr);
744 	else
745 		vgic_v3_clear_lr(vcpu, lr);
746 }
747 
748 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
749 {
750 	if (kvm_vgic_global_state.type == VGIC_V2)
751 		vgic_v2_set_underflow(vcpu);
752 	else
753 		vgic_v3_set_underflow(vcpu);
754 }
755 
756 /* Requires the ap_list_lock to be held. */
757 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
758 				 bool *multi_sgi)
759 {
760 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
761 	struct vgic_irq *irq;
762 	int count = 0;
763 
764 	*multi_sgi = false;
765 
766 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
767 
768 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
769 		int w;
770 
771 		raw_spin_lock(&irq->irq_lock);
772 		/* GICv2 SGIs can count for more than one... */
773 		w = vgic_irq_get_lr_count(irq);
774 		raw_spin_unlock(&irq->irq_lock);
775 
776 		count += w;
777 		*multi_sgi |= (w > 1);
778 	}
779 	return count;
780 }
781 
782 /* Requires the VCPU's ap_list_lock to be held. */
783 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
784 {
785 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
786 	struct vgic_irq *irq;
787 	int count;
788 	bool multi_sgi;
789 	u8 prio = 0xff;
790 	int i = 0;
791 
792 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
793 
794 	count = compute_ap_list_depth(vcpu, &multi_sgi);
795 	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
796 		vgic_sort_ap_list(vcpu);
797 
798 	count = 0;
799 
800 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
801 		raw_spin_lock(&irq->irq_lock);
802 
803 		/*
804 		 * If we have multi-SGIs in the pipeline, we need to
805 		 * guarantee that they are all seen before any IRQ of
806 		 * lower priority. In that case, we need to filter out
807 		 * these interrupts by exiting early. This is easy as
808 		 * the AP list has been sorted already.
809 		 */
810 		if (multi_sgi && irq->priority > prio) {
811 			_raw_spin_unlock(&irq->irq_lock);
812 			break;
813 		}
814 
815 		if (likely(vgic_target_oracle(irq) == vcpu)) {
816 			vgic_populate_lr(vcpu, irq, count++);
817 
818 			if (irq->source)
819 				prio = irq->priority;
820 		}
821 
822 		raw_spin_unlock(&irq->irq_lock);
823 
824 		if (count == kvm_vgic_global_state.nr_lr) {
825 			if (!list_is_last(&irq->ap_list,
826 					  &vgic_cpu->ap_list_head))
827 				vgic_set_underflow(vcpu);
828 			break;
829 		}
830 	}
831 
832 	/* Nuke remaining LRs */
833 	for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
834 		vgic_clear_lr(vcpu, i);
835 
836 	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
837 		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
838 	else
839 		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
840 }
841 
842 static inline bool can_access_vgic_from_kernel(void)
843 {
844 	/*
845 	 * GICv2 can always be accessed from the kernel because it is
846 	 * memory-mapped, and VHE systems can access GICv3 EL2 system
847 	 * registers.
848 	 */
849 	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
850 }
851 
852 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
853 {
854 	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
855 		vgic_v2_save_state(vcpu);
856 	else
857 		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
858 }
859 
860 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
861 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
862 {
863 	int used_lrs;
864 
865 	/* An empty ap_list_head implies used_lrs == 0 */
866 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
867 		return;
868 
869 	if (can_access_vgic_from_kernel())
870 		vgic_save_state(vcpu);
871 
872 	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
873 		used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
874 	else
875 		used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
876 
877 	if (used_lrs)
878 		vgic_fold_lr_state(vcpu);
879 	vgic_prune_ap_list(vcpu);
880 }
881 
882 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
883 {
884 	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
885 		vgic_v2_restore_state(vcpu);
886 	else
887 		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
888 }
889 
890 /* Flush our emulation state into the GIC hardware before entering the guest. */
891 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
892 {
893 	/*
894 	 * If there are no virtual interrupts active or pending for this
895 	 * VCPU, then there is no work to do and we can bail out without
896 	 * taking any lock.  There is a potential race with someone injecting
897 	 * interrupts to the VCPU, but it is a benign race as the VCPU will
898 	 * either observe the new interrupt before or after doing this check,
899 	 * and introducing additional synchronization mechanism doesn't change
900 	 * this.
901 	 *
902 	 * Note that we still need to go through the whole thing if anything
903 	 * can be directly injected (GICv4).
904 	 */
905 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
906 	    !vgic_supports_direct_msis(vcpu->kvm))
907 		return;
908 
909 	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
910 
911 	if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
912 		raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
913 		vgic_flush_lr_state(vcpu);
914 		raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
915 	}
916 
917 	if (can_access_vgic_from_kernel())
918 		vgic_restore_state(vcpu);
919 
920 	if (vgic_supports_direct_msis(vcpu->kvm))
921 		vgic_v4_commit(vcpu);
922 }
923 
924 void kvm_vgic_load(struct kvm_vcpu *vcpu)
925 {
926 	if (unlikely(!vgic_initialized(vcpu->kvm)))
927 		return;
928 
929 	if (kvm_vgic_global_state.type == VGIC_V2)
930 		vgic_v2_load(vcpu);
931 	else
932 		vgic_v3_load(vcpu);
933 }
934 
935 void kvm_vgic_put(struct kvm_vcpu *vcpu)
936 {
937 	if (unlikely(!vgic_initialized(vcpu->kvm)))
938 		return;
939 
940 	if (kvm_vgic_global_state.type == VGIC_V2)
941 		vgic_v2_put(vcpu);
942 	else
943 		vgic_v3_put(vcpu);
944 }
945 
946 void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
947 {
948 	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
949 		return;
950 
951 	if (kvm_vgic_global_state.type == VGIC_V2)
952 		vgic_v2_vmcr_sync(vcpu);
953 	else
954 		vgic_v3_vmcr_sync(vcpu);
955 }
956 
957 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
958 {
959 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
960 	struct vgic_irq *irq;
961 	bool pending = false;
962 	unsigned long flags;
963 	struct vgic_vmcr vmcr;
964 
965 	if (!vcpu->kvm->arch.vgic.enabled)
966 		return false;
967 
968 	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
969 		return true;
970 
971 	vgic_get_vmcr(vcpu, &vmcr);
972 
973 	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
974 
975 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
976 		raw_spin_lock(&irq->irq_lock);
977 		pending = irq_is_pending(irq) && irq->enabled &&
978 			  !irq->active &&
979 			  irq->priority < vmcr.pmr;
980 		raw_spin_unlock(&irq->irq_lock);
981 
982 		if (pending)
983 			break;
984 	}
985 
986 	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
987 
988 	return pending;
989 }
990 
991 void vgic_kick_vcpus(struct kvm *kvm)
992 {
993 	struct kvm_vcpu *vcpu;
994 	int c;
995 
996 	/*
997 	 * We've injected an interrupt, time to find out who deserves
998 	 * a good kick...
999 	 */
1000 	kvm_for_each_vcpu(c, vcpu, kvm) {
1001 		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
1002 			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
1003 			kvm_vcpu_kick(vcpu);
1004 		}
1005 	}
1006 }
1007 
1008 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
1009 {
1010 	struct vgic_irq *irq;
1011 	bool map_is_active;
1012 	unsigned long flags;
1013 
1014 	if (!vgic_initialized(vcpu->kvm))
1015 		return false;
1016 
1017 	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
1018 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
1019 	map_is_active = irq->hw && irq->active;
1020 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1021 	vgic_put_irq(vcpu->kvm, irq);
1022 
1023 	return map_is_active;
1024 }
1025