// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   its->cmd_lock (mutex)
 *     its->its_lock (mutex)
 *       vgic_cpu->ap_list_lock		must be taken with IRQs disabled
 *         kvm->lpi_list_lock		must be taken with IRQs disabled
 *           vgic_irq->irq_lock		must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 *
 * An illustrative sketch of this ordering follows vgic_get_lpi() below.
 */

/*
 * Iterate over the VM's list of mapped LPIs to find the one with a
 * matching interrupt ID and return a reference to the IRQ structure.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq = NULL;
	unsigned long flags;

	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);

	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
		if (irq->intid != intid)
			continue;

		/*
		 * This increases the refcount, the caller is expected to
		 * call vgic_put_irq() later once it's finished with the IRQ.
		 */
		vgic_get_irq_kref(irq);
		goto out_unlock;
	}
	irq = NULL;

out_unlock:
	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);

	return irq;
}
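
/*
 * Illustrative sketch only (not compiled): with interrupts already disabled
 * by the caller, and two hypothetical vCPUs where
 * vcpuX->vcpu_id < vcpuY->vcpu_id, the locking rules above translate into
 * the pattern used by vgic_prune_ap_list() further down:
 *
 *	raw_spin_lock(&vcpuX->arch.vgic_cpu.ap_list_lock);
 *	raw_spin_lock_nested(&vcpuY->arch.vgic_cpu.ap_list_lock,
 *			     SINGLE_DEPTH_NESTING);
 *	raw_spin_lock(&irq->irq_lock);
 *
 *	... move irq from one ap_list to the other ...
 *
 *	raw_spin_unlock(&irq->irq_lock);
 *	raw_spin_unlock(&vcpuY->arch.vgic_cpu.ap_list_lock);
 *	raw_spin_unlock(&vcpuX->arch.vgic_cpu.ap_list_lock);
 */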

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
			      u32 intid)
{
	/* SGIs and PPIs */
	if (intid <= VGIC_MAX_PRIVATE) {
		intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
		return &vcpu->arch.vgic_cpu.private_irqs[intid];
	}

	/* SPIs */
	if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
	}

	/* LPIs */
	if (intid >= VGIC_MIN_LPI)
		return vgic_get_lpi(kvm, intid);

	WARN(1, "Looking up struct vgic_irq for reserved INTID");
	return NULL;
}

/*
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}

/*
 * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
 */
void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;

	if (!kref_put(&irq->refcount, vgic_irq_release))
		return;

	list_del(&irq->lpi_list);
	dist->lpi_list_count--;

	kfree(irq);
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long flags;

	if (irq->intid < VGIC_MIN_LPI)
		return;

	raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
	__vgic_put_lpi_locked(kvm, irq);
	raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
	unsigned long flags;

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		if (irq->intid >= VGIC_MIN_LPI) {
			raw_spin_lock(&irq->irq_lock);
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);
			vgic_put_irq(vcpu->kvm, irq);
		}
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
	bool line_level;

	BUG_ON(!irq->hw);

	if (irq->get_input_level)
		return irq->get_input_level(irq->intid);

	WARN_ON(irq_get_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      &line_level));
	return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{
	BUG_ON(!irq->hw);
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_ACTIVE,
				      active));
}
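
/*
 * A minimal usage sketch (not compiled) of the vgic_get_irq()/vgic_put_irq()
 * refcounting contract above; "my_intid" is a hypothetical INTID supplied by
 * the caller:
 *
 *	struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, my_intid);
 *	unsigned long flags;
 *
 *	if (!irq)
 *		return -EINVAL;
 *
 *	raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *	... inspect or modify the per-IRQ state ...
 *	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *
 *	vgic_put_irq(kvm, irq);
 */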

/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq:	The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
	lockdep_assert_held(&irq->irq_lock);

	/* If the interrupt is active, it must stay on the current vcpu */
	if (irq->active)
		return irq->vcpu ? : irq->target_vcpu;

	/*
	 * If the IRQ is not active but enabled and pending, we should direct
	 * it to its configured target VCPU.
	 * If the distributor is disabled, pending interrupts shouldn't be
	 * forwarded.
	 */
	if (irq->enabled && irq_is_pending(irq)) {
		if (unlikely(irq->target_vcpu &&
			     !irq->target_vcpu->kvm->arch.vgic.enabled))
			return NULL;

		return irq->target_vcpu;
	}

	/*
	 * If the IRQ is neither active nor (pending and enabled), it should
	 * not be queued to any VCPU.
	 */
	return NULL;
}

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * A hard rule is that active interrupts can never be pushed out of the LRs
 * (and therefore take priority) since we cannot reliably trap on deactivation
 * of IRQs and therefore they have to be present in the LRs.
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, const struct list_head *a,
			const struct list_head *b)
{
	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
	bool penda, pendb;
	int ret;

	/*
	 * list_sort may call this function with the same element when
	 * the list is fairly long.
	 */
	if (unlikely(irqa == irqb))
		return 0;

	raw_spin_lock(&irqa->irq_lock);
	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

	if (irqa->active || irqb->active) {
		ret = (int)irqb->active - (int)irqa->active;
		goto out;
	}

	penda = irqa->enabled && irq_is_pending(irqa);
	pendb = irqb->enabled && irq_is_pending(irqb);

	if (!penda || !pendb) {
		ret = (int)pendb - (int)penda;
		goto out;
	}

	/* Both pending and enabled, sort by priority */
	ret = irqa->priority - irqb->priority;
out:
	raw_spin_unlock(&irqb->irq_lock);
	raw_spin_unlock(&irqa->irq_lock);
	return ret;
}

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
	if (irq->owner != owner)
		return false;

	switch (irq->config) {
	case VGIC_CONFIG_LEVEL:
		return irq->line_level != level;
	case VGIC_CONFIG_EDGE:
		return level;
	}

	return false;
}
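
/*
 * Worked examples for vgic_validate_injection() above (illustrative only):
 *
 *	level-sensitive, line_level == false, level == true	-> valid (rising edge)
 *	level-sensitive, line_level == true,  level == true	-> ignored (no change)
 *	level-sensitive, line_level == true,  level == false	-> valid (falling edge)
 *	edge-triggered,  level == true				-> valid (trigger)
 *	edge-triggered,  level == false				-> ignored
 *	any config, owner != irq->owner				-> ignored
 */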

/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
			   unsigned long flags)
{
	struct kvm_vcpu *vcpu;

	lockdep_assert_held(&irq->irq_lock);

retry:
	vcpu = vgic_target_oracle(irq);
	if (irq->vcpu || !vcpu) {
		/*
		 * If this IRQ is already on a VCPU's ap_list, then it
		 * cannot be moved or modified and there is no more work for
		 * us to do.
		 *
		 * Otherwise, if the irq is not pending and enabled, it does
		 * not need to be inserted into an ap_list and there is also
		 * no more work for us to do.
		 */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

		/*
		 * We have to kick the VCPU here, because we could be
		 * queueing an edge-triggered interrupt for which we
		 * get no EOI maintenance interrupt. In that case,
		 * while the IRQ is already on the VCPU's AP list, the
		 * VCPU could have EOI'ed the original interrupt and
		 * won't see this one until it exits for some other
		 * reason.
		 */
		if (vcpu) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		return false;
	}

	/*
	 * We must unlock the irq lock to take the ap_list_lock where
	 * we are going to insert this new pending interrupt.
	 */
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	/* Another context may modify the IRQ state here; we re-check below. */

	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
	raw_spin_lock(&irq->irq_lock);

	/*
	 * Did something change behind our backs?
	 *
	 * There are two cases:
	 * 1) The irq lost its pending state or was disabled behind our
	 *    backs and/or it was queued to another VCPU's ap_list.
	 * 2) Someone changed the affinity on this irq behind our
	 *    backs and we are now holding the wrong ap_list_lock.
	 *
	 * In both cases, drop the locks and retry.
	 */

	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
					   flags);

		raw_spin_lock_irqsave(&irq->irq_lock, flags);
		goto retry;
	}

	/*
	 * Grab a reference to the irq to reflect the fact that it is
	 * now in the ap_list.
	 */
	vgic_get_irq_kref(irq);
	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
	irq->vcpu = vcpu;

	raw_spin_unlock(&irq->irq_lock);
	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return true;
}
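
/*
 * The unlock/relock dance in vgic_queue_irq_unlock() above follows the lock
 * ordering documented at the top of this file; condensed sketch (not
 * compiled, names as in the function above):
 *
 *	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 *	raw_spin_lock(&irq->irq_lock);
 *
 *	if (the state observed before dropping irq_lock no longer holds)
 *		drop both locks and retry from the top;
 *
 *	... otherwise queue the IRQ and release the locks in reverse order ...
 */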

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:	The VM structure pointer
 * @cpuid:	The CPU for PPIs
 * @intid:	The INTID to inject a new state to.
 * @level:	Edge-triggered:  true:  to trigger the interrupt
 *			         false: to ignore the call
 *		Level-sensitive: true:  raise the input signal
 *			         false: lower the input signal
 * @owner:	The opaque pointer to the owner of the IRQ being raised to verify
 *		that the caller is allowed to inject this IRQ. Userspace
 *		injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts. You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
			bool level, void *owner)
{
	struct kvm_vcpu *vcpu;
	struct vgic_irq *irq;
	unsigned long flags;
	int ret;

	trace_vgic_update_irq_pending(cpuid, intid, level);

	ret = vgic_lazy_init(kvm);
	if (ret)
		return ret;

	vcpu = kvm_get_vcpu(kvm, cpuid);
	if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
		return -EINVAL;

	irq = vgic_get_irq(kvm, vcpu, intid);
	if (!irq)
		return -EINVAL;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);

	if (!vgic_validate_injection(irq, level, owner)) {
		/* Nothing to see here, move along... */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(kvm, irq);
		return 0;
	}

	if (irq->config == VGIC_CONFIG_LEVEL)
		irq->line_level = level;
	else
		irq->pending_latch = true;

	vgic_queue_irq_unlock(kvm, irq, flags);
	vgic_put_irq(kvm, irq);

	return 0;
}

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
			    unsigned int host_irq,
			    bool (*get_input_level)(int vintid))
{
	struct irq_desc *desc;
	struct irq_data *data;

	/*
	 * Find the physical IRQ number corresponding to @host_irq
	 */
	desc = irq_to_desc(host_irq);
	if (!desc) {
		kvm_err("%s: no interrupt descriptor\n", __func__);
		return -EINVAL;
	}
	data = irq_desc_get_irq_data(desc);
	while (data->parent_data)
		data = data->parent_data;

	irq->hw = true;
	irq->host_irq = host_irq;
	irq->hwintid = data->hwirq;
	irq->get_input_level = get_input_level;
	return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
	irq->hw = false;
	irq->hwintid = 0;
	irq->get_input_level = NULL;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
			  u32 vintid, bool (*get_input_level)(int vintid))
{
	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
	unsigned long flags;
	int ret;

	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return ret;
}
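
/*
 * Illustrative only: an in-kernel user (the arch timer, for instance) can
 * forward a physical interrupt to a virtual one roughly like this. The
 * variables host_irq, hypothetical_ppi and my_get_input_level are
 * placeholders, not real identifiers from this subsystem:
 *
 *	hypothetical_ppi = 27;
 *	ret = kvm_vgic_map_phys_irq(vcpu, host_irq, hypothetical_ppi,
 *				    my_get_input_level);
 *	if (ret)
 *		return ret;
 *
 *	... and on teardown, once the VGIC is initialized ...
 *
 *	kvm_vgic_unmap_phys_irq(vcpu, hypothetical_ppi);
 */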

/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu: The VCPU pointer
 * @vintid: The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt. Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
	unsigned long flags;

	if (!irq->hw)
		goto out;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	irq->active = false;
	irq->pending_latch = false;
	irq->line_level = false;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
	vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	kvm_vgic_unmap_irq(irq);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return 0;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:	Pointer to the VCPU (used for PPIs)
 * @intid:	The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:	Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
	struct vgic_irq *irq;
	unsigned long flags;
	int ret = 0;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	/* SGIs and LPIs cannot be wired up to any device */
	if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
		return -EINVAL;

	irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->owner && irq->owner != owner)
		ret = -EEXIST;
	else
		irq->owner = owner;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	return ret;
}
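
/*
 * Illustrative only: an in-kernel device typically claims its interrupt once
 * with kvm_vgic_set_owner() and then injects with the same opaque cookie, so
 * that vgic_validate_injection() accepts the update. "my_dev" and "my_intid"
 * are hypothetical:
 *
 *	ret = kvm_vgic_set_owner(vcpu, my_intid, &my_dev);
 *	if (ret)
 *		return ret;
 *
 *	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, my_intid, true, &my_dev);
 */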

/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
	raw_spin_lock(&vgic_cpu->ap_list_lock);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
		bool target_vcpu_needs_kick = false;

		raw_spin_lock(&irq->irq_lock);

		BUG_ON(vcpu != irq->vcpu);

		target_vcpu = vgic_target_oracle(irq);

		if (!target_vcpu) {
			/*
			 * We don't need to process this interrupt any
			 * further, move it off the list.
			 */
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);

			/*
			 * This vgic_put_irq call matches the
			 * vgic_get_irq_kref in vgic_queue_irq_unlock,
			 * where we added the LPI to the ap_list. As
			 * we remove the irq from the list, we also
			 * drop the refcount.
			 */
			vgic_put_irq(vcpu->kvm, irq);
			continue;
		}

		if (target_vcpu == vcpu) {
			/* We're on the right CPU */
			raw_spin_unlock(&irq->irq_lock);
			continue;
		}

		/* This interrupt looks like it has to be migrated. */

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vgic_cpu->ap_list_lock);

		/*
		 * Ensure locking order by always locking the smallest
		 * ID first.
		 */
		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
			vcpuA = vcpu;
			vcpuB = target_vcpu;
		} else {
			vcpuA = target_vcpu;
			vcpuB = vcpu;
		}

		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
				     SINGLE_DEPTH_NESTING);
		raw_spin_lock(&irq->irq_lock);

		/*
		 * If the affinity has been preserved, move the
		 * interrupt around. Otherwise, it means things have
		 * changed while the interrupt was unlocked, and we
		 * need to replay this.
		 *
		 * In all cases, we cannot trust the list not to have
		 * changed, so we restart from the beginning.
		 */
		if (target_vcpu == vgic_target_oracle(irq)) {
			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

			list_del(&irq->ap_list);
			irq->vcpu = target_vcpu;
			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
			target_vcpu_needs_kick = true;
		}

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

		if (target_vcpu_needs_kick) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
			kvm_vcpu_kick(target_vcpu);
		}

		goto retry;
	}

	raw_spin_unlock(&vgic_cpu->ap_list_lock);
}

static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_fold_lr_state(vcpu);
	else
		vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
				    struct vgic_irq *irq, int lr)
{
	lockdep_assert_held(&irq->irq_lock);

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_populate_lr(vcpu, irq, lr);
	else
		vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_clear_lr(vcpu, lr);
	else
		vgic_v3_clear_lr(vcpu, lr);
}

static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_set_underflow(vcpu);
	else
		vgic_v3_set_underflow(vcpu);
}

/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
				 bool *multi_sgi)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	int count = 0;

	*multi_sgi = false;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		int w;

		raw_spin_lock(&irq->irq_lock);
		/* GICv2 SGIs can count for more than one... */
		w = vgic_irq_get_lr_count(irq);
		raw_spin_unlock(&irq->irq_lock);

		count += w;
		*multi_sgi |= (w > 1);
	}
	return count;
}
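
/*
 * Worked example for the accounting above (illustrative): on GICv2, an SGI
 * that is pending from, say, two different source CPUs needs two LRs, so its
 * weight w is 2. An ap_list with two such SGIs plus one SPI then yields
 * count = 2 + 2 + 1 = 5 and multi_sgi = true, which on a 4-LR implementation
 * forces the sort and early-exit handling in vgic_flush_lr_state() below.
 */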

/* Requires the VCPU's ap_list_lock to be held. */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	int count;
	bool multi_sgi;
	u8 prio = 0xff;
	int i = 0;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	count = compute_ap_list_depth(vcpu, &multi_sgi);
	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
		vgic_sort_ap_list(vcpu);

	count = 0;

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		raw_spin_lock(&irq->irq_lock);

		/*
		 * If we have multi-SGIs in the pipeline, we need to
		 * guarantee that they are all seen before any IRQ of
		 * lower priority. In that case, we need to filter out
		 * these interrupts by exiting early. This is easy as
		 * the AP list has been sorted already.
		 */
		if (multi_sgi && irq->priority > prio) {
			raw_spin_unlock(&irq->irq_lock);
			break;
		}

		if (likely(vgic_target_oracle(irq) == vcpu)) {
			vgic_populate_lr(vcpu, irq, count++);

			if (irq->source)
				prio = irq->priority;
		}

		raw_spin_unlock(&irq->irq_lock);

		if (count == kvm_vgic_global_state.nr_lr) {
			if (!list_is_last(&irq->ap_list,
					  &vgic_cpu->ap_list_head))
				vgic_set_underflow(vcpu);
			break;
		}
	}

	/* Nuke remaining LRs */
	for (i = count; i < kvm_vgic_global_state.nr_lr; i++)
		vgic_clear_lr(vcpu, i);

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
	else
		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}

static inline bool can_access_vgic_from_kernel(void)
{
	/*
	 * GICv2 can always be accessed from the kernel because it is
	 * memory-mapped, and VHE systems can access GICv3 EL2 system
	 * registers.
	 */
	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_save_state(vcpu);
	else
		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
	int used_lrs;

	/* An empty ap_list_head implies used_lrs == 0 */
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
		return;

	if (can_access_vgic_from_kernel())
		vgic_save_state(vcpu);

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
	else
		used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;

	if (used_lrs)
		vgic_fold_lr_state(vcpu);
	vgic_prune_ap_list(vcpu);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_restore_state(vcpu);
	else
		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}
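
/*
 * For orientation (illustrative, heavily simplified): the per-VCPU run loop
 * brackets every guest entry with the flush/sync pair defined around here,
 * with interrupts disabled across both calls, roughly:
 *
 *	local_irq_disable();
 *	kvm_vgic_flush_hwstate(vcpu);	-> emulated state into the LRs
 *	... run the guest ...
 *	kvm_vgic_sync_hwstate(vcpu);	-> LR state back into the emulation
 *	local_irq_enable();
 */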

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
	/*
	 * If there are no virtual interrupts active or pending for this
	 * VCPU, then there is no work to do and we can bail out without
	 * taking any lock. There is a potential race with someone injecting
	 * interrupts to the VCPU, but it is a benign race as the VCPU will
	 * either observe the new interrupt before or after doing this check,
	 * and introducing additional synchronization mechanisms doesn't
	 * change this.
	 *
	 * Note that we still need to go through the whole thing if anything
	 * can be directly injected (GICv4).
	 */
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
	    !vgic_supports_direct_msis(vcpu->kvm))
		return;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

	if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
		raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
		vgic_flush_lr_state(vcpu);
		raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
	}

	if (can_access_vgic_from_kernel())
		vgic_restore_state(vcpu);

	if (vgic_supports_direct_msis(vcpu->kvm))
		vgic_v4_commit(vcpu);
}

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
	if (unlikely(!vgic_initialized(vcpu->kvm)))
		return;

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_load(vcpu);
	else
		vgic_v3_load(vcpu);
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
	if (unlikely(!vgic_initialized(vcpu->kvm)))
		return;

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_put(vcpu);
	else
		vgic_v3_put(vcpu);
}

void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
{
	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		return;

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_vmcr_sync(vcpu);
	else
		vgic_v3_vmcr_sync(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	bool pending = false;
	unsigned long flags;
	struct vgic_vmcr vmcr;

	if (!vcpu->kvm->arch.vgic.enabled)
		return false;

	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
		return true;

	vgic_get_vmcr(vcpu, &vmcr);

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		raw_spin_lock(&irq->irq_lock);
		pending = irq_is_pending(irq) && irq->enabled &&
			  !irq->active &&
			  irq->priority < vmcr.pmr;
		raw_spin_unlock(&irq->irq_lock);

		if (pending)
			break;
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

	return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int c;

	/*
	 * We've injected an interrupt, time to find out who deserves
	 * a good kick...
	 */
	kvm_for_each_vcpu(c, vcpu, kvm) {
		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
	}
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	bool map_is_active;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return false;

	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	map_is_active = irq->hw && irq->active;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return map_is_active;
}
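
/*
 * Worked example for kvm_vgic_vcpu_pending_irq() above (illustrative): with
 * vmcr.pmr == 0xf0, an enabled, pending, non-active interrupt of priority
 * 0x80 satisfies 0x80 < 0xf0 and makes the function return true, whereas one
 * of priority 0xf0 is masked by the PMR and does not (on the GIC, a lower
 * numerical priority value means a higher priority).
 */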