// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 ARM Ltd.
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/uaccess.h>

#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>

#include <kvm/arm_vgic.h>
#include <kvm/arm_arch_timer.h>

#include "trace.h"

static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static unsigned int host_ptimer_irq;
static u32 host_vtimer_irq_flags;
static u32 host_ptimer_irq_flags;

static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);

static const u8 default_ppi[] = {
	[TIMER_PTIMER]  = 30,
	[TIMER_VTIMER]  = 27,
	[TIMER_HPTIMER] = 26,
	[TIMER_HVTIMER] = 28,
};

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx);
static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val);
static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg);
static bool kvm_arch_timer_get_input_level(int vintid);

static struct irq_ops arch_timer_irq_ops = {
	.get_input_level = kvm_arch_timer_get_input_level,
};

static bool has_cntpoff(void)
{
	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
}

static int nr_timers(struct kvm_vcpu *vcpu)
{
	if (!vcpu_has_nv(vcpu))
		return NR_KVM_EL0_TIMERS;

	return NR_KVM_TIMERS;
}

u32 timer_get_ctl(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

u64 timer_get_cval(struct arch_timer_context *ctxt)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
	case TIMER_PTIMER:
		return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
	case TIMER_HVTIMER:
		return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
	case TIMER_HPTIMER:
		return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
	default:
		WARN_ON(1);
		return 0;
	}
}

static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
	u64 offset = 0;

	if (!ctxt)
		return 0;

	if (ctxt->offset.vm_offset)
		offset += *ctxt->offset.vm_offset;
	if (ctxt->offset.vcpu_offset)
		offset += *ctxt->offset.vcpu_offset;

	return offset;
}

static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch(arch_timer_ctx_index(ctxt)) {
	case TIMER_VTIMER:
		__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval;
		break;
	case TIMER_PTIMER:
		__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval;
		break;
	case TIMER_HVTIMER:
		__vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval;
		break;
	case TIMER_HPTIMER:
		__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval;
		break;
	default:
		WARN_ON(1);
	}
}

static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
	if (!ctxt->offset.vm_offset) {
		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
		return;
	}

	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}

u64 kvm_phys_timer_read(void)
{
	return timecounter->cc->read(timecounter->cc);
}

static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	if (vcpu_has_nv(vcpu)) {
		if (is_hyp_ctxt(vcpu)) {
			map->direct_vtimer = vcpu_hvtimer(vcpu);
			map->direct_ptimer = vcpu_hptimer(vcpu);
			map->emul_vtimer = vcpu_vtimer(vcpu);
			map->emul_ptimer = vcpu_ptimer(vcpu);
		} else {
			map->direct_vtimer = vcpu_vtimer(vcpu);
			map->direct_ptimer = vcpu_ptimer(vcpu);
			map->emul_vtimer = vcpu_hvtimer(vcpu);
			map->emul_ptimer = vcpu_hptimer(vcpu);
		}
	} else if (has_vhe()) {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = vcpu_ptimer(vcpu);
		map->emul_vtimer = NULL;
		map->emul_ptimer = NULL;
	} else {
		map->direct_vtimer = vcpu_vtimer(vcpu);
		map->direct_ptimer = NULL;
		map->emul_vtimer = NULL;
		map->emul_ptimer = vcpu_ptimer(vcpu);
	}

	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
}

static inline bool userspace_irqchip(struct kvm *kvm)
{
	return static_branch_unlikely(&userspace_irqchip_in_use) &&
		unlikely(!irqchip_in_kernel(kvm));
}

static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
		      HRTIMER_MODE_ABS_HARD);
}

static void soft_timer_cancel(struct hrtimer *hrt)
{
	hrtimer_cancel(hrt);
}

static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
{
	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
	struct arch_timer_context *ctx;
	struct timer_map map;

	/*
	 * We may see a timer interrupt after vcpu_put() has been called which
	 * sets the CPU's vcpu pointer to NULL, because even though the timer
	 * has been disabled in timer_save_state(), the hardware interrupt
	 * signal may not have been retired from the interrupt controller yet.
	 */
	if (!vcpu)
		return IRQ_HANDLED;

	get_timer_map(vcpu, &map);

	if (irq == host_vtimer_irq)
		ctx = map.direct_vtimer;
	else
		ctx = map.direct_ptimer;

	if (kvm_timer_should_fire(ctx))
		kvm_timer_update_irq(vcpu, true, ctx);

	if (userspace_irqchip(vcpu->kvm) &&
	    !static_branch_unlikely(&has_gic_active_state))
		disable_percpu_irq(host_vtimer_irq);

	return IRQ_HANDLED;
}

static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
				     u64 val)
{
	u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	if (now < val) {
		u64 ns;

		ns = cyclecounter_cyc2ns(timecounter->cc,
					 val - now,
					 timecounter->mask,
					 &timer_ctx->ns_frac);
		return ns;
	}

	return 0;
}

static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
{
	return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
}

static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
{
	WARN_ON(timer_ctx && timer_ctx->loaded);
	return timer_ctx &&
		((timer_get_ctl(timer_ctx) &
		  (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
}

static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
{
	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
		vcpu_get_flag(vcpu, IN_WFIT));
}

static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
{
	u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
	struct arch_timer_context *ctx;

	ctx = (vcpu_has_nv(vcpu) && is_hyp_ctxt(vcpu)) ? vcpu_hvtimer(vcpu)
						       : vcpu_vtimer(vcpu);

	return kvm_counter_compute_delta(ctx, val);
}

/*
 * Returns the earliest expiration time in ns among guest timers.
 * Note that it will return 0 if none of the timers can fire.
 */
static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
{
	u64 min_delta = ULLONG_MAX;
	int i;

	for (i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];

		WARN(ctx->loaded, "timer %d loaded\n", i);
		if (kvm_timer_irq_can_fire(ctx))
			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
	}

	if (vcpu_has_wfit_active(vcpu))
		min_delta = min(min_delta, wfit_delay_ns(vcpu));

	/* If none of the timers can fire, then return 0 */
	if (min_delta == ULLONG_MAX)
		return 0;

	return min_delta;
}

static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
{
	struct arch_timer_cpu *timer;
	struct kvm_vcpu *vcpu;
	u64 ns;

	timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If we should have slept longer, restart it.
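	 * A non-zero value from kvm_timer_earliest_exp() below is the
	 * time left, in ns, before the earliest guest timer fires.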
	 */
	ns = kvm_timer_earliest_exp(vcpu);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_vcpu_wake_up(vcpu);
	return HRTIMER_NORESTART;
}

static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
{
	struct arch_timer_context *ctx;
	struct kvm_vcpu *vcpu;
	u64 ns;

	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
	vcpu = ctx->vcpu;

	trace_kvm_timer_hrtimer_expire(ctx);

	/*
	 * Check that the timer has really expired from the guest's
	 * PoV (NTP on the host may have forced it to expire
	 * early). If not ready, schedule for a later time.
	 */
	ns = kvm_timer_compute_delta(ctx);
	if (unlikely(ns)) {
		hrtimer_forward_now(hrt, ns_to_ktime(ns));
		return HRTIMER_RESTART;
	}

	kvm_timer_update_irq(vcpu, true, ctx);
	return HRTIMER_NORESTART;
}

static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
{
	enum kvm_arch_timers index;
	u64 cval, now;

	if (!timer_ctx)
		return false;

	index = arch_timer_ctx_index(timer_ctx);

	if (timer_ctx->loaded) {
		u32 cnt_ctl = 0;

		switch (index) {
		case TIMER_VTIMER:
		case TIMER_HVTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
			break;
		case TIMER_PTIMER:
		case TIMER_HPTIMER:
			cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
			break;
		case NR_KVM_TIMERS:
			/* GCC is braindead */
			cnt_ctl = 0;
			break;
		}

		return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
		       (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
	}

	if (!kvm_timer_irq_can_fire(timer_ctx))
		return false;

	cval = timer_get_cval(timer_ctx);
	now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);

	return cval <= now;
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
}

/*
 * Reflect the timer output level into the kvm_run structure
 */
void kvm_timer_update_run(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the device bitmap with the timer states */
	regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
				    KVM_ARM_DEV_EL1_PTIMER);
	if (kvm_timer_should_fire(vtimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
	if (kvm_timer_should_fire(ptimer))
		regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
}

static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
				 struct arch_timer_context *timer_ctx)
{
	int ret;

	timer_ctx->irq.level = new_level;
	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
				   timer_ctx->irq.level);

	if (!userspace_irqchip(vcpu->kvm)) {
		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					  timer_irq(timer_ctx),
					  timer_ctx->irq.level,
					  timer_ctx);
		WARN_ON(ret);
	}
}

/* Only called for a fully emulated timer */
static void timer_emulate(struct arch_timer_context *ctx)
{
	bool should_fire = kvm_timer_should_fire(ctx);

	trace_kvm_timer_emulate(ctx, should_fire);

	if (should_fire != ctx->irq.level) {
		kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
		return;
	}

	/*
	 * If the timer can fire now, we don't need to have a soft timer
	 * scheduled for the future. If the timer cannot fire at all,
	 * then we also don't need a soft timer.
	 */
	if (should_fire || !kvm_timer_irq_can_fire(ctx))
		return;

	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
}

static void set_cntvoff(u64 cntvoff)
{
	kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
}

static void set_cntpoff(u64 cntpoff)
{
	if (has_cntpoff())
		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
}

static void timer_save_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (!ctx->loaded)
		goto out;

	switch (index) {
		u64 cval;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
		timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTV_CTL);
		isb();

		/*
		 * The kernel may decide to run userspace after
		 * calling vcpu_put, so we reset cntvoff to 0 to
		 * ensure a consistent read between user accesses to
		 * the virtual counter and kernel access to the
		 * physical counter in the non-VHE case.
		 *
		 * For VHE, the virtual counter uses a fixed virtual
		 * offset of zero, so no need to zero CNTVOFF_EL2
		 * register, but this is actually useful when switching
		 * between EL1/vEL2 with NV.
		 *
		 * Do it unconditionally, as this is either unavoidable
		 * or dirt cheap.
		 */
		set_cntvoff(0);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
		cval = read_sysreg_el0(SYS_CNTP_CVAL);

		if (!has_cntpoff())
			cval -= timer_get_offset(ctx);

		timer_set_cval(ctx, cval);

		/* Disable the timer */
		write_sysreg_el0(0, SYS_CNTP_CTL);
		isb();

		set_cntpoff(0);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_save_state(ctx);

	ctx->loaded = false;
out:
	local_irq_restore(flags);
}

/*
 * Schedule the background timer before calling kvm_vcpu_halt, so that this
 * thread is removed from its waitqueue and made runnable when there's a timer
 * interrupt to handle.
 */
static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * If no timers are capable of raising interrupts (disabled or
	 * masked), then there's no more work for us to do.
	 */
	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
	    !kvm_timer_irq_can_fire(map.emul_vtimer) &&
	    !kvm_timer_irq_can_fire(map.emul_ptimer) &&
	    !vcpu_has_wfit_active(vcpu))
		return;

	/*
	 * At least one guest timer will expire. Schedule a background timer.
	 * Set the earliest expiration time among the guest timers.
	 */
	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
}

static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static void timer_restore_state(struct arch_timer_context *ctx)
{
	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
	unsigned long flags;

	if (!timer->enabled)
		return;

	local_irq_save(flags);

	if (ctx->loaded)
		goto out;

	switch (index) {
		u64 cval, offset;

	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		set_cntvoff(timer_get_offset(ctx));
		write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
		break;
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		cval = timer_get_cval(ctx);
		offset = timer_get_offset(ctx);
		set_cntpoff(offset);
		if (!has_cntpoff())
			cval += offset;
		write_sysreg_el0(cval, SYS_CNTP_CVAL);
		isb();
		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
		break;
	case NR_KVM_TIMERS:
		BUG();
	}

	trace_kvm_timer_restore_state(ctx);

	ctx->loaded = true;
out:
	local_irq_restore(flags);
}

static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
{
	int r;
	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
	WARN_ON(r);
}

static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
{
	struct kvm_vcpu *vcpu = ctx->vcpu;
	bool phys_active = false;

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx);

	if (irqchip_in_kernel(vcpu->kvm))
		phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));

	phys_active |= ctx->irq.level;

	set_timer_irq_phys_active(ctx, phys_active);
}

static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	/*
	 * Update the timer output so that it is likely to match the
	 * state we're about to restore. If the timer expires between
	 * this point and the register restoration, we'll take the
	 * interrupt anyway.
	 */
	kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);

	/*
	 * When using a userspace irqchip with the architected timers and a
	 * host interrupt controller that doesn't support an active state, we
	 * must still prevent continuously exiting from the guest, and
	 * therefore mask the physical interrupt by disabling it on the host
	 * interrupt controller when the virtual level is high, such that the
	 * guest can make forward progress. Once we detect the output level
	 * being de-asserted, we unmask the interrupt again so that we exit
	 * from the guest when the timer fires.
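	 * The unmasking itself happens in unmask_vtimer_irq_user(), called
	 * from kvm_timer_sync_user() once we are back from the guest.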
	 */
	if (vtimer->irq.level)
		disable_percpu_irq(host_vtimer_irq);
	else
		enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
}

/* If _pred is true, set bit in _set, otherwise set it in _clr */
#define assign_clear_set_bit(_pred, _bit, _clr, _set)			\
	do {								\
		if (_pred)						\
			(_set) |= (_bit);				\
		else							\
			(_clr) |= (_bit);				\
	} while (0)

static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
					      struct timer_map *map)
{
	int hw, ret;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	/*
	 * We only ever unmap the vtimer irq on a VHE system that runs nested
	 * virtualization, in which case we have both a valid emul_vtimer,
	 * emul_ptimer, direct_vtimer, and direct_ptimer.
	 *
	 * Since this is called from kvm_timer_vcpu_load(), a change between
	 * vEL2 and vEL1/0 will have just happened, and the timer_map will
	 * represent this, and therefore we switch the emul/direct mappings
	 * below.
	 */
	hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
	if (hw < 0) {
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
		kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));

		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_vtimer->host_timer_irq,
					    timer_irq(map->direct_vtimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map->direct_ptimer->host_timer_irq,
					    timer_irq(map->direct_ptimer),
					    &arch_timer_irq_ops);
		WARN_ON_ONCE(ret);

		/*
		 * The virtual offset behaviour is "interesting", as it
		 * always applies when HCR_EL2.E2H==0, but only when
		 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
		 * track E2H when putting the HV timer in "direct" mode.
		 */
		if (map->direct_vtimer == vcpu_hvtimer(vcpu)) {
			struct arch_timer_offset *offs = &map->direct_vtimer->offset;

			if (vcpu_el2_e2h_is_set(vcpu))
				offs->vcpu_offset = NULL;
			else
				offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		}
	}
}

static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
{
	bool tpt, tpc;
	u64 clr, set;

	/*
	 * No trapping gets configured here with nVHE. See
	 * __timer_enable_traps(), which is where the stuff happens.
	 */
	if (!has_vhe())
		return;

	/*
	 * Our default policy is not to trap anything. As we progress
	 * within this function, reality kicks in and we start adding
	 * traps based on emulation requirements.
	 */
	tpt = tpc = false;

	/*
	 * We have two possibilities to deal with a physical offset:
	 *
	 * - Either we have CNTPOFF (yay!) or the offset is 0:
	 *   we let the guest freely access the HW
	 *
	 * - or neither of these conditions applies:
	 *   we trap accesses to the HW, but still use it
	 *   after correcting the physical offset
	 */
	if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
		tpt = tpc = true;

	/*
	 * Apply the enable bits that the guest hypervisor has requested for
	 * its own guest. We can only add traps that wouldn't have been set
	 * above.
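	 * Note that EL1PCEN/EL1PCTEN are *enable* bits, hence the inversion
	 * when they are folded into the trap requirements below.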
	 */
	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
		u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);

		/* Use the VHE format for mental sanity */
		if (!vcpu_el2_e2h_is_set(vcpu))
			val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;

		tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
		tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
	}

	/*
	 * Now that we have collected our requirements, compute the
	 * trap and enable bits.
	 */
	set = 0;
	clr = 0;

	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);

	/* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
	sysreg_clear_set(cntkctl_el1, clr, set);
}

void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	if (static_branch_likely(&has_gic_active_state)) {
		if (vcpu_has_nv(vcpu))
			kvm_timer_vcpu_load_nested_switch(vcpu, &map);

		kvm_timer_vcpu_load_gic(map.direct_vtimer);
		if (map.direct_ptimer)
			kvm_timer_vcpu_load_gic(map.direct_ptimer);
	} else {
		kvm_timer_vcpu_load_nogic(vcpu);
	}

	kvm_timer_unblocking(vcpu);

	timer_restore_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_restore_state(map.direct_ptimer);
	if (map.emul_vtimer)
		timer_emulate(map.emul_vtimer);
	if (map.emul_ptimer)
		timer_emulate(map.emul_ptimer);

	timer_set_traps(vcpu, &map);
}

bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool vlevel, plevel;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
	plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;

	return kvm_timer_should_fire(vtimer) != vlevel ||
	       kvm_timer_should_fire(ptimer) != plevel;
}

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	if (unlikely(!timer->enabled))
		return;

	get_timer_map(vcpu, &map);

	timer_save_state(map.direct_vtimer);
	if (map.direct_ptimer)
		timer_save_state(map.direct_ptimer);

	/*
	 * Cancel soft timer emulation, because the only case where we
	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
	 * in that case we already factor in the deadline for the physical
	 * timer when scheduling the bg_timer.
	 *
	 * In any case, we re-schedule the hrtimer for the physical timer when
	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
	 */
	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	if (kvm_vcpu_is_blocking(vcpu))
		kvm_timer_blocking(vcpu);
}

/*
 * With a userspace irqchip we have to check if the guest de-asserted the
 * timer and if so, unmask the timer irq signal on the host interrupt
 * controller to ensure that we see future timer signals.
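 * This is the counterpart of the masking performed in
 * kvm_timer_vcpu_load_nogic().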
 */
static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);

	if (!kvm_timer_should_fire(vtimer)) {
		kvm_timer_update_irq(vcpu, false, vtimer);
		if (static_branch_likely(&has_gic_active_state))
			set_timer_irq_phys_active(vtimer, false);
		else
			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	}
}

void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	if (unlikely(!timer->enabled))
		return;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
		unmask_vtimer_irq_user(vcpu);
}

int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;

	get_timer_map(vcpu, &map);

	/*
	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
	 * and to 0 for ARMv7. We provide an implementation that always
	 * resets the timer to be disabled and unmasked and is compliant with
	 * the ARMv7 architecture.
	 */
	for (int i = 0; i < nr_timers(vcpu); i++)
		timer_set_ctl(vcpu_get_timer(vcpu, i), 0);

	/*
	 * A vcpu running at EL2 is in charge of the offset applied to
	 * the virtual timer, so use the physical VM offset, and point
	 * the vcpu offset to CNTVOFF_EL2.
	 */
	if (vcpu_has_nv(vcpu)) {
		struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;

		offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2);
		offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
	}

	if (timer->enabled) {
		for (int i = 0; i < nr_timers(vcpu); i++)
			kvm_timer_update_irq(vcpu, false,
					     vcpu_get_timer(vcpu, i));

		if (irqchip_in_kernel(vcpu->kvm)) {
			kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
			if (map.direct_ptimer)
				kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
		}
	}

	if (map.emul_vtimer)
		soft_timer_cancel(&map.emul_vtimer->hrtimer);
	if (map.emul_ptimer)
		soft_timer_cancel(&map.emul_ptimer->hrtimer);

	return 0;
}

static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
{
	struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
	struct kvm *kvm = vcpu->kvm;

	ctxt->vcpu = vcpu;

	if (timerid == TIMER_VTIMER)
		ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
	else
		ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;

	hrtimer_init(&ctxt->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ctxt->hrtimer.function = kvm_hrtimer_expire;

	switch (timerid) {
	case TIMER_PTIMER:
	case TIMER_HPTIMER:
		ctxt->host_timer_irq = host_ptimer_irq;
		break;
	case TIMER_VTIMER:
	case TIMER_HVTIMER:
		ctxt->host_timer_irq = host_vtimer_irq;
		break;
	}
}

void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	for (int i = 0; i < NR_KVM_TIMERS; i++)
		timer_context_init(vcpu, i);

	/* Synchronize offsets across timers of a VM if not already provided */
	if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
		timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
		timer_set_offset(vcpu_ptimer(vcpu), 0);
	}

	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	timer->bg_timer.function = kvm_bg_timer_expire;
}

void kvm_timer_init_vm(struct kvm *kvm)
{
	for (int i = 0; i < NR_KVM_TIMERS; i++)
		kvm->arch.timer_data.ppi[i] = default_ppi[i];
}

void kvm_timer_cpu_up(void)
{
	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
	if (host_ptimer_irq)
		enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
}

void kvm_timer_cpu_down(void)
{
	disable_percpu_irq(host_vtimer_irq);
	if (host_ptimer_irq)
		disable_percpu_irq(host_ptimer_irq);
}

int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
{
	struct arch_timer_context *timer;

	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_TIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_vtimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_TIMER_CVAL:
		timer = vcpu_vtimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;
	case KVM_REG_ARM_PTIMER_CTL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
		break;
	case KVM_REG_ARM_PTIMER_CNT:
		if (!test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET,
			      &vcpu->kvm->arch.flags)) {
			timer = vcpu_ptimer(vcpu);
			timer_set_offset(timer, kvm_phys_timer_read() - value);
		}
		break;
	case KVM_REG_ARM_PTIMER_CVAL:
		timer = vcpu_ptimer(vcpu);
		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
		break;

	default:
		return -1;
	}

	return 0;
}

static u64 read_timer_ctl(struct arch_timer_context *timer)
{
	/*
	 * Set ISTATUS bit if it's expired.
	 * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
	 * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
	 * regardless of ENABLE bit for our implementation convenience.
	 */
	u32 ctl = timer_get_ctl(timer);

	if (!kvm_timer_compute_delta(timer))
		ctl |= ARCH_TIMER_CTRL_IT_STAT;

	return ctl;
}

u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
{
	switch (regid) {
	case KVM_REG_ARM_TIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_TIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_TIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
	case KVM_REG_ARM_PTIMER_CTL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
	case KVM_REG_ARM_PTIMER_CNT:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CNT);
	case KVM_REG_ARM_PTIMER_CVAL:
		return kvm_arm_timer_read(vcpu,
					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
	}
	return (u64)-1;
}

static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
			      struct arch_timer_context *timer,
			      enum kvm_arch_timer_regs treg)
{
	u64 val;

	switch (treg) {
	case TIMER_REG_TVAL:
		val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
		val = lower_32_bits(val);
		break;

	case TIMER_REG_CTL:
		val = read_timer_ctl(timer);
		break;

	case TIMER_REG_CVAL:
		val = timer_get_cval(timer);
		break;

	case TIMER_REG_CNT:
		val = kvm_phys_timer_read() - timer_get_offset(timer);
		break;

	case TIMER_REG_VOFF:
		val = *timer->offset.vcpu_offset;
		break;

	default:
		BUG();
	}

	return val;
}

u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
			      enum kvm_arch_timers tmr,
			      enum kvm_arch_timer_regs treg)
{
	struct arch_timer_context *timer;
	struct timer_map map;
	u64 val;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);

	if (timer == map.emul_vtimer || timer == map.emul_ptimer)
		return kvm_arm_timer_read(vcpu, timer, treg);

	preempt_disable();
	timer_save_state(timer);

	val = kvm_arm_timer_read(vcpu, timer, treg);

	timer_restore_state(timer);
	preempt_enable();

	return val;
}

static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
				struct arch_timer_context *timer,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	switch (treg) {
	case TIMER_REG_TVAL:
		timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
		break;

	case TIMER_REG_CTL:
		timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
		break;

	case TIMER_REG_CVAL:
		timer_set_cval(timer, val);
		break;

	case TIMER_REG_VOFF:
		*timer->offset.vcpu_offset = val;
		break;

	default:
		BUG();
	}
}

void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
				enum kvm_arch_timers tmr,
				enum kvm_arch_timer_regs treg,
				u64 val)
{
	struct arch_timer_context *timer;
	struct timer_map map;

	get_timer_map(vcpu, &map);
	timer = vcpu_get_timer(vcpu, tmr);
	if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
		soft_timer_cancel(&timer->hrtimer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
		timer_emulate(timer);
	} else {
		preempt_disable();
		timer_save_state(timer);
		kvm_arm_timer_write(vcpu, timer, treg, val);
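		/*
		 * timer_save_state() pulled the live hardware state into the
		 * in-memory context; restoring it below pushes the value we
		 * just wrote back onto the hardware timer.
		 */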
		timer_restore_state(timer);
		preempt_enable();
	}
}

static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
{
	if (vcpu)
		irqd_set_forwarded_to_vcpu(d);
	else
		irqd_clr_forwarded_to_vcpu(d);

	return 0;
}

static int timer_irq_set_irqchip_state(struct irq_data *d,
				       enum irqchip_irq_state which, bool val)
{
	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
		return irq_chip_set_parent_state(d, which, val);

	if (val)
		irq_chip_mask_parent(d);
	else
		irq_chip_unmask_parent(d);

	return 0;
}

static void timer_irq_eoi(struct irq_data *d)
{
	if (!irqd_is_forwarded_to_vcpu(d))
		irq_chip_eoi_parent(d);
}

static void timer_irq_ack(struct irq_data *d)
{
	d = d->parent_data;
	if (d->chip->irq_ack)
		d->chip->irq_ack(d);
}

static struct irq_chip timer_chip = {
	.name			= "KVM",
	.irq_ack		= timer_irq_ack,
	.irq_mask		= irq_chip_mask_parent,
	.irq_unmask		= irq_chip_unmask_parent,
	.irq_eoi		= timer_irq_eoi,
	.irq_set_type		= irq_chip_set_type_parent,
	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
};

static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs, void *arg)
{
	irq_hw_number_t hwirq = (uintptr_t)arg;

	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
					     &timer_chip, NULL);
}

static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				  unsigned int nr_irqs)
{
}

static const struct irq_domain_ops timer_domain_ops = {
	.alloc	= timer_irq_domain_alloc,
	.free	= timer_irq_domain_free,
};

static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
{
	*flags = irq_get_trigger_type(virq);
	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
			virq);
		*flags = IRQF_TRIGGER_LOW;
	}
}

static int kvm_irq_init(struct arch_timer_kvm_info *info)
{
	struct irq_domain *domain = NULL;

	if (info->virtual_irq <= 0) {
		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
			info->virtual_irq);
		return -ENODEV;
	}

	host_vtimer_irq = info->virtual_irq;
	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);

	if (kvm_vgic_global_state.no_hw_deactivation) {
		struct fwnode_handle *fwnode;
		struct irq_data *data;

		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
		if (!fwnode)
			return -ENOMEM;

		/* Assume both vtimer and ptimer in the same parent */
		data = irq_get_irq_data(host_vtimer_irq);
		domain = irq_domain_create_hierarchy(data->domain, 0,
						     NR_KVM_TIMERS, fwnode,
						     &timer_domain_ops, NULL);
		if (!domain) {
			irq_domain_free_fwnode(fwnode);
			return -ENOMEM;
		}

		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
					    (void *)TIMER_VTIMER));
	}

	if (info->physical_irq > 0) {
		host_ptimer_irq = info->physical_irq;
		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);

		if (domain)
			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
						    (void *)TIMER_PTIMER));
	}

	return 0;
}

int __init kvm_timer_hyp_init(bool has_gic)
{
	struct arch_timer_kvm_info *info;
	int err;

	info = arch_timer_get_kvm_info();
	timecounter = &info->timecounter;

	if (!timecounter->cc) {
		kvm_err("kvm_arch_timer: uninitialized timecounter\n");
		return -ENODEV;
	}

	err = kvm_irq_init(info);
	if (err)
		return err;

	/* First, do the virtual EL1 timer irq */

	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
				 "kvm guest vtimer", kvm_get_running_vcpus());
	if (err) {
		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
			host_vtimer_irq, err);
		return err;
	}

	if (has_gic) {
		err = irq_set_vcpu_affinity(host_vtimer_irq,
					    kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
			goto out_free_vtimer_irq;
		}

		static_branch_enable(&has_gic_active_state);
	}

	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);

	/* Now let's do the physical EL1 timer irq */

	if (info->physical_irq > 0) {
		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
					 "kvm guest ptimer", kvm_get_running_vcpus());
		if (err) {
			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
				host_ptimer_irq, err);
			goto out_free_vtimer_irq;
		}

		if (has_gic) {
			err = irq_set_vcpu_affinity(host_ptimer_irq,
						    kvm_get_running_vcpus());
			if (err) {
				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
				goto out_free_ptimer_irq;
			}
		}

		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
	} else if (has_vhe()) {
		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
			info->physical_irq);
		err = -ENODEV;
		goto out_free_vtimer_irq;
	}

	return 0;

out_free_ptimer_irq:
	if (info->physical_irq > 0)
		free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
out_free_vtimer_irq:
	free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
	return err;
}

void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);

	soft_timer_cancel(&timer->bg_timer);
}

static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
{
	u32 ppis = 0;
	bool valid;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;
		int irq;

		ctx = vcpu_get_timer(vcpu, i);
		irq = timer_irq(ctx);
		if (kvm_vgic_set_owner(vcpu, irq, ctx))
			break;

		/*
		 * We know by construction that we only have PPIs, so
		 * all values are less than 32.
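		 * They therefore all fit in the 32-bit 'ppis' bitmap.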
		 */
		ppis |= BIT(irq);
	}

	valid = hweight32(ppis) == nr_timers(vcpu);

	if (valid)
		set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);

	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return valid;
}

static bool kvm_arch_timer_get_input_level(int vintid)
{
	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	if (WARN(!vcpu, "No vcpu context!\n"))
		return false;

	for (int i = 0; i < nr_timers(vcpu); i++) {
		struct arch_timer_context *ctx;

		ctx = vcpu_get_timer(vcpu, i);
		if (timer_irq(ctx) == vintid)
			return kvm_timer_should_fire(ctx);
	}

	/* A timer IRQ has fired, but no matching timer was found? */
	WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);

	return false;
}

int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
	struct timer_map map;
	int ret;

	if (timer->enabled)
		return 0;

	/* Without a VGIC we do not map virtual IRQs to physical IRQs */
	if (!irqchip_in_kernel(vcpu->kvm))
		goto no_vgic;

	/*
	 * At this stage, we have the guarantee that the vgic is both
	 * available and initialized.
	 */
	if (!timer_irqs_are_valid(vcpu)) {
		kvm_debug("incorrectly configured timer irqs\n");
		return -EINVAL;
	}

	get_timer_map(vcpu, &map);

	ret = kvm_vgic_map_phys_irq(vcpu,
				    map.direct_vtimer->host_timer_irq,
				    timer_irq(map.direct_vtimer),
				    &arch_timer_irq_ops);
	if (ret)
		return ret;

	if (map.direct_ptimer) {
		ret = kvm_vgic_map_phys_irq(vcpu,
					    map.direct_ptimer->host_timer_irq,
					    timer_irq(map.direct_ptimer),
					    &arch_timer_irq_ops);
	}

	if (ret)
		return ret;

no_vgic:
	timer->enabled = 1;
	return 0;
}

/* If we have CNTPOFF, permanently set ECV to enable it */
void kvm_timer_init_vhe(void)
{
	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
		sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
}

int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	int irq, idx, ret = 0;

	if (!irqchip_in_kernel(vcpu->kvm))
		return -EINVAL;

	if (get_user(irq, uaddr))
		return -EFAULT;

	if (!(irq_is_ppi(irq)))
		return -EINVAL;

	mutex_lock(&vcpu->kvm->arch.config_lock);

	if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
		     &vcpu->kvm->arch.flags)) {
		ret = -EBUSY;
		goto out;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		idx = TIMER_VTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		idx = TIMER_PTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		idx = TIMER_HVTIMER;
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		idx = TIMER_HPTIMER;
		break;
	default:
		ret = -ENXIO;
		goto out;
	}

	/*
	 * We cannot validate the IRQ uniqueness before we run, so take it at
	 * face value. The verdict will be given on first vcpu run, for each
	 * vcpu. Yes this is late. Blame it on the stupid API.
	 */
	vcpu->kvm->arch.timer_data.ppi[idx] = irq;

out:
	mutex_unlock(&vcpu->kvm->arch.config_lock);
	return ret;
}

int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	int __user *uaddr = (int __user *)(long)attr->addr;
	struct arch_timer_context *timer;
	int irq;

	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
		timer = vcpu_vtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
		timer = vcpu_ptimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
		timer = vcpu_hvtimer(vcpu);
		break;
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		timer = vcpu_hptimer(vcpu);
		break;
	default:
		return -ENXIO;
	}

	irq = timer_irq(timer);
	return put_user(irq, uaddr);
}

int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
	case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
		return 0;
	}

	return -ENXIO;
}

int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
				    struct kvm_arm_counter_offset *offset)
{
	int ret = 0;

	if (offset->reserved)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (lock_all_vcpus(kvm)) {
		set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);

		/*
		 * If userspace decides to set the offset using this
		 * API rather than merely restoring the counter
		 * values, the offset applies to both the virtual and
		 * physical views.
		 */
		kvm->arch.timer_data.voffset = offset->counter_offset;
		kvm->arch.timer_data.poffset = offset->counter_offset;

		unlock_all_vcpus(kvm);
	} else {
		ret = -EBUSY;
	}

	mutex_unlock(&kvm->lock);

	return ret;
}